Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ members = [
"diskann-benchmark-simd",
"diskann-benchmark",
"diskann-tools",
# Experimental
"experimental-multi-vector-bench",
]

default-members = [
Expand Down Expand Up @@ -62,6 +64,8 @@ diskann-label-filter = { path = "diskann-label-filter", version = "0.45.0" }
diskann-benchmark-runner = { path = "diskann-benchmark-runner", version = "0.45.0" }
diskann-benchmark-core = { path = "diskann-benchmark-core", version = "0.45.0" }
diskann-tools = { path = "diskann-tools", version = "0.45.0" }
# Experimental
experimental-multi-vector-bench = { path = "experimental-multi-vector-bench", version = "0.45.0" }

# External dependencies (shared versions)
anyhow = "1.0.98"
Expand Down
160 changes: 154 additions & 6 deletions diskann-quantization/src/multi_vector/matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
//! - [`Repr`]: Read-only matrix representation.
//! - [`ReprMut`]: Mutable matrix representation.
//! - [`ReprOwned`]: Owning matrix representation.
//! - [`Dense`]: Dense contiguous element access.
//! - [`DenseMut`]: Mutable dense contiguous element access.
//!
//! Each trait refinement has a corresponding constructor:
//!
Expand Down Expand Up @@ -147,6 +149,63 @@ pub unsafe trait ReprOwned: ReprMut {
unsafe fn drop(self, ptr: NonNull<u8>);
}

/// Extension of [`Repr`] for representations that store elements contiguously in memory.
///
/// Dense representations allow viewing the entire matrix data as a flat `&[Element]` slice,
/// in addition to the row-by-row access provided by [`Repr`].
///
/// The methods of this trait work on a raw base pointer instead of a borrowed buffer.
/// Lifetime-checked access is provided by the safe `as_slice` wrappers on the matrix
/// types (for example `Mat::as_slice`), which forward to this trait.
///
/// # Safety
///
/// Implementations must ensure:
///
/// - [`as_slice`](Self::as_slice) returns a valid slice covering all elements.
/// - The returned slice length must equal `self.nrows() * self.ncols()`.
pub unsafe trait Dense: Repr {
/// The element type of the dense matrix.
///
/// This is the item type of the flat slices handed out by [`as_slice`](Self::as_slice).
type Element;

/// Returns the number of columns (elements per row) in the matrix.
///
/// # Safety Contract
///
/// This function must be loosely pure in the sense that for any given instance of
/// `self`, `self.ncols()` must return the same value.
fn ncols(&self) -> usize;

/// Returns the underlying data as an immutable contiguous slice.
///
/// The representation is consumed by value and `ptr` is the base pointer of the
/// storage it describes. The lifetime `'a` is unconstrained by the arguments, so
/// it is chosen entirely by the caller.
///
/// # Safety
///
/// - `ptr` must point to memory compatible with [`Repr::layout`].
/// - The entire range for this slice must be within a single allocation.
/// - The memory must not be mutated for the duration of lifetime `'a`.
/// - The lifetime for the returned slice is inferred from its usage. Correct
/// usage must properly tie the lifetime to a source.
/// - NOTE(review): an implementation that builds the slice with
/// `std::slice::from_raw_parts` (as the one for `Standard` does) additionally needs
/// every element to be initialized. Presumably "compatible with [`Repr::layout`]"
/// is meant to cover this; confirm.
unsafe fn as_slice<'a>(self, ptr: NonNull<u8>) -> &'a [Self::Element];
}

/// Extension of [`Dense`] that supports mutable slice access.
///
/// The element type of the mutable slice is the same [`Dense::Element`] exposed by the
/// read-only view; this trait only adds the mutable accessor.
///
/// # Safety
///
/// Implementations must ensure:
///
/// - [`as_slice_mut`](Self::as_slice_mut) returns a valid mutable slice
/// covering all elements.
/// - NOTE(review): presumably the mutable slice must have the same length as the one
/// returned by [`Dense::as_slice`] (the implementation for `Standard` computes both as
/// `nrows * ncols`); confirm and state it explicitly if so.
pub unsafe trait DenseMut: Dense + ReprMut {
/// Returns the underlying data as a mutable contiguous slice.
///
/// The representation is consumed by value and the lifetime `'a` is unconstrained by
/// the arguments, so exclusivity of the returned borrow is the caller's responsibility.
///
/// # Safety
///
/// - `ptr` must point to memory compatible with [`Repr::layout`].
/// - The entire range for this slice must be within a single allocation.
/// - The memory must not be accessed through any other reference for the
/// duration of lifetime `'a`.
/// - The lifetime for the returned slice is inferred from its usage. Correct
/// usage must properly tie the lifetime to a source.
unsafe fn as_slice_mut<'a>(self, ptr: NonNull<u8>) -> &'a mut [Self::Element];
}

/// A new-type version of `std::alloc::LayoutError` for cleaner error handling.
///
/// This is basically the same as [`std::alloc::LayoutError`], but constructible in
Expand Down Expand Up @@ -375,6 +434,36 @@ unsafe impl<T: Copy> ReprOwned for Standard<T> {
}
}

// SAFETY: `Standard` lays its elements out back-to-back in row-major order with no
// padding between them, so the flat view holds exactly `nrows * ncols` entries, which
// agrees with `Repr::layout`.
unsafe impl<T: Copy> Dense for Standard<T> {
    type Element = T;

    /// Number of elements stored in each row.
    fn ncols(&self) -> usize {
        self.ncols
    }

    /// Views the whole matrix as one flat, read-only slice of `nrows * ncols` elements.
    unsafe fn as_slice<'a>(self, ptr: NonNull<u8>) -> &'a [T] {
        let first = ptr.cast::<T>().as_ptr();
        let count = self.nrows * self.ncols;
        // SAFETY: Per this function's contract, `ptr` is compatible with `Repr::layout`
        // and the whole range lies inside a single allocation. `count` is
        // `nrows * ncols`, which is the element count of the contiguous row-major
        // layout used by `Standard`.
        unsafe { std::slice::from_raw_parts(first, count) }
    }
}

// SAFETY: `Standard` keeps its elements contiguous, so handing out one mutable slice over
// all of them is valid as long as the caller guarantees exclusive access.
unsafe impl<T: Copy> DenseMut for Standard<T> {
    /// Views the whole matrix as one flat, mutable slice of `nrows * ncols` elements.
    unsafe fn as_slice_mut<'a>(self, ptr: NonNull<u8>) -> &'a mut [T] {
        let first = ptr.cast::<T>().as_ptr();
        let count = self.nrows * self.ncols;
        // SAFETY: Per this function's contract, `ptr` is compatible with `Repr::layout`,
        // lies inside a single allocation, and is not accessed through any other
        // reference. `count` is `nrows * ncols`, which is the element count of the
        // contiguous row-major layout used by `Standard`.
        unsafe { std::slice::from_raw_parts_mut(first, count) }
    }
}

// SAFETY: The implementation uses guarantees from `Box` to ensure that the pointer
// initialized by it is non-null and properly aligned to the underlying type.
unsafe impl<T> NewOwned<T> for Standard<T>
Expand Down Expand Up @@ -495,7 +584,13 @@ impl<T: ReprOwned> Mat<T> {
}
}

pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
/// Returns the i-th row without bounds checking.
///
/// # Safety
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
// SAFETY: Caller must ensure i < self.num_vectors(). The constructors for this type
// ensure that `ptr` is compatible with `T`.
unsafe { self.repr.get_row(self.ptr, i) }
Expand Down Expand Up @@ -575,12 +670,34 @@ impl<T: ReprOwned> Drop for Mat<T> {
}
}

impl<T: Copy> Mat<Standard<T>> {
impl<T: Dense + ReprOwned> Mat<T> {
    /// Number of columns, i.e. the raw dimension of each vector held by the matrix.
    #[inline]
    pub fn vector_dim(&self) -> usize {
        <T as Dense>::ncols(&self.repr)
    }

    /// Borrows the full contents of the matrix as one contiguous slice.
    ///
    /// Elements appear in row-major order:
    /// `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
    #[inline]
    pub fn as_slice(&self) -> &[T::Element] {
        // SAFETY: The Mat was constructed with valid data compatible with `Dense`. The
        // returned borrow is tied to `&self` by the signature.
        unsafe { <T as Dense>::as_slice(self.repr, self.ptr) }
    }

    /// Mutably borrows the full contents of the matrix as one contiguous slice.
    ///
    /// Elements appear in row-major order:
    /// `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
    #[inline]
    pub fn as_slice_mut(&mut self) -> &mut [T::Element]
    where
        T: DenseMut,
    {
        // SAFETY: `&mut self` gives us exclusive access, and the Mat was constructed
        // with valid data compatible with `DenseMut`.
        unsafe { <T as DenseMut>::as_slice_mut(self.repr, self.ptr) }
    }
}

////////////
Expand Down Expand Up @@ -651,7 +768,7 @@ impl<'a, T: Repr> MatRef<'a, T> {
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
// No bounds check is performed here: the index requirement is part of this function's
// own `unsafe` contract and is forwarded unchanged to `get_row`.
// SAFETY: Caller must ensure i < self.num_vectors().
// NOTE(review): presumably the constructors of this type also guarantee that `ptr` is
// compatible with `T`, as the `Mat` variant of this method states; confirm.
unsafe { self.repr.get_row(self.ptr, i) }
}
Expand All @@ -677,12 +794,21 @@ impl<'a, T: Repr> MatRef<'a, T> {
}
}

impl<'a, T: Copy> MatRef<'a, Standard<T>> {
impl<'a, T: Dense> MatRef<'a, T> {
    /// Number of columns, i.e. the raw dimension of each vector in the view.
    #[inline]
    pub fn vector_dim(&self) -> usize {
        <T as Dense>::ncols(&self.repr)
    }

    /// Borrows the full contents of the view as one contiguous slice.
    ///
    /// Elements appear in row-major order:
    /// `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
    #[inline]
    pub fn as_slice(&self) -> &[T::Element] {
        // SAFETY: The MatRef was constructed with valid data compatible with `Dense`.
        // The returned borrow is tied to `&self` by the signature.
        unsafe { <T as Dense>::as_slice(self.repr, self.ptr) }
    }
}

// Reborrow: Mat -> MatRef
Expand Down Expand Up @@ -784,7 +910,7 @@ impl<'a, T: ReprMut> MatMut<'a, T> {
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
// No bounds check is performed here: the index requirement is part of this function's
// own `unsafe` contract and is forwarded unchanged to `get_row`.
// SAFETY: Caller must ensure i < self.num_vectors().
// NOTE(review): presumably the constructors of this type also guarantee that `ptr` is
// compatible with `T`, as the `Mat` variant of this method states; confirm.
unsafe { self.repr.get_row(self.ptr, i) }
}
Expand Down Expand Up @@ -869,12 +995,34 @@ impl<'this, 'a, T: ReprMut> ReborrowMut<'this> for MatMut<'a, T> {
}
}

impl<'a, T: Copy> MatMut<'a, Standard<T>> {
impl<'a, T: Dense + ReprMut> MatMut<'a, T> {
    /// Number of columns, i.e. the raw dimension of each vector in the view.
    #[inline]
    pub fn vector_dim(&self) -> usize {
        <T as Dense>::ncols(&self.repr)
    }

    /// Borrows the full contents of the view as one contiguous slice.
    ///
    /// Elements appear in row-major order:
    /// `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
    #[inline]
    pub fn as_slice(&self) -> &[T::Element] {
        // SAFETY: The MatMut was constructed with valid data compatible with `Dense`.
        // The returned borrow is tied to `&self` by the signature.
        unsafe { <T as Dense>::as_slice(self.repr, self.ptr) }
    }

    /// Mutably borrows the full contents of the view as one contiguous slice.
    ///
    /// Elements appear in row-major order:
    /// `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
    #[inline]
    pub fn as_slice_mut(&mut self) -> &mut [T::Element]
    where
        T: DenseMut,
    {
        // SAFETY: `&mut self` gives us exclusive access, and the MatMut was constructed
        // with valid data compatible with `DenseMut`.
        unsafe { <T as DenseMut>::as_slice_mut(self.repr, self.ptr) }
    }
}

//////////
Expand Down
3 changes: 2 additions & 1 deletion diskann-quantization/src/multi_vector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,6 @@ pub(crate) mod matrix;

pub use distance::{Chamfer, MaxSim, MaxSimError, QueryMatRef};
pub use matrix::{
Defaulted, LayoutError, Mat, MatMut, MatRef, Repr, ReprMut, ReprOwned, SliceError, Standard,
Defaulted, Dense, DenseMut, LayoutError, Mat, MatMut, MatRef, Repr, ReprMut, ReprOwned,
SliceError, Standard,
};
35 changes: 35 additions & 0 deletions experimental-multi-vector-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
[package]
name = "experimental-multi-vector-bench"
edition.workspace = true
version.workspace = true
authors.workspace = true
description = "Experimental multi-vector benchmarking support for DiskANN"
documentation.workspace = true
license.workspace = true

[[bin]]
name = "multivec-bench"
path = "src/bin/multivec_bench.rs"

[dependencies]
diskann-linalg.workspace = true
diskann-utils.workspace = true
diskann-quantization.workspace = true
diskann-vector.workspace = true
diskann-wide.workspace = true

# Benchmark dependencies
anyhow.workspace = true
diskann-benchmark-runner.workspace = true
rand.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror.workspace = true

[dev-dependencies]
tempfile.workspace = true

[lints]
workspace = true
Loading
Loading