diff --git a/Cargo.lock b/Cargo.lock index 426fe8795..62d62c80a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -612,6 +612,7 @@ dependencies = [ "diskann-utils", "diskann-vector", "diskann-wide", + "futures-executor", "futures-util", "half", "hashbrown 0.16.1", diff --git a/diskann-providers/Cargo.toml b/diskann-providers/Cargo.toml index d23fcc038..14d2c0d70 100644 --- a/diskann-providers/Cargo.toml +++ b/diskann-providers/Cargo.toml @@ -40,6 +40,7 @@ bf-tree = { workspace = true, optional = true } anyhow.workspace = true prost = "0.14.1" futures-util.workspace = true +futures-executor = "0.3" polonius-the-crab = { version = "0.5.0", optional = true } serde_json = { workspace = true, optional = true } vfs = { workspace = true, optional = true } diff --git a/diskann-providers/src/index/wrapped_async.rs b/diskann-providers/src/index/wrapped_async.rs index d3d37416d..69c4495b8 100644 --- a/diskann-providers/src/index/wrapped_async.rs +++ b/diskann-providers/src/index/wrapped_async.rs @@ -3,10 +3,11 @@ * Licensed under the MIT license. */ +use futures_executor::block_on; use std::{num::NonZeroUsize, sync::Arc}; use diskann::{ - ANNResult, + ANNError, ANNResult, graph::{ self, ConsolidateKind, InplaceDeleteMethod, SearchParams, glue::{ @@ -20,6 +21,14 @@ use diskann::{ utils::{ONE, async_tools::VectorIdBoxSlice}, }; +use crate::{ + model::IndexConfiguration, + storage::{ + AsyncIndexMetadata, AsyncQuantLoadContext, LoadWith, StorageReadProvider, + file_storage_provider::FileStorageProvider, + }, +}; + pub struct DiskANNIndex { /// The underlying async DiskANNIndex. pub inner: Arc>, @@ -317,3 +326,50 @@ where self.handle.block_on(self.inner.get_degree_stats(accessor)) } } + +pub trait SyncLoadWith: Sized { + fn load_with(path: &str, index_config: IndexConfiguration) -> ANNResult; +} + +// Load static memory index from pre-built files synchronously +impl SyncLoadWith for DiskANNIndex +where + DP: DataProvider + LoadWith, +{ + fn load_with(path: &str, index_config: IndexConfiguration) -> ANNResult> { + let storage = FileStorageProvider; + let data_provider = create_data_provider(&storage, path, &index_config); + + match data_provider { + Ok(dp) => { + let index = + DiskANNIndex::::new_with_current_thread_runtime(index_config.config, dp); + Ok(index) + } + Err(e) => Err(e), + } + } +} + +pub fn create_data_provider<'a, P, DP>( + provider: &P, + path: &'a str, + index_config: &'a IndexConfiguration, +) -> ANNResult +where + P: StorageReadProvider, + DP: DataProvider + LoadWith, +{ + let pq_context = AsyncQuantLoadContext { + metadata: AsyncIndexMetadata::new(path), + num_frozen_points: index_config.num_frozen_pts, + metric: index_config.dist_metric, + prefetch_lookahead: index_config.prefetch_lookahead.map(|x| x.get()), + is_disk_index: false, // only support in-memory index loading here + prefetch_cache_line_level: index_config.prefetch_cache_line_level, + }; + + let data_provider = DP::load_with(provider, &pq_context); + + block_on(data_provider) +}