diff --git a/nomt/src/merkle/seek.rs b/nomt/src/merkle/seek.rs index 114d343e06..c46dca66c4 100644 --- a/nomt/src/merkle/seek.rs +++ b/nomt/src/merkle/seek.rs @@ -25,6 +25,7 @@ use nomt_core::{ trie_pos::TriePosition, }; +use std::cmp::Ordering; use std::collections::{ hash_map::{Entry, HashMap}, VecDeque, @@ -204,6 +205,27 @@ impl SeekRequest { beatree_iterator.provide_leaf(leaf); } + let manage_deletions = |deletions: &[KeyPath], mut cur_idx: usize, key: &KeyPath| { + while cur_idx < deletions.len() { + match key.cmp(&deletions[cur_idx]) { + Ordering::Less => { + // key is before the deletion + break; + } + Ordering::Equal => { + cur_idx += 1; + return (cur_idx, true); + } + Ordering::Greater => { + // deletion is before the key. move on until we find a match or go past + cur_idx += 1; + } + } + } + + (cur_idx, false) + }; + let mut deletions_idx = 0; let (key, value_hash) = loop { match beatree_iterator.next() { @@ -212,15 +234,26 @@ impl SeekRequest { overlay_deletions.drain(..deletions_idx); return; } - Some(IterOutput::Item(key, _value)) - if deletions_idx < overlay_deletions.len() - && overlay_deletions[deletions_idx] == key => - { - deletions_idx += 1; - continue; + Some(IterOutput::Item(key, value)) => { + let (new_d_idx, should_skip) = + manage_deletions(&overlay_deletions, deletions_idx, &key); + deletions_idx = new_d_idx; + if should_skip { + continue; + } + + break (key, H::hash_value(&value)); + } + Some(IterOutput::OverflowItem(key, value_hash, _)) => { + let (new_d_idx, should_skip) = + manage_deletions(&overlay_deletions, deletions_idx, &key); + deletions_idx = new_d_idx; + if should_skip { + continue; + } + + break (key, value_hash); } - Some(IterOutput::Item(key, value)) => break (key, H::hash_value(&value)), - Some(IterOutput::OverflowItem(key, value_hash, _)) => break (key, value_hash), } }; diff --git a/nomt/tests/overlay.rs b/nomt/tests/overlay.rs index 56d536f24e..974ea3a1cf 100644 --- a/nomt/tests/overlay.rs +++ b/nomt/tests/overlay.rs @@ -1,5 +1,6 @@ mod common; +use bitvec::prelude::*; use common::Test; fn expected_root(items: Vec<([u8; 32], Vec)>) -> nomt_core::trie::Node { @@ -252,3 +253,105 @@ fn overlay_deletions() { let _overlay_b = test.update().0; } + +macro_rules! key_path { + ($($t:tt)+) => {{ + let mut path = [0u8; 32]; + let slice = bits![u8, Msb0; $($t)+]; + path.view_bits_mut::()[..slice.len()].copy_from_bitslice(&slice); + path + }} +} + +#[test] +fn overlay_deletions_respected_in_seek() { + let mut test = Test::new("overlay_deletions_respected_in_seek"); + + // key_a will be on disk, deleted in overlay. + let key_a = key_path![0, 0, 1, 0, 1]; + + // key_b will be on disk, after key_a + let key_b = key_path![0, 0, 1, 0, 1, 1]; + + // key_c constrains the leaf node to the path [0, 0, 1] + let key_c = key_path![0, 0, 0]; + + // populate. [0, 0, 1] is an internal node with children for key_a and key_b + test.write(key_a, Some(vec![42])); + test.write(key_b, Some(vec![43])); + test.write(key_c, Some(vec![44])); + let _ = test.commit(); + + // first overlay: delete key_a. [0, 0, 1] is now a leaf for key_b + test.start_overlay_session(None); + test.write(key_a, None); + let overlay_a = test.update().0; + + // second overlay: update key_b's value. + // the b-tree iterator contains key_a (deleted in overlay) and key_b, + // so seek must skip over key_a. + test.start_overlay_session([&overlay_a]); + test.write(key_b, Some(vec![22])); + let overlay_b = test.update().0; + + // Now ensure that the trie root is accurate for overlay_c. + assert_eq!( + overlay_b.root().into_inner(), + expected_root(vec![(key_b, vec![22]), (key_c, vec![44]),]), + ) +} + +#[test] +fn overlay_earlier_deletions_respected_in_seek() { + let mut test = Test::new("overlay_earlier_deletions_respected_in_seek"); + + // key_a will be introduced in one overlay and deleted in the next + let key_a = key_path![0, 0, 1, 0, 1]; + + // key_b will have a leaf on disk, deleted in an overlay. after key_a + let key_b = key_path![0, 0, 1, 0, 1, 1]; + + // key_c will have a leaf on disk. after key_b + let key_c = key_path![0, 0, 1, 1, 1, 1, 1]; + + // key_d will have a leaf on disk with the goal of constraining the leaf node to the path + // [0, 0, 1] + let key_d = key_path![0, 0, 0]; + + // populate. [0, 0, 1] is an internal node with children for key_b and key_c + test.write(key_b, Some(vec![42])); + test.write(key_c, Some(vec![43])); + test.write(key_d, Some(vec![44])); + let _ = test.commit(); + + // First overlay: introduce key_a and delete key_b. [0, 0, 1] is + // now internal with children for key_a and key_c + test.start_overlay_session(None); + test.write(key_a, Some(vec![24])); + test.write(key_b, None); + let overlay_a = test.update().0; + + // Second overlay: delete key_a. [0, 0, 1] is now a leaf for key_c + test.start_overlay_session([&overlay_a]); + test.write(key_a, None); + let overlay_b = test.update().0; + + // Third update: update key_c. Seek should skip over deleted key_a to find key_b. + // At this point, the ground truth beatree iterator contains key_b and key_c. + // Seek will: + // 1. encounter the leaf for key_c and initiate a leaf fetch + // 2. gather overlay deletions [key_a, key_b] + // 3. get first item from btree: key_b > key_a. + // 4. ignore the deleted key_a and continue to next deletion, key_b. + // 5. find that key_b is deleted, continue to next item. + // 6. get next item from btree: key_c == key_c, and return it as the leaf. + test.start_overlay_session([&overlay_b, &overlay_a]); + test.write(key_c, Some(vec![22])); + let overlay_c = test.update().0; + + // Now ensure that the trie root is accurate for overlay_c. + assert_eq!( + overlay_c.root().into_inner(), + expected_root(vec![(key_c, vec![22]), (key_d, vec![44]),]), + ) +}