microsoft · lhecker · Feb 14, 2026 · Feb 13, 2026 · lhecker · Feb 13, 2026
diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs
@@ -10,6 +10,7 @@ use edit::helpers::*;
 use edit::{buffer, glob, hash, json, oklab, simd, unicode};
 use stdext::arena::{self, scratch_arena};
 use stdext::collections::BVec;
+use stdext::unicode::Utf8Chars;
 
 struct EditingTracePatch<'a>(usize, usize, &'a str);
 
@@ -272,9 +273,7 @@ fn bench_unicode(c: &mut Criterion) {
     c.benchmark_group("unicode::Utf8Chars")
         .throughput(Throughput::Bytes(bytes.len() as u64))
         .bench_function("next", |b| {
-            b.iter(|| {
-                unicode::Utf8Chars::new(bytes, 0).fold(0u32, |acc, ch| acc.wrapping_add(ch as u32))
-            })
+            b.iter(|| Utf8Chars::new(bytes, 0).fold(0u32, |acc, ch| acc.wrapping_add(ch as u32)))
         });
 }
 

diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs
@@ -36,6 +36,7 @@ use std::str;
 pub use gap_buffer::GapBuffer;
 use stdext::arena::{Arena, scratch_arena};
 use stdext::collections::{BString, BVec};
+use stdext::unicode::Utf8Chars;
 use stdext::{ReplaceRange as _, arena_write_fmt, minmax, slice_as_uninit_mut, slice_copy_safe};
 
 use crate::cell::SemiRefCell;
@@ -45,7 +46,7 @@ use crate::framebuffer::{Framebuffer, IndexedColor};
 use crate::helpers::*;
 use crate::oklab::StraightRgba;
 use crate::simd::memchr2;
-use crate::unicode::{self, Cursor, MeasurementConfig, Utf8Chars};
+use crate::unicode::{self, Cursor, MeasurementConfig};
 use crate::{icu, simd};
 
 /// The margin template is used for line numbers.

diff --git a/crates/edit/src/icu.rs b/crates/edit/src/icu.rs
@@ -13,10 +13,10 @@ use std::{fmt, mem};
 use stdext::arena::{Arena, scratch_arena};
 use stdext::arena_format;
 use stdext::collections::{BString, BVec};
+use stdext::unicode::Utf8Chars;
 
 use crate::buffer::TextBuffer;
 use crate::sys;
-use crate::unicode::Utf8Chars;
 
 pub(crate) const ILLEGAL_ARGUMENT_ERROR: Error = Error(1); // U_ILLEGAL_ARGUMENT_ERROR
 pub const ICU_MISSING_ERROR: Error = Error(0);

diff --git a/crates/edit/src/unicode/measurement.rs b/crates/edit/src/unicode/measurement.rs
@@ -2,8 +2,8 @@
 // Licensed under the MIT License.
 
 use stdext::cold_path;
+use stdext::unicode::Utf8Chars;
 
-use super::Utf8Chars;
 use super::tables::*;
 use crate::document::ReadableDocument;
 use crate::helpers::{CoordType, Point};

diff --git a/crates/edit/src/unicode/mod.rs b/crates/edit/src/unicode/mod.rs
@@ -5,7 +5,5 @@
 
 mod measurement;
 mod tables;
-mod utf8;
 
 pub use measurement::*;
-pub use utf8::*;
diff --git a/crates/edit/src/vt.rs b/crates/edit/src/vt.rs
@@ -5,8 +5,9 @@
 
 use std::time;
 
+use stdext::unicode::Utf8Chars;
+
 use crate::simd::memchr2;
-use crate::unicode::Utf8Chars;
 
 /// The parser produces these tokens.
 pub enum Token<'parser, 'input> {

diff --git a/crates/stdext/src/collections/string.rs b/crates/stdext/src/collections/string.rs
@@ -112,12 +112,6 @@ impl<'a> BString<'a> {
         self.vec.is_empty()
     }
 
-    /// True if if the buffer is full.
-    #[inline]
-    pub fn is_full(&self) -> bool {
-        self.vec.is_full()
-    }
-
     /// The raw UTF-8 bytes.
     #[inline]
     pub fn as_bytes(&self) -> &[u8] {

diff --git a/crates/stdext/src/collections/vec.rs b/crates/stdext/src/collections/vec.rs
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
+use std::hint::assert_unchecked;
 use std::iter::FusedIterator;
 use std::marker::PhantomData;
 use std::mem::MaybeUninit;
@@ -109,12 +110,6 @@ impl<'a, T> BVec<'a, T> {
         self.len == 0
     }
 
-    /// True if if the buffer is full.
-    #[inline]
-    pub fn is_full(&self) -> bool {
-        self.len == self.cap
-    }
-
     /// Forcibly sets the length.
     ///
     /// # Safety
@@ -186,28 +181,53 @@ impl<'a, T> BVec<'a, T> {
     /// Ensures space for at least `additional` more elements, with amortized growth.
     #[inline]
     pub fn reserve(&mut self, alloc: &'a dyn Allocator, additional: usize) {
-        if additional > self.cap - self.len {
+        let len = self.len;
+        let cap = self.cap;
+        if additional > cap - len {
             self.grow(alloc, self.cap, additional);
         }
+        unsafe {
+            // SAFETY: Both asserts assume that grow() never changes self.len and always makes
+            // room for `additional` elements. They are optimizer hints that currently only take
+            // effect if grow() is inline(never) (reason unknown), but they're harmless to keep.
+            // First, we can tell the compiler that re-fetching self.len after grow() is unnecessary.
+            assert_unchecked(self.len == len);
+            // Next, we can assert that after reserve(4), we have room for 4 more elements.
+            // Naively you'd expect this to be `self.len + additional <= self.cap`, but LLVM doesn't
+            // work very well with `<=` bounds, so we use `<` here. It _must_ be `additional - 1`.
+            assert_unchecked(additional == 0 || self.len.unchecked_add(additional - 1) < self.cap);
+        }
     }
 
     /// Ensures space for at least `additional` more elements, without over-allocating.
     #[inline]
     pub fn reserve_exact(&mut self, alloc: &'a dyn Allocator, additional: usize) {
-        if additional > self.cap - self.len {
+        let len = self.len;
+        let cap = self.cap;
+        if additional > cap - len {
             self.grow(alloc, 0, additional);
         }
+        unsafe {
+            // SAFETY: See reserve(). Assumes grow() keeps self.len and makes room for `additional`.
+            assert_unchecked(self.len == len);
+            assert_unchecked(additional == 0 || self.len.unchecked_add(additional - 1) < self.cap);
+        }
     }
 
     #[inline]
     fn reserve_one(&mut self, alloc: &'a dyn Allocator) {
-        if self.is_full() {
-            self.grow(alloc, self.cap, 1);
+        let len = self.len;
+        let cap = self.cap;
+        if len >= cap {
+            self.grow(alloc, cap, 1);
+        }
+        unsafe {
+            // SAFETY: See reserve(). Assumes grow() keeps self.len and makes room for one element.
+            assert_unchecked(self.len == len);
+            assert_unchecked(self.len < self.cap);
         }
     }
 
-    // NOTE: I'm using dyn(amic dispatch) to avoid monomorphization bloat and more
-    // importantly because I counter-intuitively found it to boost performance by +20%.
     #[cold]
     fn grow(&mut self, alloc: &'a dyn Allocator, cap: usize, add: usize) {
         debug_assert!(add > 0, "growing by zero makes no sense");
@@ -247,6 +267,22 @@ impl<'a, T> BVec<'a, T> {
         }
     }
 
+    pub fn pop(&mut self) -> Option<T> {
+        if self.is_empty() {
+            return None;
+        }
+        unsafe {
+            self.len -= 1;
+            // The empty case returned early above, so the decrement cannot underflow.
+            // See: https://github.com/rust-lang/rust/issues/114334
+            // This assert helps the optimizer understand that
+            // after a pop it can push once without reallocating.
+            assert_unchecked(self.len < self.cap);
+            // Read the former last element (now at index `self.len`) out of the buffer by value.
+            Some(self.as_ptr().add(self.len).read())
+        }
+    }
+
     /// Append the items from the iterator `iter`.
     ///
     /// By assuming that your "exact size iterator" returns an *exact* size,
@@ -400,6 +436,7 @@ impl<'a, T: Copy> BVec<'a, T> {
     }
 }
 
+#[cfg(windows)]
 unsafe extern "system" {
     fn MultiByteToWideChar(
         CodePage: u32,
@@ -413,8 +450,11 @@ unsafe extern "system" {
 
 impl<'a> BVec<'a, u16> {
     pub fn push_encode_utf16(&mut self, alloc: &'a dyn Allocator, utf8: &[u8]) {
+        self.reserve(alloc, utf8.len()); // worst case ASCII: 1 byte per char
+
+        // MultiByteToWideChar is ~2x faster than the UTF8 loop below and saves space.
+        #[cfg(windows)]
         unsafe {
-            self.reserve(alloc, utf8.len()); // worst case ASCII: 1 byte per char
             let dst = self.spare_mut_ptr() as *mut u16;
             let len = MultiByteToWideChar(
                 65001,
@@ -426,6 +466,26 @@ impl<'a> BVec<'a, u16> {
             );
             self.len += len.max(0) as usize;
         }
+
+        #[cfg(not(windows))]
+        unsafe {
+            let beg = self.spare_mut_ptr();
+            let mut dst = beg;
+
+            for ch in crate::unicode::Utf8Chars::new(utf8, 0) {
+                if ch <= '\u{FFFF}' {
+                    (*dst).write(ch as u16);
+                    dst = dst.add(1);
+                } else {
+                    let ch = ch as u32 - 0x10000;
+                    (*dst.add(0)).write(0xD800 | ((ch >> 10) as u16));
+                    (*dst.add(1)).write(0xDC00 | ((ch as u16) & 0x3FF));
+                    dst = dst.add(2);
+                }
+            }
+
+            self.len += dst.offset_from_unsigned(beg);
+        }
     }
 }
 

diff --git a/crates/stdext/src/lib.rs b/crates/stdext/src/lib.rs
@@ -9,5 +9,6 @@ pub mod collections;
 mod helpers;
 pub mod simd;
 pub mod sys;
+pub mod unicode;
 
 pub use helpers::*;
diff --git a/crates/stdext/src/unicode/mod.rs b/crates/stdext/src/unicode/mod.rs
@@ -0,0 +1,8 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Everything related to Unicode lives here.
+
+mod utf8;
+
+pub use utf8::*;
diff --git a/crates/edit/src/unicode/utf8.rs → crates/stdext/src/unicode/utf8.rs b/crates/edit/src/unicode/utf8.rs → crates/stdext/src/unicode/utf8.rs