Skip to main content

wasmtime_environ/
string_pool.rs

1//! Simple string interning.
2
3use crate::{error::OutOfMemory, prelude::*};
4use core::{fmt, mem, num::NonZeroU32};
5
6/// An interned string associated with a particular string in a `StringPool`.
7///
8/// Allows for $O(1)$ equality tests, $O(1)$ hashing, and $O(1)$
9/// arbitrary-but-stable ordering.
10#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
11pub struct Atom {
12    index: NonZeroU32,
13}
14
15/// A pool of interned strings.
16///
17/// Insert new strings with [`StringPool::insert`] to get an `Atom` that is
18/// unique per string within the context of the associated pool.
19///
20/// Once you have interned a string into the pool and have its `Atom`, you can
21/// get the interned string slice via `&pool[atom]` or `pool.get(atom)`.
22///
23/// In general, there are no correctness protections against indexing into a
24/// different `StringPool` from the one that the `Atom` was not allocated
25/// inside. Doing so is memory safe but may panic or otherwise return incorrect
26/// results.
27#[derive(Default)]
28pub struct StringPool {
29    /// A map from each string in this pool (as an unsafe borrow from
30    /// `self.strings`) to its `Atom`.
31    map: mem::ManuallyDrop<TryHashMap<&'static str, Atom>>,
32
33    /// Strings in this pool. These must never be mutated or reallocated once
34    /// inserted.
35    strings: mem::ManuallyDrop<TryVec<Box<str>>>,
36}
37
38impl Drop for StringPool {
39    fn drop(&mut self) {
40        // Ensure that `self.map` is dropped before `self.strings`, since
41        // `self.map` borrows from `self.strings`.
42        //
43        // Safety: Neither field will be used again.
44        unsafe {
45            mem::ManuallyDrop::drop(&mut self.map);
46            mem::ManuallyDrop::drop(&mut self.strings);
47        }
48    }
49}
50
51impl fmt::Debug for StringPool {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        struct Strings<'a>(&'a StringPool);
54        impl fmt::Debug for Strings<'_> {
55            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56                f.debug_map()
57                    .entries(
58                        self.0
59                            .strings
60                            .iter()
61                            .enumerate()
62                            .map(|(i, s)| (Atom::new(i), s)),
63                    )
64                    .finish()
65            }
66        }
67
68        f.debug_struct("StringPool")
69            .field("strings", &Strings(self))
70            .finish()
71    }
72}
73
74impl TryClone for StringPool {
75    fn try_clone(&self) -> Result<Self, OutOfMemory> {
76        Ok(StringPool {
77            map: self.map.try_clone()?,
78            strings: self.strings.try_clone()?,
79        })
80    }
81}
82
83impl TryClone for Atom {
84    fn try_clone(&self) -> Result<Self, OutOfMemory> {
85        Ok(*self)
86    }
87}
88
89impl core::ops::Index<Atom> for StringPool {
90    type Output = str;
91
92    #[inline]
93    #[track_caller]
94    fn index(&self, atom: Atom) -> &Self::Output {
95        self.get(atom).unwrap()
96    }
97}
98
99// For convenience, to avoid `*atom` noise at call sites.
100impl core::ops::Index<&'_ Atom> for StringPool {
101    type Output = str;
102
103    #[inline]
104    #[track_caller]
105    fn index(&self, atom: &Atom) -> &Self::Output {
106        self.get(*atom).unwrap()
107    }
108}
109
110impl serde::ser::Serialize for StringPool {
111    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
112    where
113        S: serde::Serializer,
114    {
115        serde::ser::Serialize::serialize(&*self.strings, serializer)
116    }
117}
118
119impl<'de> serde::de::Deserialize<'de> for StringPool {
120    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
121    where
122        D: serde::Deserializer<'de>,
123    {
124        struct Visitor;
125        impl<'de> serde::de::Visitor<'de> for Visitor {
126            type Value = StringPool;
127
128            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
129                f.write_str("a `StringPool` sequence of strings")
130            }
131
132            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
133            where
134                A: serde::de::SeqAccess<'de>,
135            {
136                use serde::de::Error as _;
137
138                let mut pool = StringPool::new();
139
140                if let Some(len) = seq.size_hint() {
141                    pool.map.reserve(len).map_err(|oom| A::Error::custom(oom))?;
142                    pool.strings
143                        .reserve(len)
144                        .map_err(|oom| A::Error::custom(oom))?;
145                }
146
147                while let Some(s) = seq.next_element::<TryString>()? {
148                    debug_assert_eq!(s.len(), s.capacity());
149                    let s = s.into_boxed_str().map_err(|oom| A::Error::custom(oom))?;
150                    if !pool.map.contains_key(&*s) {
151                        pool.insert_new_boxed_str(s)
152                            .map_err(|oom| A::Error::custom(oom))?;
153                    }
154                }
155
156                Ok(pool)
157            }
158        }
159        deserializer.deserialize_seq(Visitor)
160    }
161}
162
163impl StringPool {
164    /// Create a new, empty pool.
165    pub fn new() -> Self {
166        Self::default()
167    }
168
169    /// Insert a new string into this pool.
170    pub fn insert(&mut self, s: &str) -> Result<Atom, OutOfMemory> {
171        if let Some(atom) = self.map.get(s) {
172            return Ok(*atom);
173        }
174
175        self.map.reserve(1)?;
176        self.strings.reserve(1)?;
177
178        let mut owned = TryString::new();
179        owned.reserve_exact(s.len())?;
180        owned.push_str(s).expect("reserved capacity");
181        let owned = owned
182            .into_boxed_str()
183            .expect("reserved exact capacity, so shouldn't need to realloc");
184
185        self.insert_new_boxed_str(owned)
186    }
187
188    fn insert_new_boxed_str(&mut self, owned: Box<str>) -> Result<Atom, OutOfMemory> {
189        debug_assert!(!self.map.contains_key(&*owned));
190
191        let index = self.strings.len();
192        let atom = Atom::new(index);
193        self.strings.push(owned)?;
194
195        // SAFETY: We never expose this borrow and never mutate or reallocate
196        // strings once inserted into the pool.
197        let s = unsafe { mem::transmute::<&str, &'static str>(&self.strings[index]) };
198
199        let old = self.map.insert(s, atom)?;
200        debug_assert!(old.is_none());
201
202        Ok(atom)
203    }
204
205    /// Get the `Atom` for the given string, if it has already been inserted
206    /// into this pool.
207    pub fn get_atom(&self, s: &str) -> Option<Atom> {
208        self.map.get(s).copied()
209    }
210
211    /// Does this pool contain the given `atom`?
212    #[inline]
213    pub fn contains(&self, atom: Atom) -> bool {
214        atom.index() < self.strings.len()
215    }
216
217    /// Get the string associated with the given `atom`, if the pool contains
218    /// the atom.
219    #[inline]
220    pub fn get(&self, atom: Atom) -> Option<&str> {
221        if self.contains(atom) {
222            Some(&self.strings[atom.index()])
223        } else {
224            None
225        }
226    }
227
228    /// Get the number of strings in this pool.
229    pub fn len(&self) -> usize {
230        self.strings.len()
231    }
232}
233
234impl Default for Atom {
235    #[inline]
236    fn default() -> Self {
237        Self {
238            index: NonZeroU32::MAX,
239        }
240    }
241}
242
243impl fmt::Debug for Atom {
244    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
245        f.debug_struct("Atom")
246            .field("index", &self.index())
247            .finish()
248    }
249}
250
251// Allow using `Atom` in `SecondaryMap`s.
252impl crate::EntityRef for Atom {
253    fn new(index: usize) -> Self {
254        Atom::new(index)
255    }
256
257    fn index(self) -> usize {
258        Atom::index(&self)
259    }
260}
261
262impl serde::ser::Serialize for Atom {
263    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
264    where
265        S: serde::Serializer,
266    {
267        serde::ser::Serialize::serialize(&self.index, serializer)
268    }
269}
270
271impl<'de> serde::de::Deserialize<'de> for Atom {
272    fn deserialize<D>(deserializer: D) -> core::result::Result<Self, D::Error>
273    where
274        D: serde::Deserializer<'de>,
275    {
276        let index = serde::de::Deserialize::deserialize(deserializer)?;
277        Ok(Self { index })
278    }
279}
280
281impl Atom {
282    fn new(index: usize) -> Self {
283        assert!(index < usize::try_from(u32::MAX).unwrap());
284        let index = u32::try_from(index).unwrap();
285        let index = NonZeroU32::new(index + 1).unwrap();
286        Self { index }
287    }
288
289    /// Get this atom's index in its pool.
290    pub fn index(&self) -> usize {
291        let index = self.index.get() - 1;
292        usize::try_from(index).unwrap()
293    }
294}
295
296#[cfg(test)]
297mod tests {
298    use super::*;
299
300    #[test]
301    fn basic() -> Result<()> {
302        let mut pool = StringPool::new();
303
304        let a = pool.insert("a")?;
305        assert_eq!(&pool[a], "a");
306        assert_eq!(pool.get_atom("a"), Some(a));
307
308        let a2 = pool.insert("a")?;
309        assert_eq!(a, a2);
310        assert_eq!(&pool[a2], "a");
311
312        let b = pool.insert("b")?;
313        assert_eq!(&pool[b], "b");
314        assert_ne!(a, b);
315        assert_eq!(pool.get_atom("b"), Some(b));
316
317        assert!(pool.get_atom("zzz").is_none());
318
319        let mut pool2 = StringPool::new();
320        let c = pool2.insert("c")?;
321        assert_eq!(&pool2[c], "c");
322        assert_eq!(a, c);
323        assert_eq!(&pool2[a], "c");
324        assert!(!pool2.contains(b));
325        assert!(pool2.get(b).is_none());
326
327        Ok(())
328    }
329
330    #[test]
331    fn stress() -> Result<()> {
332        let mut pool = StringPool::new();
333
334        let n = if cfg!(miri) { 100 } else { 10_000 };
335
336        for _ in 0..2 {
337            let atoms: TryVec<_> = (0..n).map(|i| pool.insert(&i.to_string())).try_collect()?;
338
339            for atom in atoms {
340                assert!(pool.contains(atom));
341                assert_eq!(&pool[atom], atom.index().to_string());
342            }
343        }
344
345        Ok(())
346    }
347
348    #[test]
349    fn roundtrip_serialize_deserialize() -> Result<()> {
350        let mut pool = StringPool::new();
351        let a = pool.insert("a")?;
352        let b = pool.insert("b")?;
353        let c = pool.insert("c")?;
354
355        let bytes = postcard::to_allocvec(&(pool, a, b, c))?;
356        let (pool, a2, b2, c2) = postcard::from_bytes::<(StringPool, Atom, Atom, Atom)>(&bytes)?;
357
358        assert_eq!(&pool[a], "a");
359        assert_eq!(&pool[b], "b");
360        assert_eq!(&pool[c], "c");
361
362        assert_eq!(&pool[a2], "a");
363        assert_eq!(&pool[b2], "b");
364        assert_eq!(&pool[c2], "c");
365
366        Ok(())
367    }
368}