cranelift_codegen/
ctxhash.rs

1//! A hashmap with "external hashing": nodes are hashed or compared for
2//! equality only with some external context provided on lookup/insert.
3//! This allows very memory-efficient data structures where
4//! node-internal data references some other storage (e.g., offsets into
5//! an array or pool of shared data).
6
7use hashbrown::hash_table::HashTable;
8use std::hash::{Hash, Hasher};
9
10/// Trait that allows for equality comparison given some external
11/// context.
12///
13/// Note that this trait is implemented by the *context*, rather than
14/// the item type, for somewhat complex lifetime reasons (lack of GATs
15/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
16/// the value type).
17pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
18    /// Determine whether `a` and `b` are equal, given the context in
19    /// `self` and the union-find data structure `uf`.
20    fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
21}
22
23/// Trait that allows for hashing given some external context.
24pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
25    /// Compute the hash of `value`, given the context in `self` and
26    /// the union-find data structure `uf`.
27    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
28}
29
30/// A null-comparator context type for underlying value types that
31/// already have `Eq` and `Hash`.
32#[derive(Default)]
33pub struct NullCtx;
34
35impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
36    fn ctx_eq(&self, a: &V, b: &V) -> bool {
37        a.eq(b)
38    }
39}
40impl<V: Eq + Hash> CtxHash<V> for NullCtx {
41    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
42        value.hash(state);
43    }
44}
45
46/// A bucket in the hash table.
47///
48/// Some performance-related design notes: we cache the hashcode for
49/// speed, as this often buys a few percent speed in
50/// interning-table-heavy workloads. We only keep the low 32 bits of
51/// the hashcode, for memory efficiency: in common use, `K` and `V`
52/// are often 32 bits also, and a 12-byte bucket is measurably better
53/// than a 16-byte bucket.
54struct BucketData<K, V> {
55    hash: u32,
56    k: K,
57    v: V,
58}
59
60/// A HashMap that takes external context for all operations.
61pub struct CtxHashMap<K, V> {
62    raw: HashTable<BucketData<K, V>>,
63}
64
65impl<K, V> CtxHashMap<K, V> {
66    /// Create an empty hashmap with pre-allocated space for the given
67    /// capacity.
68    pub fn with_capacity(capacity: usize) -> Self {
69        Self {
70            raw: HashTable::with_capacity(capacity),
71        }
72    }
73}
74
75fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
76where
77    Ctx: CtxHash<K>,
78{
79    let mut hasher = rustc_hash::FxHasher::default();
80    ctx.ctx_hash(&mut hasher, k);
81    hasher.finish() as u32
82}
83
84impl<K, V> CtxHashMap<K, V> {
85    /// Insert a new key-value pair, returning the old value associated
86    /// with this key (if any).
87    pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
88    where
89        Ctx: CtxEq<K, K> + CtxHash<K>,
90    {
91        let hash = compute_hash(ctx, &k);
92        match self.raw.find_mut(hash as u64, |bucket| {
93            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
94        }) {
95            Some(bucket) => Some(std::mem::replace(&mut bucket.v, v)),
96            None => {
97                let data = BucketData { hash, k, v };
98                self.raw
99                    .insert_unique(hash as u64, data, |bucket| bucket.hash as u64);
100                None
101            }
102        }
103    }
104
105    /// Look up a key, returning a borrow of the value if present.
106    pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
107    where
108        Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
109    {
110        let hash = compute_hash(ctx, k);
111        self.raw
112            .find(hash as u64, |bucket| {
113                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
114            })
115            .map(|bucket| &bucket.v)
116    }
117
118    /// Look up a key, returning an `Entry` that refers to an existing
119    /// value or allows inserting a new one.
120    pub fn entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V>
121    where
122        Ctx: CtxEq<K, K> + CtxHash<K>,
123    {
124        let hash = compute_hash(ctx, &k);
125        let raw = self.raw.entry(
126            hash as u64,
127            |bucket| hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k),
128            |bucket| compute_hash(ctx, &bucket.k) as u64,
129        );
130        match raw {
131            hashbrown::hash_table::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { raw: o }),
132            hashbrown::hash_table::Entry::Vacant(v) => Entry::Vacant(VacantEntry {
133                hash,
134                key: k,
135                raw: v,
136            }),
137        }
138    }
139}
140
141/// A reference to an existing or vacant entry in the hash table.
142pub enum Entry<'a, K, V> {
143    Occupied(OccupiedEntry<'a, K, V>),
144    Vacant(VacantEntry<'a, K, V>),
145}
146
147/// A reference to an occupied entry in the hash table.
148pub struct OccupiedEntry<'a, K, V> {
149    raw: hashbrown::hash_table::OccupiedEntry<'a, BucketData<K, V>>,
150}
151
152/// A reference to a vacant entry in the hash table.
153pub struct VacantEntry<'a, K, V> {
154    hash: u32,
155    key: K,
156    raw: hashbrown::hash_table::VacantEntry<'a, BucketData<K, V>>,
157}
158
159impl<'a, K, V> OccupiedEntry<'a, K, V> {
160    /// Get the existing value.
161    pub fn get(&self) -> &V {
162        &self.raw.get().v
163    }
164
165    /// Get the existing value, mutably.
166    pub fn get_mut(&mut self) -> &mut V {
167        &mut self.raw.get_mut().v
168    }
169}
170
171impl<'a, K, V> VacantEntry<'a, K, V> {
172    /// Insert a new value.
173    pub fn insert(self, v: V) {
174        self.raw.insert(BucketData {
175            hash: self.hash,
176            k: self.key,
177            v,
178        });
179    }
180}
181
182#[cfg(test)]
183mod test {
184    use super::*;
185
186    #[derive(Clone, Copy, Debug)]
187    struct Key {
188        index: u32,
189    }
190    struct Ctx {
191        vals: &'static [&'static str],
192    }
193    impl CtxEq<Key, Key> for Ctx {
194        fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
195            self.vals[a.index as usize].eq(self.vals[b.index as usize])
196        }
197    }
198    impl CtxHash<Key> for Ctx {
199        fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
200            self.vals[value.index as usize].hash(state);
201        }
202    }
203
204    #[test]
205    fn test_basic() {
206        let ctx = Ctx {
207            vals: &["a", "b", "a"],
208        };
209
210        let k0 = Key { index: 0 };
211        let k1 = Key { index: 1 };
212        let k2 = Key { index: 2 };
213
214        assert!(ctx.ctx_eq(&k0, &k2));
215        assert!(!ctx.ctx_eq(&k0, &k1));
216        assert!(!ctx.ctx_eq(&k2, &k1));
217
218        let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
219        assert_eq!(map.insert(k0, 42, &ctx), None);
220        assert_eq!(map.insert(k2, 84, &ctx), Some(42));
221        assert_eq!(map.get(&k1, &ctx), None);
222        assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
223    }
224}