cranelift_codegen/
ctxhash.rs

1//! A hashmap with "external hashing": nodes are hashed or compared for
2//! equality only with some external context provided on lookup/insert.
3//! This allows very memory-efficient data structures where
4//! node-internal data references some other storage (e.g., offsets into
5//! an array or pool of shared data).
6
7use hashbrown::hash_table::HashTable;
8use std::hash::{Hash, Hasher};
9
10/// Trait that allows for equality comparison given some external
11/// context.
12///
13/// Note that this trait is implemented by the *context*, rather than
14/// the item type, for somewhat complex lifetime reasons (lack of GATs
15/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
16/// the value type).
17pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
18    /// Determine whether `a` and `b` are equal, given the context in
19    /// `self` and the union-find data structure `uf`.
20    fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
21}
22
23/// Trait that allows for hashing given some external context.
24pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
25    /// Compute the hash of `value`, given the context in `self` and
26    /// the union-find data structure `uf`.
27    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
28}
29
30/// A null-comparator context type for underlying value types that
31/// already have `Eq` and `Hash`.
32#[derive(Default)]
33pub struct NullCtx;
34
35impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
36    fn ctx_eq(&self, a: &V, b: &V) -> bool {
37        a.eq(b)
38    }
39}
40impl<V: Eq + Hash> CtxHash<V> for NullCtx {
41    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
42        value.hash(state);
43    }
44}
45
46/// A bucket in the hash table.
47///
48/// Some performance-related design notes: we cache the hashcode for
49/// speed, as this often buys a few percent speed in
50/// interning-table-heavy workloads. We only keep the low 32 bits of
51/// the hashcode, for memory efficiency: in common use, `K` and `V`
52/// are often 32 bits also, and a 12-byte bucket is measurably better
53/// than a 16-byte bucket.
54struct BucketData<K, V> {
55    hash: u32,
56    k: K,
57    v: V,
58}
59
60/// A HashMap that takes external context for all operations.
61pub struct CtxHashMap<K, V> {
62    raw: HashTable<BucketData<K, V>>,
63}
64
65impl<K, V> CtxHashMap<K, V> {
66    /// Create an empty hashmap with pre-allocated space for the given
67    /// capacity.
68    pub fn with_capacity(capacity: usize) -> Self {
69        Self {
70            raw: HashTable::with_capacity(capacity),
71        }
72    }
73}
74
75fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
76where
77    Ctx: CtxHash<K>,
78{
79    let mut hasher = rustc_hash::FxHasher::default();
80    ctx.ctx_hash(&mut hasher, k);
81    hasher.finish() as u32
82}
83
84impl<K, V> CtxHashMap<K, V> {
85    /// Insert a new key-value pair, returning the old value associated
86    /// with this key (if any).
87    pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
88    where
89        Ctx: CtxEq<K, K> + CtxHash<K>,
90    {
91        let hash = compute_hash(ctx, &k);
92        match self.raw.find_mut(hash as u64, |bucket| {
93            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
94        }) {
95            Some(bucket) => Some(std::mem::replace(&mut bucket.v, v)),
96            None => {
97                let data = BucketData { hash, k, v };
98                self.raw
99                    .insert_unique(hash as u64, data, |bucket| bucket.hash as u64);
100                None
101            }
102        }
103    }
104
105    /// Look up a key, returning a borrow of the value if present.
106    pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
107    where
108        Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
109    {
110        let hash = compute_hash(ctx, k);
111        self.raw
112            .find(hash as u64, |bucket| {
113                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
114            })
115            .map(|bucket| &bucket.v)
116    }
117}
118
119#[cfg(test)]
120mod test {
121    use super::*;
122
123    #[derive(Clone, Copy, Debug)]
124    struct Key {
125        index: u32,
126    }
127    struct Ctx {
128        vals: &'static [&'static str],
129    }
130    impl CtxEq<Key, Key> for Ctx {
131        fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
132            self.vals[a.index as usize].eq(self.vals[b.index as usize])
133        }
134    }
135    impl CtxHash<Key> for Ctx {
136        fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
137            self.vals[value.index as usize].hash(state);
138        }
139    }
140
141    #[test]
142    fn test_basic() {
143        let ctx = Ctx {
144            vals: &["a", "b", "a"],
145        };
146
147        let k0 = Key { index: 0 };
148        let k1 = Key { index: 1 };
149        let k2 = Key { index: 2 };
150
151        assert!(ctx.ctx_eq(&k0, &k2));
152        assert!(!ctx.ctx_eq(&k0, &k1));
153        assert!(!ctx.ctx_eq(&k2, &k1));
154
155        let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
156        assert_eq!(map.insert(k0, 42, &ctx), None);
157        assert_eq!(map.insert(k2, 84, &ctx), Some(42));
158        assert_eq!(map.get(&k1, &ctx), None);
159        assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
160    }
161}