cranelift_codegen/alias_analysis.rs
//! Alias analysis, consisting of a "last store" pass and a "memory
//! values" pass. These two passes operate as one fused pass, and so
//! are implemented together here.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! We never track which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in their `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured to be true by construction during
//! frontend translation into CLIF and during legalization.
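//!
//! For example, schematically (concrete CLIF syntax details elided),
//! these two accesses carry different categories in their `MemFlags`
//! and so can never alias:
//!
//! ```text
//! v10 = load.i64 heap v1+8    ; "heap" category
//! store table v11, v2+16      ; "table" category: disjoint from "heap"
//! ```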
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: it scans the
//! code, finding at each program point the last instruction that
//! *might have* written to a given part of abstract state.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. However, we can get a
//! must-alias property from this: if at a given load or store, we
//! look backward to the "last store", *AND* we find that it has
//! exactly the same address expression and type, then we know that
//! the current instruction's access *must* be to the same memory
//! location.
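//!
//! Schematically, for example:
//!
//! ```text
//! store heap v5, v1+8        ; the "last store" to "heap" from here on
//! ; ... no intervening heap stores or fences ...
//! v9 = load.i32 heap v1+8    ; same last store, address, offset, and
//!                            ; type: must-alias, so v9 equals v5
//! ```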
//!
//! To get this must-alias property, we compute a sparse table of
//! "memory values": these are known equivalences between SSA `Value`s
//! and particular locations in memory. The memory-values table is a
//! mapping from (last store, address expression, offset, type) to SSA
//! value. At a store, we insert into this table directly. At a load,
//! we can also insert an entry, if one is not already present (e.g.,
//! from the store that produced the loaded value).
//!
//! Then we can do two optimizations at once given this table. If a
//! load accesses a location identified by a (last store, address,
//! offset, type) key already in the table, we replace it with the SSA
//! value for that memory location. This is usually known as "redundant
//! load elimination" if the value came from an earlier load of the
//! same location, or "store-to-load forwarding" if the value came
//! from an earlier store to the same location.
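//!
//! For example, schematically, the second load below hits the table
//! entry inserted by the first and is replaced by its result:
//!
//! ```text
//! v4 = load.i64 heap v1    ; inserts (last store, v1, 0, i64) -> v4
//! v5 = load.i64 heap v1    ; same key, same last store: v5 becomes an
//!                          ; alias of v4 (redundant load elimination)
//! ```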
//!
//! In theory we could also do *dead-store elimination*, where if a
//! store overwrites a key in the table, *and* if no other load/store
//! to the abstract state category occurred, *and* no other trapping
//! instruction occurred (at which point we need an up-to-date memory
//! state because post-trap-termination memory state can be observed),
//! *and* we can prove the original store could not have trapped, then
//! we can eliminate the original store. Because this is so complex,
//! and the conditions for doing it correctly when post-trap state
//! must be correct likely reduce the potential benefit, we don't yet
//! do this.

use crate::{
    cursor::{Cursor, FuncCursor},
    dominator_tree::DominatorTree,
    inst_predicates::{
        has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
    },
    ir::{immediates::Offset32, AliasRegion, Block, Function, Inst, Opcode, Type, Value},
    trace,
};
use cranelift_entity::{packed_option::PackedOption, EntityRef};
use rustc_hash::{FxHashMap, FxHashSet};

/// For a given program point, the last-store instruction for each
/// disjoint category of abstract state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LastStores {
    heap: PackedOption<Inst>,
    table: PackedOption<Inst>,
    vmctx: PackedOption<Inst>,
    other: PackedOption<Inst>,
}

impl LastStores {
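    /// Update the state to account for `inst`: an instruction with
    /// memory-fence semantics conservatively clobbers every category;
    /// a store clobbers the category named by its `MemFlags` alias
    /// region, or every category if it carries no `MemFlags`.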
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg.insts[inst].opcode();
        if has_memory_fence_semantics(opcode) {
            self.heap = inst.into();
            self.table = inst.into();
            self.vmctx = inst.into();
            self.other = inst.into();
        } else if opcode.can_store() {
            if let Some(memflags) = func.dfg.insts[inst].memflags() {
                match memflags.alias_region() {
                    None => self.other = inst.into(),
                    Some(AliasRegion::Heap) => self.heap = inst.into(),
                    Some(AliasRegion::Table) => self.table = inst.into(),
                    Some(AliasRegion::Vmctx) => self.vmctx = inst.into(),
                }
            } else {
                self.heap = inst.into();
                self.table = inst.into();
                self.vmctx = inst.into();
                self.other = inst.into();
            }
        }
    }

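    /// Find the last store that the given load or store may alias:
    /// the last store to its `MemFlags` alias region if it has
    /// memflags; otherwise, conservatively, the instruction itself if
    /// it is any other kind of load or store, and "none" for
    /// non-memory instructions.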
    fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
        if let Some(memflags) = func.dfg.insts[inst].memflags() {
            match memflags.alias_region() {
                None => self.other,
                Some(AliasRegion::Heap) => self.heap,
                Some(AliasRegion::Table) => self.table,
                Some(AliasRegion::Vmctx) => self.vmctx,
            }
        } else if func.dfg.insts[inst].opcode().can_load()
            || func.dfg.insts[inst].opcode().can_store()
        {
            inst.into()
        } else {
            PackedOption::default()
        }
    }

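    /// Merge another state into this one at a control-flow join
    /// point. Categories on which the two states agree are preserved;
    /// where they disagree, `loc` (the first instruction of the
    /// joining block) stands in as a conservative "unknown" last
    /// store, so no stale equivalence can survive the merge.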
    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
        let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
            match (a.into(), b.into()) {
                (None, None) => None.into(),
                (Some(a), None) => a.into(),
                (None, Some(b)) => b.into(),
                (Some(a), Some(b)) if a == b => a.into(),
                _ => loc.into(),
            }
        };

        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}

/// A key identifying a unique memory location.
///
/// For the result of a load to be equivalent to the result of
/// another load, or to the data stored by a store, we need (i) the
/// "version" of memory to be the same (here ensured by having the
/// same last-store instruction to touch the disjoint category of
/// abstract state we're accessing); (ii) the address to be the same
/// (here ensured by having the same SSA value, which doesn't change
/// once computed); (iii) the offset to be the same; and (iv) the
/// accessed type and extension mode (e.g., 8-to-32, signed) to be
/// the same.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct MemoryLoc {
    last_store: PackedOption<Inst>,
    address: Value,
    offset: Offset32,
    ty: Type,
    /// We keep the *opcode* of the instruction that produced the
    /// value we record at this key if the opcode is anything other
    /// than an ordinary load or store. This is needed when we
    /// consider loads that extend the value: e.g., an 8-to-32
    /// sign-extending load will produce a 32-bit value from an 8-bit
    /// value in memory, so we can only reuse that (as part of RLE)
    /// for another load with the same extending opcode.
    ///
    /// We could improve the transform to insert explicit extend ops
    /// in place of extending loads when we know the memory value, but
    /// we haven't yet done this.
    extending_opcode: Option<Opcode>,
}

/// An alias-analysis pass.
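///
/// Typical driving code, as a sketch (assuming a `func: Function` and
/// a precomputed `domtree`; exact integration varies by caller):
///
/// ```ignore
/// let mut analysis = AliasAnalysis::new(&func, &domtree);
/// analysis.compute_and_update_aliases(&mut func);
/// ```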
pub struct AliasAnalysis<'a> {
    /// The domtree for the function.
    domtree: &'a DominatorTree,

    /// Input state to a basic block.
    block_input: FxHashMap<Block, LastStores>,

    /// Known memory-value equivalences. This is the result of the
    /// analysis. This is a mapping from (last store, address
    /// expression, offset, type) to SSA `Value`.
    ///
    /// We keep the defining inst around for quick dominance checks.
    mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
}

impl<'a> AliasAnalysis<'a> {
    /// Perform an alias analysis pass.
    pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
        trace!("alias analysis: input is:\n{:?}", func);
        let mut analysis = AliasAnalysis {
            domtree,
            block_input: FxHashMap::default(),
            mem_values: FxHashMap::default(),
        };

        analysis.compute_block_input_states(func);
        analysis
    }

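    /// Compute the last-store state at entry to every block, as a
    /// forward dataflow fixpoint: propagate each block's exit state
    /// to its successors, meeting states at joins, and re-enqueue any
    /// successor whose input state changes.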
    fn compute_block_input_states(&mut self, func: &Function) {
        let mut queue = vec![];
        let mut queue_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        queue.push(entry);
        queue_set.insert(entry);

        while let Some(block) = queue.pop() {
            queue_set.remove(&block);
            let mut state = *self.block_input.entry(block).or_default();

            trace!(
                "alias analysis: input to block{} is {:?}",
                block.index(),
                state
            );

            for inst in func.layout.block_insts(block) {
                state.update(func, inst);
                trace!("after inst{}: state is {:?}", inst.index(), state);
            }

            visit_block_succs(func, block, |_inst, succ, _from_table| {
                let succ_first_inst = func.layout.block_insts(succ).next().unwrap();
                let updated = match self.block_input.get_mut(&succ) {
                    Some(succ_state) => {
                        let old = *succ_state;
                        succ_state.meet_from(&state, succ_first_inst);
                        *succ_state != old
                    }
                    None => {
                        self.block_input.insert(succ, state);
                        true
                    }
                };

                if updated && queue_set.insert(succ) {
                    queue.push(succ);
                }
            });
        }
    }

    /// Get the starting state for a block.
    pub fn block_starting_state(&self, block: Block) -> LastStores {
        self.block_input.get(&block).copied().unwrap_or_default()
    }

    /// Process one instruction. Meant to be invoked in program order
    /// within a block, and ideally in RPO or at least some domtree
    /// preorder for maximal reuse.
    ///
    /// Returns a replacing `Value` if the instruction is a load whose
    /// result is known to equal an already-available value (redundant
    /// load elimination or store-to-load forwarding), or `None`
    /// otherwise.
    pub fn process_inst(
        &mut self,
        func: &mut Function,
        state: &mut LastStores,
        inst: Inst,
    ) -> Option<Value> {
        trace!(
            "alias analysis: scanning at inst{} with state {:?} ({:?})",
            inst.index(),
            state,
            func.dfg.insts[inst],
        );

        let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
        {
            let address = func.dfg.resolve_aliases(address);
            let opcode = func.dfg.insts[inst].opcode();

            if opcode.can_store() {
                let store_data = inst_store_data(func, inst).unwrap();
                let store_data = func.dfg.resolve_aliases(store_data);
                let mem_loc = MemoryLoc {
                    last_store: inst.into(),
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: store with data v{} at loc {:?}",
                    inst.index(),
                    store_data.index(),
                    mem_loc
                );
                self.mem_values.insert(mem_loc, (inst, store_data));

                None
            } else if opcode.can_load() {
                let last_store = state.get_last_store(func, inst);
                let load_result = func.dfg.inst_results(inst)[0];
                let mem_loc = MemoryLoc {
                    last_store,
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
                    inst.index(),
                    last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
                    mem_loc
                );

                // Is there a Value already known to be stored
                // at this specific memory location? If so,
                // we can alias the load result to this
                // already-known Value.
                //
                // Check if the definition dominates this
                // location; it might not, if it comes from a
                // load (stores will always dominate though if
                // their `last_store` survives through
                // meet-points to this use-site).
                let aliased =
                    if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
                        trace!(
                            " -> sees known value v{} from inst{}",
                            value.index(),
                            def_inst.index()
                        );
                        if self.domtree.dominates(def_inst, inst, &func.layout) {
                            trace!(
                                " -> dominates; value equiv from v{} to v{} inserted",
                                load_result.index(),
                                value.index()
                            );
                            Some(value)
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                // Otherwise, we can keep *this* load around
                // as a new equivalent value.
                if aliased.is_none() {
                    trace!(
                        " -> inserting load result v{} at loc {:?}",
                        load_result.index(),
                        mem_loc
                    );
                    self.mem_values.insert(mem_loc, (inst, load_result));
                }

                aliased
            } else {
                None
            }
        } else {
            None
        };

        state.update(func, inst);

        replacing_value
    }

    /// Make a pass and update known-redundant loads to aliased
    /// values. We interleave the updates with the memory-location
    /// tracking because resolving some aliases may expose others
    /// (e.g. in cases of double-indirection with two separate chains
    /// of loads).
    pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
        let mut pos = FuncCursor::new(func);

        while let Some(block) = pos.next_block() {
            let mut state = self.block_starting_state(block);
            while let Some(inst) = pos.next_inst() {
                if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
                    let result = pos.func.dfg.inst_results(inst)[0];
                    pos.func.dfg.clear_results(inst);
                    pos.func.dfg.change_to_alias(result, replaced_result);
                    pos.remove_inst_and_step_back();
                }
            }
        }
    }
}

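/// Return `Some(op)` for a load/store opcode that transforms the
/// in-memory value (e.g. the sign- or zero-extending load variants),
/// or `None` for a plain `Load` or `Store`. This becomes part of the
/// `MemoryLoc` key, so extended values are only reused by accesses
/// with the same extension.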
fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
    debug_assert!(op.can_load() || op.can_store());
    match op {
        Opcode::Load | Opcode::Store => None,
        _ => Some(op),
    }
}