cranelift_codegen/egraph.rs
1//! Support for egraphs represented in the DataFlowGraph.
2
3use crate::alias_analysis::{AliasAnalysis, LastStores};
4use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap};
5use crate::cursor::{Cursor, CursorPosition, FuncCursor};
6use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
7use crate::egraph::elaborate::Elaborator;
8use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
9use crate::ir::pcc::Fact;
10use crate::ir::{
11 Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value, ValueDef,
12 ValueListPool,
13};
14use crate::loop_analysis::LoopAnalysis;
15use crate::opts::IsleContext;
16use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
17use crate::settings::Flags;
18use crate::trace;
19use crate::unionfind::UnionFind;
20use core::cmp::Ordering;
21use cranelift_control::ControlPlane;
22use cranelift_entity::packed_option::ReservedValue;
23use cranelift_entity::SecondaryMap;
24use rustc_hash::FxHashSet;
25use smallvec::SmallVec;
26use std::hash::Hasher;
27
28mod cost;
29mod elaborate;
30
31/// Pass over a Function that does the whole aegraph thing.
32///
33/// - Removes non-skeleton nodes from the Layout.
34/// - Performs a GVN-and-rule-application pass over all Values
35/// reachable from the skeleton, potentially creating new Union
36/// nodes (i.e., an aegraph) so that some values have multiple
37/// representations.
38/// - Does "extraction" on the aegraph: selects the best value out of
39/// the tree-of-Union nodes for each used value.
40/// - Does "scoped elaboration" on the aegraph: chooses one or more
41/// locations for pure nodes to become instructions again in the
42/// layout, as forced by the skeleton.
43///
44/// At the beginning and end of this pass, the CLIF should be in a
45/// state that passes the verifier and, additionally, has no Union
46/// nodes. During the pass, Union nodes may exist, and instructions in
47/// the layout may refer to results of instructions that are not
48/// placed in the layout.
49pub struct EgraphPass<'a> {
50 /// The function we're operating on.
51 func: &'a mut Function,
52 /// Dominator tree for the CFG, used to visit blocks in pre-order
53 /// so we see value definitions before their uses, and also used for
54 /// O(1) dominance checks.
55 domtree: DominatorTreePreorder,
56 /// Alias analysis, used during optimization.
57 alias_analysis: &'a mut AliasAnalysis<'a>,
58 /// Loop analysis results, used for built-in LICM during
59 /// elaboration.
60 loop_analysis: &'a LoopAnalysis,
61 /// Compiler flags.
62 flags: &'a Flags,
63 /// Chaos-mode control-plane so we can test that we still get
64 /// correct results when our heuristics make bad decisions.
65 ctrl_plane: &'a mut ControlPlane,
66 /// Which canonical Values do we want to rematerialize in each
67 /// block where they're used?
68 ///
69 /// (A canonical Value is the *oldest* Value in an eclass,
70 /// i.e. tree of union value-nodes).
71 remat_values: FxHashSet<Value>,
72 /// Stats collected while we run this pass.
73 pub(crate) stats: Stats,
74 /// Union-find that maps all members of a Union tree (eclass) back
75 /// to the *oldest* (lowest-numbered) `Value`.
76 pub(crate) eclasses: UnionFind<Value>,
77}
78
79// The maximum number of rewrites we will take from a single call into ISLE.
80const MATCHES_LIMIT: usize = 5;
81
82/// Context passed through node insertion and optimization.
83pub(crate) struct OptimizeCtx<'opt, 'analysis>
84where
85 'analysis: 'opt,
86{
87 // Borrowed from EgraphPass:
88 pub(crate) func: &'opt mut Function,
89 pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
90 pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>,
91 pub(crate) effectful_gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Option<Value>>,
92 available_block: &'opt mut SecondaryMap<Value, Block>,
93 pub(crate) eclasses: &'opt mut UnionFind<Value>,
94 pub(crate) remat_values: &'opt mut FxHashSet<Value>,
95 pub(crate) stats: &'opt mut Stats,
96 domtree: &'opt DominatorTreePreorder,
97 pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
98 pub(crate) alias_analysis_state: &'opt mut LastStores,
99 flags: &'opt Flags,
100 ctrl_plane: &'opt mut ControlPlane,
101 // Held locally during optimization of one node (recursively):
102 pub(crate) rewrite_depth: usize,
103 pub(crate) subsume_values: FxHashSet<Value>,
104 optimized_values: SmallVec<[Value; MATCHES_LIMIT]>,
105}
106
107/// For passing to `insert_pure_enode`. Sometimes the enode already
108/// exists as an Inst (from the original CLIF), and sometimes we're in
109/// the middle of creating it and want to avoid inserting it if
110/// possible until we know we need it.
111pub(crate) enum NewOrExistingInst {
112 New(InstructionData, Type),
113 Existing(Inst),
114}
115
116impl NewOrExistingInst {
117 fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) {
118 match self {
119 NewOrExistingInst::New(data, ty) => (*ty, *data),
120 NewOrExistingInst::Existing(inst) => {
121 let ty = dfg.ctrl_typevar(*inst);
122 (ty, dfg.insts[*inst])
123 }
124 }
125 }
126}
127
128impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
129where
130 'analysis: 'opt,
131{
132 /// Optimization of a single instruction.
133 ///
134 /// This does a few things:
135 /// - Looks up the instruction in the GVN deduplication map. If we
136 /// already have the same instruction somewhere else, with the
137 /// same args, then we can alias the original instruction's
138 /// results and omit this instruction entirely.
139 /// - Note that we do this canonicalization based on the
140 /// instruction with its arguments as *canonical* eclass IDs,
141 /// that is, the oldest (smallest index) `Value` reachable in
142 /// the tree-of-unions (whole eclass). This ensures that we
143 /// properly canonicalize newer nodes that use newer "versions"
144 /// of a value that are still equal to the older versions.
145 /// - If the instruction is "new" (not deduplicated), then apply
146 /// optimization rules:
147 /// - All of the mid-end rules written in ISLE.
148 /// - Store-to-load forwarding.
149 /// - Update the value-to-opt-value map, and update the eclass
150 /// union-find, if we rewrote the value to different form(s).
151 pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
152 // Create the external context for looking up and updating the
153 // GVN map. This is necessary so that instructions themselves
154 // do not have to carry all the references or data for a full
155 // `Eq` or `Hash` impl.
156 let gvn_context = GVNContext {
157 union_find: self.eclasses,
158 value_lists: &self.func.dfg.value_lists,
159 };
160
161 self.stats.pure_inst += 1;
162 if let NewOrExistingInst::New(..) = inst {
163 self.stats.new_inst += 1;
164 }
165
166 // Does this instruction already exist? If so, add entries to
167 // the value-map to rewrite uses of its results to the results
168 // of the original (existing) instruction. If not, optimize
169 // the new instruction.
170 if let Some(&orig_result) = self
171 .gvn_map
172 .get(&inst.get_inst_key(&self.func.dfg), &gvn_context)
173 {
174 self.stats.pure_inst_deduped += 1;
175 if let NewOrExistingInst::Existing(inst) = inst {
176 debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
177 let result = self.func.dfg.first_result(inst);
178 debug_assert!(
179 self.domtree.dominates(
180 self.available_block[orig_result],
181 self.get_available_block(inst)
182 ),
183 "GVN shouldn't replace {result} (available in {}) with non-dominating {orig_result} (available in {})",
184 self.get_available_block(inst),
185 self.available_block[orig_result],
186 );
187 self.value_to_opt_value[result] = orig_result;
188 self.func.dfg.merge_facts(result, orig_result);
189 }
190 orig_result
191 } else {
192 // Now actually insert the InstructionData and attach
193 // result value (exactly one).
194 let (inst, result, ty) = match inst {
195 NewOrExistingInst::New(data, typevar) => {
196 let inst = self.func.dfg.make_inst(data);
197 // TODO: reuse return value?
198 self.func.dfg.make_inst_results(inst, typevar);
199 let result = self.func.dfg.first_result(inst);
200 // Add to eclass unionfind.
201 self.eclasses.add(result);
202 // New inst. We need to do the analysis of its result.
203 (inst, result, typevar)
204 }
205 NewOrExistingInst::Existing(inst) => {
206 let result = self.func.dfg.first_result(inst);
207 let ty = self.func.dfg.ctrl_typevar(inst);
208 (inst, result, ty)
209 }
210 };
211
212 self.attach_constant_fact(inst, result, ty);
213
214 self.available_block[result] = self.get_available_block(inst);
215 let opt_value = self.optimize_pure_enode(inst);
216
217 for &argument in self.func.dfg.inst_args(inst) {
218 self.eclasses.pin_index(argument);
219 }
220
221 let gvn_context = GVNContext {
222 union_find: self.eclasses,
223 value_lists: &self.func.dfg.value_lists,
224 };
225 self.gvn_map
226 .insert((ty, self.func.dfg.insts[inst]), opt_value, &gvn_context);
227 self.value_to_opt_value[result] = opt_value;
228 opt_value
229 }
230 }
231
232 /// Find the block where a pure instruction first becomes available,
233 /// defined as the block that is closest to the root where all of
234 /// its arguments are available. In the unusual case where a pure
235 /// instruction has no arguments (e.g. get_return_address), we can
236 /// place it anywhere, so it is available in the entry block.
237 ///
238 /// This function does not compute available blocks recursively.
239 /// All of the instruction's arguments must have had their available
240 /// blocks assigned already.
241 fn get_available_block(&self, inst: Inst) -> Block {
242 // Side-effecting instructions have different rules for where
243 // they become available, so this function does not apply.
244 debug_assert!(is_pure_for_egraph(self.func, inst));
245
246 // Note that the def-point of all arguments to an instruction
247 // in SSA lie on a line of direct ancestors in the domtree, and
248 // so do their available-blocks. This means that for any pair of
249 // arguments, their available blocks are either the same or one
250 // strictly dominates the other. We just need to find any argument
251 // whose available block is deepest in the domtree.
252 self.func.dfg.insts[inst]
253 .arguments(&self.func.dfg.value_lists)
254 .iter()
255 .map(|&v| {
256 let block = self.available_block[v];
257 debug_assert!(!block.is_reserved_value());
258 block
259 })
260 .max_by(|&x, &y| {
261 if self.domtree.dominates(x, y) {
262 Ordering::Less
263 } else {
264 debug_assert!(self.domtree.dominates(y, x));
265 Ordering::Greater
266 }
267 })
268 .unwrap_or(self.func.layout.entry_block().unwrap())
269 }
270
271 /// Optimizes an enode by applying any matching mid-end rewrite
272 /// rules (or store-to-load forwarding, which is a special case),
273 /// unioning together all possible optimized (or rewritten) forms
274 /// of this expression into an eclass and returning the `Value`
275 /// that represents that eclass.
276 fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
277 // A pure node always has exactly one result.
278 let orig_value = self.func.dfg.first_result(inst);
279
280 let mut optimized_values = std::mem::take(&mut self.optimized_values);
281
282 // Limit rewrite depth. When we apply optimization rules, they
283 // may create new nodes (values) and those are, recursively,
284 // optimized eagerly as soon as they are created. So we may
285 // have more than one ISLE invocation on the stack. (This is
286 // necessary so that as the toplevel builds the
287 // right-hand-side expression bottom-up, it uses the "latest"
288 // optimized values for all the constituent parts.) To avoid
289 // infinite or problematic recursion, we bound the rewrite
290 // depth to a small constant here.
291 const REWRITE_LIMIT: usize = 5;
292 if self.rewrite_depth > REWRITE_LIMIT {
293 self.stats.rewrite_depth_limit += 1;
294 return orig_value;
295 }
296 self.rewrite_depth += 1;
297 trace!("Incrementing rewrite depth; now {}", self.rewrite_depth);
298
299 // Invoke the ISLE toplevel constructor, getting all new
300 // values produced as equivalents to this value.
301 trace!("Calling into ISLE with original value {}", orig_value);
302 self.stats.rewrite_rule_invoked += 1;
303 debug_assert!(optimized_values.is_empty());
304 crate::opts::generated_code::constructor_simplify(
305 &mut IsleContext { ctx: self },
306 orig_value,
307 &mut optimized_values,
308 );
309
310 optimized_values.push(orig_value);
311
312 // Remove any values from optimized_values that do not have
313 // the highest possible available block in the domtree, in
314 // O(n) time. This loop scans in reverse, establishing the
315 // loop invariant that all values at indices >= idx have the
316 // same available block, which is the best available block
317 // seen so far. Note that orig_value must also be removed if
318 // it isn't in the best block, so we push it above, which means
319 // optimized_values is never empty: there's always at least one
320 // value in best_block.
321 let mut best_block = self.available_block[*optimized_values.last().unwrap()];
322 for idx in (0..optimized_values.len() - 1).rev() {
323 // At the beginning of each iteration, there is a non-empty
324 // collection of values after idx, which are all available
325 // at best_block.
326 let this_block = self.available_block[optimized_values[idx]];
327 if this_block != best_block {
328 if self.domtree.dominates(this_block, best_block) {
329 // If the available block for this value dominates
330 // the best block we've seen so far, discard all
331 // the values we already checked and leave only this
332 // value in the tail of the vector.
333 optimized_values.truncate(idx + 1);
334 best_block = this_block;
335 } else {
336 // Otherwise the tail of the vector contains values
337 // which are all better than this value, so we can
338 // swap any of them in place of this value to delete
339 // this one in O(1) time.
340 debug_assert!(self.domtree.dominates(best_block, this_block));
341 optimized_values.swap_remove(idx);
342 debug_assert!(optimized_values.len() > idx);
343 }
344 }
345 }
346
347 // It's not supposed to matter what order `simplify` returns values in.
348 self.ctrl_plane.shuffle(&mut optimized_values);
349
350 let num_matches = optimized_values.len();
351 if num_matches > MATCHES_LIMIT {
352 trace!(
353 "Reached maximum matches limit; too many optimized values \
354 ({num_matches} > {MATCHES_LIMIT}); ignoring rest.",
355 );
356 optimized_values.truncate(MATCHES_LIMIT);
357 }
358
359 trace!(" -> returned from ISLE: {orig_value} -> {optimized_values:?}");
360
361 // Create a union of all new values with the original (or
362 // maybe just one new value marked as "subsuming" the
363 // original, if present.)
364 let mut union_value = optimized_values.pop().unwrap();
365 for optimized_value in optimized_values.drain(..) {
366 trace!(
367 "Returned from ISLE for {}, got {:?}",
368 orig_value,
369 optimized_value
370 );
371 if optimized_value == orig_value {
372 trace!(" -> same as orig value; skipping");
373 continue;
374 }
375 if self.subsume_values.contains(&optimized_value) {
376 // Merge in the unionfind so canonicalization
377 // still works, but take *only* the subsuming
378 // value, and break now.
379 self.eclasses.union(optimized_value, union_value);
380 self.func.dfg.merge_facts(optimized_value, union_value);
381 union_value = optimized_value;
382 break;
383 }
384
385 let old_union_value = union_value;
386 union_value = self.func.dfg.union(old_union_value, optimized_value);
387 self.available_block[union_value] = best_block;
388 self.stats.union += 1;
389 trace!(" -> union: now {}", union_value);
390 self.eclasses.add(union_value);
391 self.eclasses.union(old_union_value, optimized_value);
392 self.func.dfg.merge_facts(old_union_value, optimized_value);
393 self.eclasses.union(old_union_value, union_value);
394 }
395
396 self.rewrite_depth -= 1;
397 trace!("Decrementing rewrite depth; now {}", self.rewrite_depth);
398
399 debug_assert!(self.optimized_values.is_empty());
400 self.optimized_values = optimized_values;
401
402 union_value
403 }
404
405 /// Optimize a "skeleton" instruction, possibly removing
406 /// it. Returns `true` if the instruction should be removed from
407 /// the layout.
408 fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
409 self.stats.skeleton_inst += 1;
410
411 for &result in self.func.dfg.inst_results(inst) {
412 self.available_block[result] = self.func.layout.inst_block(inst).unwrap();
413 }
414
415 // First, can we try to deduplicate? We need to keep some copy
416 // of the instruction around because it's side-effecting, but
417 // we may be able to reuse an earlier instance of it.
418 if is_mergeable_for_egraph(self.func, inst) {
419 let result = self.func.dfg.inst_results(inst).get(0).copied();
420 trace!(" -> mergeable side-effecting op {}", inst);
421
422 // Does this instruction already exist? If so, add entries to
423 // the value-map to rewrite uses of its results to the results
424 // of the original (existing) instruction. If not, optimize
425 // the new instruction.
426 //
427 // Note that we use the "effectful GVN map", which is
428 // scoped: because effectful ops are not removed from the
429 // skeleton (`Layout`), we need to be mindful of whether
430 // our current position is dominated by an instance of the
431 // instruction. (See #5796 for details.)
432 let ty = self.func.dfg.ctrl_typevar(inst);
433 match self
434 .effectful_gvn_map
435 .entry((ty, self.func.dfg.insts[inst]))
436 {
437 ScopedEntry::Occupied(o) => {
438 let orig_result = *o.get();
439 match (result, orig_result) {
440 (Some(result), Some(orig_result)) => {
441 // Hit in GVN map -- reuse value.
442 self.value_to_opt_value[result] = orig_result;
443 trace!(" -> merges result {} to {}", result, orig_result);
444 }
445 (None, None) => {
446 // Hit in the GVN map, but the instruction doesn't
447 // produce results, only side effects. Nothing else
448 // to do here.
449 trace!(" -> merges with dominating instruction");
450 }
451 (_, _) => unreachable!(),
452 }
453 true
454 }
455 ScopedEntry::Vacant(v) => {
456 // Otherwise, insert it into the value-map.
457 if let Some(result) = result {
458 self.value_to_opt_value[result] = result;
459 }
460 v.insert(result);
461 trace!(" -> inserts as new (no GVN)");
462 false
463 }
464 }
465 }
466 // Otherwise, if a load or store, process it with the alias
467 // analysis to see if we can optimize it (rewrite in terms of
468 // an earlier load or stored value).
469 else if let Some(new_result) =
470 self.alias_analysis
471 .process_inst(self.func, self.alias_analysis_state, inst)
472 {
473 self.stats.alias_analysis_removed += 1;
474 let result = self.func.dfg.first_result(inst);
475 trace!(
476 " -> inst {} has result {} replaced with {}",
477 inst,
478 result,
479 new_result
480 );
481 self.value_to_opt_value[result] = new_result;
482 self.func.dfg.merge_facts(result, new_result);
483 true
484 }
485 // Otherwise, generic side-effecting op -- always keep it, and
486 // set its results to identity-map to original values.
487 else {
488 // Set all results to identity-map to themselves
489 // in the value-to-opt-value map.
490 for &result in self.func.dfg.inst_results(inst) {
491 self.value_to_opt_value[result] = result;
492 self.eclasses.add(result);
493 }
494 false
495 }
496 }
497
498 /// Helper to propagate facts on constant values: if PCC is
499 /// enabled, then unconditionally add a fact attesting to the
500 /// Value's concrete value.
501 fn attach_constant_fact(&mut self, inst: Inst, value: Value, ty: Type) {
502 if self.flags.enable_pcc() {
503 if let InstructionData::UnaryImm {
504 opcode: Opcode::Iconst,
505 imm,
506 } = self.func.dfg.insts[inst]
507 {
508 let imm: i64 = imm.into();
509 self.func.dfg.facts[value] =
510 Some(Fact::constant(ty.bits().try_into().unwrap(), imm as u64));
511 }
512 }
513 }
514}
515
516impl<'a> EgraphPass<'a> {
517 /// Create a new EgraphPass.
518 pub fn new(
519 func: &'a mut Function,
520 raw_domtree: &'a DominatorTree,
521 loop_analysis: &'a LoopAnalysis,
522 alias_analysis: &'a mut AliasAnalysis<'a>,
523 flags: &'a Flags,
524 ctrl_plane: &'a mut ControlPlane,
525 ) -> Self {
526 let num_values = func.dfg.num_values();
527 let mut domtree = DominatorTreePreorder::new();
528 domtree.compute(raw_domtree);
529 Self {
530 func,
531 domtree,
532 loop_analysis,
533 alias_analysis,
534 flags,
535 ctrl_plane,
536 stats: Stats::default(),
537 eclasses: UnionFind::with_capacity(num_values),
538 remat_values: FxHashSet::default(),
539 }
540 }
541
542 /// Run the process.
543 pub fn run(&mut self) {
544 self.remove_pure_and_optimize();
545
546 trace!("egraph built:\n{}\n", self.func.display());
547 if cfg!(feature = "trace-log") {
548 for (value, def) in self.func.dfg.values_and_defs() {
549 trace!(" -> {} = {:?}", value, def);
550 match def {
551 ValueDef::Result(i, 0) => {
552 trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]);
553 }
554 _ => {}
555 }
556 }
557 }
558 trace!("stats: {:#?}", self.stats);
559 trace!("pinned_union_count: {}", self.eclasses.pinned_union_count);
560 self.elaborate();
561 }
562
563 /// Remove pure nodes from the `Layout` of the function, ensuring
564 /// that only the "side-effect skeleton" remains, and also
565 /// optimize the pure nodes. This is the first step of
566 /// egraph-based processing and turns the pure CFG-based CLIF into
567 /// a CFG skeleton with a sea of (optimized) nodes tying it
568 /// together.
569 ///
570 /// As we walk through the code, we eagerly apply optimization
571 /// rules; at any given point we have a "latest version" of an
572 /// eclass of possible representations for a `Value` in the
573 /// original program, which is itself a `Value` at the root of a
574 /// union-tree. We keep a map from the original values to these
575 /// optimized values. When we encounter any instruction (pure or
576 /// side-effecting skeleton) we rewrite its arguments to capture
577 /// the "latest" optimized forms of these values. (We need to do
578 /// this as part of this pass, and not later using a finished map,
579 /// because the eclass can continue to be updated and we need to
580 /// only refer to its subset that exists at this stage, to
581 /// maintain acyclicity.)
582 fn remove_pure_and_optimize(&mut self) {
583 let mut cursor = FuncCursor::new(self.func);
584 let mut value_to_opt_value: SecondaryMap<Value, Value> =
585 SecondaryMap::with_default(Value::reserved_value());
586
587 // Map from instruction to value for hash-consing of pure ops
588 // into the egraph. This can be a standard (non-scoped)
589 // hashmap because pure ops have no location: they are
590 // "outside of" control flow.
591 //
592 // Note also that we keep the controlling typevar (the `Type`
593 // in the tuple below) because it may disambiguate
594 // instructions that are identical except for type.
595 let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
596 CtxHashMap::with_capacity(cursor.func.dfg.num_values());
597
598 // Map from instruction to an optional value for GVN'ing of effectful
599 // but idempotent ops, which remain in the side-effecting skeleton. This
600 // needs to be scoped because we cannot deduplicate one instruction to
601 // another that is in a non-dominating block.
602 //
603 // If the instruction produces a value, then it is stored in the map and
604 // can be used to GVN the results of idempotently side-effectful
605 // instructions. If the instruction does not produce a value, and is
606 // only used for its effects, then the entry's value is `None`. In the
607 // latter case, we can still deduplicate the idempotent instructions,
608 // but there is no value to GVN.
609 //
610 // Note that we can use a ScopedHashMap here without the "context" (as
611 // needed by CtxHashMap) because in practice the ops we want to GVN have
612 // all their args inline. Equality on the InstructionData itself is
613 // conservative: two insts whose struct contents compare shallowly equal
614 // are definitely identical, but identical insts in a deep-equality
615 // sense may not compare shallowly equal, due to list indirection. This
616 // is fine for GVN, because it is still sound to skip any given GVN
617 // opportunity (and keep the original instructions).
618 //
619 // As above, we keep the controlling typevar here as part of the key:
620 // effectful instructions may (as for pure instructions) be
621 // differentiated only on the type.
622 let mut effectful_gvn_map: ScopedHashMap<(Type, InstructionData), Option<Value>> =
623 ScopedHashMap::new();
624
625 // We assign an "available block" to every value. Values tied to
626 // the side-effecting skeleton are available in the block where
627 // they're defined. Results from pure instructions could legally
628 // float up the domtree so they are available as soon as all
629 // their arguments are available. Values which identify union
630 // nodes are available in the same block as all values in the
631 // eclass, enforced by optimize_pure_enode.
632 let mut available_block: SecondaryMap<Value, Block> =
633 SecondaryMap::with_default(Block::reserved_value());
634
635 // This is an initial guess at the size we'll need, but we add
636 // more values as we build simplified alternative expressions so
637 // this is likely to realloc again later.
638 available_block.resize(cursor.func.dfg.num_values());
639
640 // In domtree preorder, visit blocks. (TODO: factor out an
641 // iterator from this and elaborator.)
642 let root = cursor.layout().entry_block().unwrap();
643 enum StackEntry {
644 Visit(Block),
645 Pop,
646 }
647 let mut block_stack = vec![StackEntry::Visit(root)];
648 while let Some(entry) = block_stack.pop() {
649 match entry {
650 StackEntry::Visit(block) => {
651 // We popped this block; push children
652 // immediately, then process this block.
653 block_stack.push(StackEntry::Pop);
654 block_stack.extend(
655 self.ctrl_plane
656 .shuffled(self.domtree.children(block))
657 .map(StackEntry::Visit),
658 );
659 effectful_gvn_map.increment_depth();
660
661 trace!("Processing block {}", block);
662 cursor.set_position(CursorPosition::Before(block));
663
664 let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);
665
666 for ¶m in cursor.func.dfg.block_params(block) {
667 trace!("creating initial singleton eclass for blockparam {}", param);
668 self.eclasses.add(param);
669 value_to_opt_value[param] = param;
670 available_block[param] = block;
671 }
672 while let Some(inst) = cursor.next_inst() {
673 trace!("Processing inst {}", inst);
674
675 // While we're passing over all insts, create initial
676 // singleton eclasses for all result and blockparam
677 // values. Also do initial analysis of all inst
678 // results.
679 for &result in cursor.func.dfg.inst_results(inst) {
680 trace!("creating initial singleton eclass for {}", result);
681 self.eclasses.add(result);
682 }
683
684 // Rewrite args of *all* instructions using the
685 // value-to-opt-value map.
686 cursor.func.dfg.map_inst_values(inst, |arg| {
687 let new_value = value_to_opt_value[arg];
688 trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
689 debug_assert_ne!(new_value, Value::reserved_value());
690 new_value
691 });
692
693 // Build a context for optimization, with borrows of
694 // state. We can't invoke a method on `self` because
695 // we've borrowed `self.func` mutably (as
696 // `cursor.func`) so we pull apart the pieces instead
697 // here.
698 let mut ctx = OptimizeCtx {
699 func: cursor.func,
700 value_to_opt_value: &mut value_to_opt_value,
701 gvn_map: &mut gvn_map,
702 effectful_gvn_map: &mut effectful_gvn_map,
703 available_block: &mut available_block,
704 eclasses: &mut self.eclasses,
705 rewrite_depth: 0,
706 subsume_values: FxHashSet::default(),
707 remat_values: &mut self.remat_values,
708 stats: &mut self.stats,
709 domtree: &self.domtree,
710 alias_analysis: self.alias_analysis,
711 alias_analysis_state: &mut alias_analysis_state,
712 flags: self.flags,
713 ctrl_plane: self.ctrl_plane,
714 optimized_values: Default::default(),
715 };
716
717 if is_pure_for_egraph(ctx.func, inst) {
718 // Insert into GVN map and optimize any new nodes
719 // inserted (recursively performing this work for
720 // any nodes the optimization rules produce).
721 let inst = NewOrExistingInst::Existing(inst);
722 ctx.insert_pure_enode(inst);
723 // We've now rewritten all uses, or will when we
724 // see them, and the instruction exists as a pure
725 // enode in the eclass, so we can remove it.
726 cursor.remove_inst_and_step_back();
727 } else {
728 if ctx.optimize_skeleton_inst(inst) {
729 cursor.remove_inst_and_step_back();
730 }
731 }
732 }
733 }
734 StackEntry::Pop => {
735 effectful_gvn_map.decrement_depth();
736 }
737 }
738 }
739 }
740
741 /// Scoped elaboration: compute a final ordering of op computation
742 /// for each block and update the given Func body. After this
743 /// runs, the function body is back into the state where every
744 /// Inst with an used result is placed in the layout (possibly
745 /// duplicated, if our code-motion logic decides this is the best
746 /// option).
747 ///
748 /// This works in concert with the domtree. We do a preorder
749 /// traversal of the domtree, tracking a scoped map from Id to
750 /// (new) Value. The map's scopes correspond to levels in the
751 /// domtree.
752 ///
753 /// At each block, we iterate forward over the side-effecting
754 /// eclasses, and recursively generate their arg eclasses, then
755 /// emit the ops themselves.
756 ///
757 /// To use an eclass in a given block, we first look it up in the
758 /// scoped map, and get the Value if already present. If not, we
759 /// need to generate it. We emit the extracted enode for this
760 /// eclass after recursively generating its args. Eclasses are
761 /// thus computed "as late as possible", but then memoized into
762 /// the Id-to-Value map and available to all dominated blocks and
763 /// for the rest of this block. (This subsumes GVN.)
764 fn elaborate(&mut self) {
765 let mut elaborator = Elaborator::new(
766 self.func,
767 &self.domtree,
768 self.loop_analysis,
769 &self.remat_values,
770 &mut self.stats,
771 self.ctrl_plane,
772 );
773 elaborator.elaborate();
774
775 self.check_post_egraph();
776 }
777
778 #[cfg(debug_assertions)]
779 fn check_post_egraph(&self) {
780 // Verify that no union nodes are reachable from inst args,
781 // and that all inst args' defining instructions are in the
782 // layout.
783 for block in self.func.layout.blocks() {
784 for inst in self.func.layout.block_insts(block) {
785 self.func
786 .dfg
787 .inst_values(inst)
788 .for_each(|arg| match self.func.dfg.value_def(arg) {
789 ValueDef::Result(i, _) => {
790 debug_assert!(self.func.layout.inst_block(i).is_some());
791 }
792 ValueDef::Union(..) => {
793 panic!("egraph union node {arg} still reachable at {inst}!");
794 }
795 _ => {}
796 })
797 }
798 }
799 }
800
801 #[cfg(not(debug_assertions))]
802 fn check_post_egraph(&self) {}
803}
804
805/// Implementation of external-context equality and hashing on
806/// InstructionData. This allows us to deduplicate instructions given
807/// some context that lets us see its value lists and the mapping from
808/// any value to "canonical value" (in an eclass).
809struct GVNContext<'a> {
810 value_lists: &'a ValueListPool,
811 union_find: &'a UnionFind<Value>,
812}
813
814impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
815 fn ctx_eq(
816 &self,
817 (a_ty, a_inst): &(Type, InstructionData),
818 (b_ty, b_inst): &(Type, InstructionData),
819 ) -> bool {
820 a_ty == b_ty
821 && a_inst.eq(b_inst, self.value_lists, |value| {
822 self.union_find.find(value)
823 })
824 }
825}
826
827impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
828 fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
829 std::hash::Hash::hash(&ty, state);
830 inst.hash(state, self.value_lists, |value| self.union_find.find(value));
831 }
832}
833
834/// Statistics collected during egraph-based processing.
835#[derive(Clone, Debug, Default)]
836pub(crate) struct Stats {
837 pub(crate) pure_inst: u64,
838 pub(crate) pure_inst_deduped: u64,
839 pub(crate) skeleton_inst: u64,
840 pub(crate) alias_analysis_removed: u64,
841 pub(crate) new_inst: u64,
842 pub(crate) union: u64,
843 pub(crate) subsume: u64,
844 pub(crate) remat: u64,
845 pub(crate) rewrite_rule_invoked: u64,
846 pub(crate) rewrite_depth_limit: u64,
847 pub(crate) elaborate_visit_node: u64,
848 pub(crate) elaborate_memoize_hit: u64,
849 pub(crate) elaborate_memoize_miss: u64,
850 pub(crate) elaborate_remat: u64,
851 pub(crate) elaborate_licm_hoist: u64,
852 pub(crate) elaborate_func: u64,
853 pub(crate) elaborate_func_pre_insts: u64,
854 pub(crate) elaborate_func_post_insts: u64,
855 pub(crate) elaborate_best_cost_fixpoint_iters: u64,
856}