cranelift_codegen/egraph/elaborate.rs
//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.

use super::Stats;
use super::cost::Cost;
use crate::ctxhash::NullCtx;
use crate::dominator_tree::DominatorTree;
use crate::hash_map::Entry as HashEntry;
use crate::inst_predicates::is_pure_for_egraph;
use crate::ir::{Block, Function, Inst, Value, ValueDef};
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use crate::{FxHashMap, FxHashSet};
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue};
use smallvec::{SmallVec, smallvec};

pub(crate) struct Elaborator<'a> {
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used in (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}
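
// A minimal sketch (not from the original source) of the duplication rule
// described on `value_to_elaborated_value` above: suppose dom-tree sibling
// blocks B1 and B2 both use the pure result `v` of the same inst. Elaborating
// B1 places the original inst and records the identity mapping `v -> v` at
// B1's scope depth; popping back above B1 discards that entry, so when B2 is
// elaborated the lookup misses, the producing inst is found to already be in
// the Layout, and it is cloned, recording `v -> v'` (the clone's result)
// instead.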

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);

impl PartialOrd for BestEntry {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for BestEntry {
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.0.cmp(&other.0).then_with(|| {
            // Note that this comparison is reversed. When costs are equal,
            // prefer the value with the bigger index. This is a heuristic that
            // prefers results of rewrites to the original value, since we
            // expect that our rewrites are generally improvements.
            self.1.cmp(&other.1).reverse()
        })
    }
}
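
// A minimal sketch of the tie-break above, assuming the `from_u32`
// constructor that `entity_impl!` generates for `Value`: with equal costs,
// `min` picks the entry with the *larger* value index, i.e. the rewritten
// value rather than the original.
//
//     let old = BestEntry(Cost::zero(), Value::from_u32(1));
//     let new = BestEntry(Cost::zero(), Value::from_u32(2));
//     assert_eq!(core::cmp::min(old, new), new);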

#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    in_block: Block,
    value: Value,
}

#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}

#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        inst: Inst,
        result_idx: usize,
        num_args: usize,
        before: Inst,
    },
}
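
// A minimal sketch of the protocol between `elab_stack` and
// `elab_result_stack`, for a use of `v3` where `v3 = iadd v1, v2`:
//
//     elab_stack: [Start { v3 }]
//       -> pop Start { v3 }; push PendingInst { iadd, num_args: 2 },
//          then Start { v2 }, Start { v1 } (reverse order, so v1 is on top)
//       -> each Start resolves and pushes one ElaboratedValue onto
//          elab_result_stack
//       -> pop PendingInst; consume the top two results as args, place the
//          iadd, and push the ElaboratedValue for its result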

#[derive(Clone, Debug)]
enum BlockStackEntry {
    Elaborate { block: Block, idom: Option<Block> },
    Pop,
}

impl<'a> Elaborator<'a> {
    pub(crate) fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTree,
        loop_analysis: &'a LoopAnalysis,
        remat_values: &'a FxHashSet<Value>,
        stats: &'a mut Stats,
        ctrl_plane: &'a mut ControlPlane,
    ) -> Self {
        let num_values = func.dfg.num_values();
        let mut value_to_best_value =
            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
        value_to_best_value.resize(num_values);
        Self {
            func,
            domtree,
            loop_analysis,
            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
            value_to_best_value,
            loop_stack: smallvec![],
            cur_block: Block::reserved_value(),
            remat_values,
            elab_stack: vec![],
            elab_result_stack: vec![],
            block_stack: vec![],
            remat_copies: FxHashMap::default(),
            stats,
            ctrl_plane,
        }
    }

    fn start_block(&mut self, idom: Option<Block>, block: Block) {
        trace!(
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
            block,
            idom,
            self.loop_stack.len(),
            self.value_to_elaborated_value.depth()
        );

        // Pop any loop levels we're no longer in.
        while let Some(inner_loop) = self.loop_stack.last() {
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
                break;
            }
            self.loop_stack.pop();
        }

        // Note that if the *entry* block is a loop header, we will
        // not make note of the loop here because it will not have an
        // immediate dominator. We must disallow this case because we
        // will skip adding the `LoopStackEntry` here but our
        // `LoopAnalysis` will otherwise still make note of this loop
        // and loop depths will not match.
        if let Some(idom) = idom {
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                self.loop_stack.push(LoopStackEntry {
                    lp,
                    // Any code hoisted out of this loop will have code
                    // placed in `idom`, and will have def mappings
                    // inserted into the scoped hashmap at that block's
                    // level.
                    hoist_block: idom,
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                });
                trace!(
                    " -> loop header, pushing; depth now {}",
                    self.loop_stack.len()
                );
            }
        } else {
            debug_assert!(
                self.loop_analysis.is_loop_header(block).is_none(),
                "Entry block (domtree root) cannot be a loop header!"
            );
        }

        trace!("block {}: loop stack is {:?}", block, self.loop_stack);

        self.cur_block = block;
    }

    fn topo_sorted_values(&self) -> Vec<Value> {
        #[derive(Debug)]
        enum Event {
            Enter,
            Exit,
        }
        let mut stack = Vec::<(Event, Value)>::new();

        // Traverse the CFG in pre-order so that, when we look at the
        // instructions and operands inside each block, we see value defs before
        // uses.
        for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) {
            for inst in self.func.layout.block_insts(block) {
                stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v)));
            }
        }

        // We pushed in the desired order, so popping would implicitly reverse
        // that. Avoid that by reversing the initial stack before we start
        // traversing the DFG.
        stack.reverse();

        let mut sorted = Vec::with_capacity(self.func.dfg.num_values());
        let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.num_values());

        // Post-order traversal of the DFG, visiting value defs before uses.
        while let Some((event, value)) = stack.pop() {
            match event {
                Event::Enter => {
                    if seen.insert(value) {
                        stack.push((Event::Exit, value));
                        match self.func.dfg.value_def(value) {
                            ValueDef::Result(inst, _) => {
                                stack.extend(
                                    self.func
                                        .dfg
                                        .inst_values(inst)
                                        .rev()
                                        .filter(|v| !seen.contains(*v))
                                        .map(|v| (Event::Enter, v)),
                                );
                            }
                            ValueDef::Union(a, b) => {
                                if !seen.contains(b) {
                                    stack.push((Event::Enter, b));
                                }
                                if !seen.contains(a) {
                                    stack.push((Event::Enter, a));
                                }
                            }
                            ValueDef::Param(..) => {}
                        }
                    }
                }
                Event::Exit => {
                    sorted.push(value);
                }
            }
        }

        sorted
    }
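
    // A minimal sketch of the Enter/Exit scheme above, for `v1 = iadd v0, v0`
    // where `v0` is a blockparam:
    //
    //     pop (Enter, v1) -> mark seen; push (Exit, v1), then (Enter, v0)
    //     pop (Enter, v0) -> mark seen; push (Exit, v0)  (Param: no operands)
    //     pop (Exit, v0)  -> sorted = [v0]
    //     pop (Exit, v1)  -> sorted = [v0, v1]
    //
    // (The second `(Enter, v0)` from the duplicate operand is skipped by the
    // `seen` check.) Every value's operands thus precede it in `sorted`.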

    fn compute_best_values(&mut self) {
        let sorted_values = self.topo_sorted_values();

        let best = &mut self.value_to_best_value;

        // We can't make random decisions inside the loop below because
        // that could cause values to change on every iteration of the loop,
        // which would make the loop never terminate. So in chaos testing
        // mode we need a form of making suboptimal decisions that is fully
        // deterministic. We choose to simply make the worst decision we know
        // how to do instead of the best.
        let use_worst = self.ctrl_plane.get_decision();

        trace!(
            "Computing the {} values for each eclass",
            if use_worst {
                "worst (chaos mode)"
            } else {
                "best"
            }
        );

        // Because the values are topologically sorted, we know that we will see
        // defs before uses, so an instruction's operands' costs will already be
        // computed by the time we are computing the cost for the current value
        // and its instruction.
        for value in sorted_values.iter().copied() {
            let def = self.func.dfg.value_def(value);
            trace!("computing best for value {:?} def {:?}", value, def);

            match def {
                // Pick the best of the two options based on min-cost. This
                // works because each element of `best` is a `(cost, value)`
                // tuple; `cost` comes first so the natural comparison works
                // based on cost, and breaks ties based on value number.
                ValueDef::Union(x, y) => {
                    debug_assert!(!best[x].1.is_reserved_value());
                    debug_assert!(!best[y].1.is_reserved_value());
                    best[value] = if use_worst {
                        core::cmp::max(best[x], best[y])
                    } else {
                        core::cmp::min(best[x], best[y])
                    };
                    trace!(
                        " -> best of union({:?}, {:?}) = {:?}",
                        best[x], best[y], best[value]
                    );
                }

                ValueDef::Param(_, _) => {
                    best[value] = BestEntry(Cost::zero(), value);
                }

                // If the Inst is inserted into the layout (which is,
                // at this point, only the side-effecting skeleton),
                // then it must be computed and thus we give it zero
                // cost.
                ValueDef::Result(inst, _) => {
                    if self.func.layout.inst_block(inst).is_some() {
                        best[value] = BestEntry(Cost::zero(), value);
                    } else {
                        let inst_data = &self.func.dfg.insts[inst];
                        // N.B.: at this point we know that the opcode is
                        // pure, so `pure_op_cost`'s precondition is
                        // satisfied.
                        let cost = Cost::of_pure_op(
                            inst_data.opcode(),
                            self.func.dfg.inst_values(inst).map(|value| {
                                debug_assert!(!best[value].1.is_reserved_value());
                                best[value].0
                            }),
                        );
                        best[value] = BestEntry(cost, value);
                        trace!(" -> cost of value {} = {:?}", value, cost);
                    }
                }
            };

            // You might be expecting an assert that the best cost we just
            // computed is not infinity; however, infinite cost *can* happen in
            // practice. First, note that our cost function doesn't know about
            // any shared structure in the dataflow graph; it only sums operand
            // costs. (And trying to avoid that by deduping a single operation's
            // operands is a losing game because you can always just add one
            // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to
            // hide the shared structure.) Given that blindness to sharing, we
            // can make cost grow exponentially with a linear sequence of
            // operations:
            //
            //     v0 = iconst.i32 1    ;; cost = 1
            //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
            //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
            //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
            //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
            //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
            //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
            //     ;; etc...
            //
            // Such a chain can cause cost to saturate to infinity. How do we
            // choose which e-node is best when there are multiple that have
            // saturated to infinity? It doesn't matter. As long as invariant
            // (2) for optimization rules is upheld by our rule set (see
            // `cranelift/codegen/src/opts/README.md`) it is safe to choose
            // *any* e-node in the e-class. At worst we will produce suboptimal
            // code, but never incorrect code.
        }
    }
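
    // A minimal sketch of the `ValueDef::Union` arm above: if `v9` unions the
    // original `v4` (best cost 5) with rewritten `v7` (best cost 2), then
    // `best[v9] = min(BestEntry(5, v4), BestEntry(2, v7)) = BestEntry(2, v7)`;
    // under chaos mode (`use_worst`), `max` deterministically picks
    // `BestEntry(5, v4)` instead.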

    /// Elaborate use of an eclass, inserting any needed new
    /// instructions before the given inst `before`. Should only be
    /// given values corresponding to results of instructions or
    /// blockparams.
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
        debug_assert_ne!(value, Value::reserved_value());

        // Kick off the process by requesting this result
        // value.
        self.elab_stack
            .push(ElabStackEntry::Start { value, before });

        // Now run the explicit-stack recursion until we reach
        // the root.
        self.process_elab_stack();
        debug_assert_eq!(self.elab_result_stack.len(), 1);
        self.elab_result_stack.pop().unwrap()
    }

    /// Possibly rematerialize the instruction producing the value in
    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
    /// `true` if a rewrite occurred.
    fn maybe_remat_arg(
        remat_values: &FxHashSet<Value>,
        func: &mut Function,
        remat_copies: &mut FxHashMap<(Block, Value), Value>,
        insert_block: Block,
        before: Inst,
        arg: &mut ElaboratedValue,
        stats: &mut Stats,
    ) -> bool {
        // TODO (#7313): we may want to consider recursive
        // rematerialization as well. We could process the arguments of
        // the rematerialized instruction up to a certain depth. This
        // would affect, e.g., adds-with-one-constant-arg, which are
        // currently rematerialized. Right now we don't do this, to
        // avoid the need for another fixpoint loop here.
        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
            let new_value = match remat_copies.entry((insert_block, arg.value)) {
                HashEntry::Occupied(o) => *o.get(),
                HashEntry::Vacant(v) => {
                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
                    let new_inst = func.dfg.clone_inst(inst);
                    func.layout.insert_inst(new_inst, before);
                    let new_result = func.dfg.inst_results(new_inst)[0];
                    *v.insert(new_result)
                }
            };
            trace!("rematerialized {} as {}", arg.value, new_value);
            arg.value = new_value;
            stats.elaborate_remat += 1;
            true
        } else {
            false
        }
    }
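
    // A minimal sketch of the caching behavior above, assuming `v1` (say,
    // `v1 = iconst.i32 42`) was marked for remat: the first use of `v1` in
    // block B clones the iconst, inserts it before the demanding inst, and
    // records `remat_copies[(B, v1)] = v1'`; every later use of `v1` in B
    // hits the `Occupied` arm and reuses `v1'` without cloning again. A use
    // in the block where `v1` was elaborated (`arg.in_block == insert_block`)
    // is left untouched.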

    fn process_elab_stack(&mut self) {
        while let Some(entry) = self.elab_stack.pop() {
            match entry {
                ElabStackEntry::Start { value, before } => {
                    debug_assert!(self.func.dfg.value_is_real(value));

                    self.stats.elaborate_visit_node += 1;

                    // Get the best option; we use `value` (latest
                    // value) here so we have a full view of the
                    // eclass.
                    trace!("looking up best value for {}", value);
                    let BestEntry(_, best_value) = self.value_to_best_value[value];
                    trace!("elaborate: value {} -> best {}", value, best_value);
                    debug_assert_ne!(best_value, Value::reserved_value());

                    if let Some(elab_val) =
                        self.value_to_elaborated_value.get(&NullCtx, &best_value)
                    {
                        // Value is available; use it.
                        trace!("elaborate: value {} -> {:?}", value, elab_val);
                        self.stats.elaborate_memoize_hit += 1;
                        self.elab_result_stack.push(*elab_val);
                        continue;
                    }

                    self.stats.elaborate_memoize_miss += 1;

                    // Now resolve the value to its definition to see
                    // how we can compute it.
                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
                        ValueDef::Result(inst, result_idx) => {
                            trace!(
                                " -> value {} is result {} of {}",
                                best_value, result_idx, inst
                            );
                            (inst, result_idx)
                        }
                        ValueDef::Param(in_block, _) => {
                            // We don't need to do anything to compute
                            // this value; just push its result on the
                            // result stack (blockparams are already
                            // available).
                            trace!(" -> value {} is a blockparam", best_value);
                            self.elab_result_stack.push(ElaboratedValue {
                                in_block,
                                value: best_value,
                            });
                            continue;
                        }
                        ValueDef::Union(_, _) => {
                            panic!("Should never have a Union value as the best value");
                        }
                    };

                    trace!(
                        " -> result {} of inst {:?}",
                        result_idx, self.func.dfg.insts[inst]
                    );

                    // We're going to need to use this instruction
                    // result, placing the instruction into the
                    // layout. First, enqueue all args to be
                    // elaborated. Push state to receive the results
                    // and later elab this inst.
                    let num_args = self.func.dfg.inst_values(inst).count();
                    self.elab_stack.push(ElabStackEntry::PendingInst {
                        inst,
                        result_idx,
                        num_args,
                        before,
                    });

                    // Push args in reverse order so we process the
                    // first arg first.
                    for arg in self.func.dfg.inst_values(inst).rev() {
                        debug_assert_ne!(arg, Value::reserved_value());
                        self.elab_stack
                            .push(ElabStackEntry::Start { value: arg, before });
                    }
                }

                ElabStackEntry::PendingInst {
                    inst,
                    result_idx,
                    num_args,
                    before,
                } => {
                    trace!(
                        "PendingInst: {} result {} args {} before {}",
                        inst, result_idx, num_args, before
                    );

                    // We should have all args resolved at this
                    // point. Grab them from the tail of the result
                    // stack; we truncate the stack below once they
                    // have been consumed.
                    let arg_idx = self.elab_result_stack.len() - num_args;
                    let arg_values = &mut self.elab_result_stack[arg_idx..];

                    // Compute max loop depth.
                    //
                    // Note that if there are no arguments then this instruction
                    // is allowed to get hoisted up one loop. This is not
                    // usually used since no-argument values are things like
                    // constants which are typically rematerialized, but for the
                    // `vconst` instruction 128-bit constants aren't as easily
                    // rematerialized. They're hoisted out of inner loops, but
                    // not all the way to the function entry, which would risk
                    // placing too much register pressure on the entire
                    // function. This is modeled with the `.saturating_sub(1)`
                    // as the default if there's otherwise no maximum.
                    let loop_hoist_level = arg_values
                        .iter()
                        .map(|&value| {
                            // Find the outermost loop level at which
                            // the value's defining block *is not* a
                            // member. This is the loop-nest level
                            // whose hoist-block we hoist to.
                            let hoist_level = self
                                .loop_stack
                                .iter()
                                .position(|loop_entry| {
                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
                                })
                                .unwrap_or(self.loop_stack.len());
                            trace!(
                                " -> arg: elab_value {:?} hoist level {:?}",
                                value, hoist_level
                            );
                            hoist_level
                        })
                        .max()
                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
                    trace!(
                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                        loop_hoist_level,
                        self.loop_stack.len(),
                        self.loop_stack,
                    );

                    // We know that this is a pure inst, because
                    // non-pure roots have already been placed in the
                    // value-to-elab'd-value map, so they will not
                    // reach this stage of processing.
                    //
                    // We now must determine the location at which we
                    // place the instruction. This is the current
                    // block *unless* we hoist above a loop when all
                    // args are loop-invariant (and this op is pure).
                    let (scope_depth, before, insert_block) = if loop_hoist_level
                        == self.loop_stack.len()
                    {
                        // Depends on some value at the current
                        // loop depth, or remat forces it here:
                        // place it at the current location.
                        (
                            self.value_to_elaborated_value.depth(),
                            before,
                            self.func.layout.inst_block(before).unwrap(),
                        )
                    } else {
                        // Does not depend on any args at the current
                        // loop depth: hoist out of the loop.
                        self.stats.elaborate_licm_hoist += 1;
                        let data = &self.loop_stack[loop_hoist_level];
                        // `data.hoist_block` should dominate `before`'s block.
                        let before_block = self.func.layout.inst_block(before).unwrap();
                        debug_assert!(
                            self.domtree.block_dominates(data.hoist_block, before_block)
                        );
                        // Determine the instruction at which we
                        // insert in `data.hoist_block`.
                        let before = self.func.layout.last_inst(data.hoist_block).unwrap();
                        (data.scope_depth as usize, before, data.hoist_block)
                    };

                    trace!(
                        " -> decided to place: before {} insert_block {}",
                        before, insert_block
                    );

                    // Now that we have the location for the
                    // instruction, check if any of its args are remat
                    // values. If so, and if we don't have a copy of
                    // the rematerializing instruction for this block
                    // yet, create one.
                    let mut remat_arg = false;
                    for arg_value in arg_values.iter_mut() {
                        if Self::maybe_remat_arg(
                            &self.remat_values,
                            &mut self.func,
                            &mut self.remat_copies,
                            insert_block,
                            before,
                            arg_value,
                            &mut self.stats,
                        ) {
                            remat_arg = true;
                        }
                    }

                    // Now we need to place `inst` at the computed
                    // location (just before `before`). Note that
                    // `inst` may already have been placed somewhere
                    // else, because a pure node may be elaborated at
                    // more than one place. In this case, we need to
                    // duplicate the instruction (and return the
                    // `Value`s for that duplicated instance instead).
                    //
                    // Also clone if we rematerialized, because we
                    // don't want to rewrite the args in the original
                    // copy.
                    trace!("need inst {} before {}", inst, before);
                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
                        // Clone the inst!
                        let new_inst = self.func.dfg.clone_inst(inst);
                        trace!(
                            " -> inst {} already has a location; cloned to {}",
                            inst, new_inst
                        );
                        // Create mappings in the
                        // value-to-elab'd-value map from original
                        // results to cloned results.
                        for (&result, &new_result) in self
                            .func
                            .dfg
                            .inst_results(inst)
                            .iter()
                            .zip(self.func.dfg.inst_results(new_inst).iter())
                        {
                            let elab_value = ElaboratedValue {
                                value: new_result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                &NullCtx,
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );

                            self.value_to_best_value[new_result] = best_result;

                            trace!(
                                " -> cloned inst has new result {} for orig {}",
                                new_result, result
                            );
                        }
                        new_inst
                    } else {
                        trace!(" -> no location; using original inst");
                        // Create identity mappings from result values
                        // to themselves in this scope, since we're
                        // using the original inst.
                        for &result in self.func.dfg.inst_results(inst) {
                            let elab_value = ElaboratedValue {
                                value: result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                &NullCtx,
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );
                            trace!(" -> inserting identity mapping for {}", result);
                        }
                        inst
                    };

                    // Place the inst just before `before`.
                    assert!(
                        is_pure_for_egraph(self.func, inst),
                        "something has gone very wrong if we are elaborating effectful \
                         instructions; they should have remained in the skeleton"
                    );
                    self.func.layout.insert_inst(inst, before);

                    // Update the inst's arguments.
                    self.func
                        .dfg
                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));

                    // Now that we've consumed the arg values, pop
                    // them off the stack.
                    self.elab_result_stack.truncate(arg_idx);

                    // Push the requested result index of the
                    // instruction onto the elab-results stack.
                    self.elab_result_stack.push(ElaboratedValue {
                        in_block: insert_block,
                        value: self.func.dfg.inst_results(inst)[result_idx],
                    });
                }
            }
        }
    }
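
    // A minimal sketch of the hoisting decision above: with `loop_stack`
    // holding [L0, L1] (L1 innermost), an inst whose args are all defined
    // outside L0 gets `loop_hoist_level == 0` and is placed at the end of
    // L0's `hoist_block`; an inst with some arg defined inside L1 gets
    // `loop_hoist_level == 2 == loop_stack.len()` and stays at the current
    // location; and a no-arg inst (e.g. `vconst`) defaults to level 1, so it
    // is hoisted out of L1 but no further.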

    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
        trace!("elaborate_block: block {}", block);
        self.start_block(idom, block);

        // Iterate over the side-effecting skeleton using the linked
        // list in Layout. We will insert instructions that are
        // elaborated *before* `inst`, so we can always use its
        // next-link to continue the iteration.
        let mut next_inst = self.func.layout.first_inst(block);
        let mut first_branch = None;
        while let Some(inst) = next_inst {
            trace!(
                "elaborating inst {} with results {:?}",
                inst,
                self.func.dfg.inst_results(inst)
            );
            // Record the first branch we see in the block; all
            // elaboration for args of *any* branch must be inserted
            // before the *first* branch, because the branch group
            // must remain contiguous at the end of the block.
            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch.is_none() {
                first_branch = Some(inst);
            }

            // Determine where elaboration inserts insts.
            let before = first_branch.unwrap_or(inst);
            trace!(" -> inserting before {}", before);

            elab_values.extend(self.func.dfg.inst_values(inst));
            for arg in elab_values.iter_mut() {
                trace!(" -> arg {}", *arg);
                // Elaborate the arg, placing any newly-inserted insts
                // before `before`. Get the updated value, which may
                // be different from the original.
                let mut new_arg = self.elaborate_eclass_use(*arg, before);
                Self::maybe_remat_arg(
                    &self.remat_values,
                    &mut self.func,
                    &mut self.remat_copies,
                    block,
                    inst,
                    &mut new_arg,
                    &mut self.stats,
                );
                trace!(" -> rewrote arg to {:?}", new_arg);
                *arg = new_arg.value;
            }
            self.func
                .dfg
                .overwrite_inst_values(inst, elab_values.drain(..));

            // We need to put the results of this instruction in the
            // map now.
            for &result in self.func.dfg.inst_results(inst) {
                trace!(" -> result {}", result);
                let best_result = self.value_to_best_value[result];
                self.value_to_elaborated_value.insert_if_absent(
                    &NullCtx,
                    best_result.1,
                    ElaboratedValue {
                        in_block: block,
                        value: result,
                    },
                );
            }

            next_inst = self.func.layout.next_inst(inst);
        }
    }

    fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
        self.block_stack.push(BlockStackEntry::Elaborate {
            block: self.func.layout.entry_block().unwrap(),
            idom: None,
        });

        // A temporary workspace for `elaborate_block`, allocated here so that
        // the same allocation can be reused across all blocks.
        let mut elab_values = Vec::new();

        while let Some(top) = self.block_stack.pop() {
            match top {
                BlockStackEntry::Elaborate { block, idom } => {
                    self.block_stack.push(BlockStackEntry::Pop);
                    self.value_to_elaborated_value.increment_depth();

                    self.elaborate_block(&mut elab_values, idom, block);

                    // Push children. We are doing a preorder
                    // traversal so we do this after processing this
                    // block above.
                    let block_stack_end = self.block_stack.len();
                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
                        self.block_stack.push(BlockStackEntry::Elaborate {
                            block: child,
                            idom: Some(block),
                        });
                    }
                    // Reverse what we just pushed so we elaborate in
                    // original block order. (The domtree iter is a
                    // single-ended iter over a singly-linked list so
                    // we can't `.rev()` above.)
                    self.block_stack[block_stack_end..].reverse();
                }
                BlockStackEntry::Pop => {
                    self.value_to_elaborated_value.decrement_depth();
                }
            }
        }
    }
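
    // A minimal sketch of the stack discipline above, for a domtree in which
    // the entry block E has children {A, B}:
    //
    //     pop Elaborate(E) -> push Pop; elaborate E; push children, then
    //                         reverse them: [Pop, Elaborate(B), Elaborate(A)]
    //     pop Elaborate(A) -> push Pop; elaborate A; ... A's subtree ...
    //     pop Pop          -> decrement scope depth (drop A's mappings)
    //     pop Elaborate(B) -> ... B cannot see values elaborated under A ...
    //
    // The Pop entries mirror domtree scope exits, keeping
    // `value_to_elaborated_value` scoped to the current dominance path.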

    pub(crate) fn elaborate(&mut self) {
        self.stats.elaborate_func += 1;
        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
        self.compute_best_values();
        self.elaborate_domtree(self.domtree);
        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
    }
}
867}