cranelift_codegen/egraph/elaborate.rs
//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.

use super::cost::Cost;
use super::Stats;
use crate::dominator_tree::DominatorTreePreorder;
use crate::hash_map::Entry as HashEntry;
use crate::inst_predicates::is_pure_for_egraph;
use crate::ir::{Block, Function, Inst, Value, ValueDef};
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
use rustc_hash::{FxHashMap, FxHashSet};
use smallvec::{smallvec, SmallVec};

pub(crate) struct Elaborator<'a> {
    func: &'a mut Function,
    domtree: &'a DominatorTreePreorder,
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
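    ///
    /// For example (illustrative value numbers): if pure `v7` is first
    /// used in block `B1`, we place its inst there and record the
    /// identity mapping `v7 -> v7` at `B1`'s scope depth. If a dom-tree
    /// sibling `B2` later needs `v7`, that scope has been popped, the
    /// lookup misses, and the producing inst is already in the layout,
    /// so we clone it and map `v7` to the clone's result within `B2`'s
    /// scope.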
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);

impl PartialOrd for BestEntry {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for BestEntry {
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.0.cmp(&other.0).then_with(|| {
            // Note that this comparison is reversed. When costs are equal,
            // prefer the value with the bigger index. This is a heuristic that
            // prefers results of rewrites to the original value, since we
            // expect that our rewrites are generally improvements.
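            //
            // For example, with equal costs `c`, `min(BestEntry(c, v2),
            // BestEntry(c, v9))` yields `BestEntry(c, v9)`: the
            // higher-numbered (newer) value wins the tie.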
            self.1.cmp(&other.1).reverse()
        })
    }
}

/// An elaborated value: a value that is now available in the layout,
/// together with the block in which its defining instruction was
/// placed.
#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    in_block: Block,
    value: Value,
}

#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}

#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        inst: Inst,
        result_idx: usize,
        num_args: usize,
        before: Inst,
    },
}

#[derive(Clone, Debug)]
enum BlockStackEntry {
    Elaborate { block: Block, idom: Option<Block> },
    Pop,
}

impl<'a> Elaborator<'a> {
    pub(crate) fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTreePreorder,
        loop_analysis: &'a LoopAnalysis,
        remat_values: &'a FxHashSet<Value>,
        stats: &'a mut Stats,
        ctrl_plane: &'a mut ControlPlane,
    ) -> Self {
        let num_values = func.dfg.num_values();
        let mut value_to_best_value =
            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
        value_to_best_value.resize(num_values);
        Self {
            func,
            domtree,
            loop_analysis,
            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
            value_to_best_value,
            loop_stack: smallvec![],
            cur_block: Block::reserved_value(),
            remat_values,
            elab_stack: vec![],
            elab_result_stack: vec![],
            block_stack: vec![],
            remat_copies: FxHashMap::default(),
            stats,
            ctrl_plane,
        }
    }

    fn start_block(&mut self, idom: Option<Block>, block: Block) {
        trace!(
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
            block,
            idom,
            self.loop_stack.len(),
            self.value_to_elaborated_value.depth()
        );

        // Pop any loop levels we're no longer in.
        while let Some(inner_loop) = self.loop_stack.last() {
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
                break;
            }
            self.loop_stack.pop();
        }

        // Note that if the *entry* block is a loop header, we will
        // not make note of the loop here because it will not have an
        // immediate dominator. We must disallow this case because we
        // will skip adding the `LoopStackEntry` here but our
        // `LoopAnalysis` will otherwise still make note of this loop
        // and loop depths will not match.
        if let Some(idom) = idom {
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                self.loop_stack.push(LoopStackEntry {
                    lp,
                    // Any code hoisted out of this loop will have code
                    // placed in `idom`, and will have def mappings
                    // inserted into the scoped hashmap at that block's
                    // level.
                    hoist_block: idom,
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                });
                trace!(
                    " -> loop header, pushing; depth now {}",
                    self.loop_stack.len()
                );
            }
        } else {
            debug_assert!(
                self.loop_analysis.is_loop_header(block).is_none(),
                "Entry block (domtree root) cannot be a loop header!"
            );
        }

        trace!("block {}: loop stack is {:?}", block, self.loop_stack);

        self.cur_block = block;
    }

    fn compute_best_values(&mut self) {
        let best = &mut self.value_to_best_value;

        // We can't make random decisions inside the fixpoint loop below because
        // that could cause values to change on every iteration of the loop,
        // which would make the loop never terminate. So in chaos testing
        // mode we need a form of making suboptimal decisions that is fully
        // deterministic. We choose to simply make the worst decision we know
        // how to do instead of the best.
        let use_worst = self.ctrl_plane.get_decision();

        // Do a fixpoint loop to compute the best value for each eclass.
        //
        // The maximum number of iterations is the length of the longest chain
        // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
        // is *technically* quadratic, but `cranelift-frontend` won't construct
        // any such edges. NaN canonicalization will introduce some of these
        // edges, but they are chains of only two or three edges. So in
        // practice, we *never* do more than a handful of iterations here unless
        // (a) we parsed the CLIF from text and the text was funkily numbered,
        // which we don't really care about, or (b) the CLIF producer did
        // something weird, in which case it is their responsibility to stop
        // doing that.
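        //
        // For example (illustrative): if `v2 = union v1, v9` refers to
        // the higher-numbered `v9`, then on the first pass (which
        // visits values in numeric order) `v9`'s best value is not yet
        // known when `v2` is processed, so at least one more pass is
        // needed; a chain of `k` such forward references needs on the
        // order of `k` extra passes.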
        trace!(
            "Entering fixpoint loop to compute the {} values for each eclass",
            if use_worst {
                "worst (chaos mode)"
            } else {
                "best"
            }
        );
        let mut keep_going = true;
        while keep_going {
            keep_going = false;
            trace!(
                "fixpoint iteration {}",
                self.stats.elaborate_best_cost_fixpoint_iters
            );
            self.stats.elaborate_best_cost_fixpoint_iters += 1;

            for (value, def) in self.func.dfg.values_and_defs() {
                trace!("computing best for value {:?} def {:?}", value, def);
                let orig_best_value = best[value];

                match def {
                    ValueDef::Union(x, y) => {
                        // Pick the best of the two options based on
                        // min-cost. This works because each element of `best`
                        // is a `(cost, value)` tuple; `cost` comes first so
                        // the natural comparison works based on cost, and
                        // breaks ties based on value number.
                        best[value] = if use_worst {
                            if best[x].1.is_reserved_value() {
                                best[y]
                            } else if best[y].1.is_reserved_value() {
                                best[x]
                            } else {
                                core::cmp::max(best[x], best[y])
                            }
                        } else {
                            core::cmp::min(best[x], best[y])
                        };
                        trace!(
                            " -> best of union({:?}, {:?}) = {:?}",
                            best[x],
                            best[y],
                            best[value]
                        );
                    }
                    ValueDef::Param(_, _) => {
                        best[value] = BestEntry(Cost::zero(), value);
                    }
                    // If the Inst is inserted into the layout (which is,
                    // at this point, only the side-effecting skeleton),
                    // then it must be computed and thus we give it zero
                    // cost.
                    ValueDef::Result(inst, _) => {
                        if self.func.layout.inst_block(inst).is_some() {
                            best[value] = BestEntry(Cost::zero(), value);
                        } else {
                            let inst_data = &self.func.dfg.insts[inst];
                            // N.B.: at this point we know that the opcode is
                            // pure, so `of_pure_op`'s precondition is
                            // satisfied.
                            let cost = Cost::of_pure_op(
                                inst_data.opcode(),
                                self.func.dfg.inst_values(inst).map(|value| best[value].0),
                            );
                            best[value] = BestEntry(cost, value);
                            trace!(" -> cost of value {} = {:?}", value, cost);
                        }
                    }
                };

                // Keep on iterating the fixpoint loop while we are finding new
                // best values.
                keep_going |= orig_best_value != best[value];
            }
        }

        if cfg!(any(feature = "trace-log", debug_assertions)) {
            trace!("finished fixpoint loop to compute best value for each eclass");
            for value in self.func.dfg.values() {
                trace!("-> best for eclass {:?}: {:?}", value, best[value]);
                debug_assert_ne!(best[value].1, Value::reserved_value());
                // You might additionally be expecting an assert that the best
                // cost is not infinity, however infinite cost *can* happen in
                // practice. First, note that our cost function doesn't know
                // about any shared structure in the dataflow graph, it only
                // sums operand costs. (And trying to avoid that by deduping a
                // single operation's operands is a losing game because you can
                // always just add one indirection and go from `add(x, x)` to
                // `add(foo(x), bar(x))` to hide the shared structure.) Given
                // that blindness to sharing, we can make cost grow
                // exponentially with a linear sequence of operations:
                //
                //     v0 = iconst.i32 1    ;; cost = 1
                //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
                //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
                //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
                //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
                //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
                //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
                //     ;; etc...
                //
                // Such a chain can cause cost to saturate to infinity. How do
                // we choose which e-node is best when there are multiple that
                // have saturated to infinity? It doesn't matter. As long as
                // invariant (2) for optimization rules is upheld by our rule
                // set (see `cranelift/codegen/src/opts/README.md`) it is safe
                // to choose *any* e-node in the e-class. At worst we will
                // produce suboptimal code, but never an incorrectness.
            }
        }
    }

    /// Elaborate use of an eclass, inserting any needed new
    /// instructions before the given inst `before`. Should only be
    /// given values corresponding to results of instructions or
    /// blockparams.
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
        debug_assert_ne!(value, Value::reserved_value());

        // Kick off the process by requesting this result
        // value.
        self.elab_stack
            .push(ElabStackEntry::Start { value, before });

        // Now run the explicit-stack recursion until we reach
        // the root.
        self.process_elab_stack();
        debug_assert_eq!(self.elab_result_stack.len(), 1);
        self.elab_result_stack.pop().unwrap()
    }

    /// Possibly rematerialize the instruction producing the value in
    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
    /// `true` if a rewrite occurred.
    fn maybe_remat_arg(
        remat_values: &FxHashSet<Value>,
        func: &mut Function,
        remat_copies: &mut FxHashMap<(Block, Value), Value>,
        insert_block: Block,
        before: Inst,
        arg: &mut ElaboratedValue,
        stats: &mut Stats,
    ) -> bool {
        // TODO (#7313): we may want to consider recursive
        // rematerialization as well. We could process the arguments of
        // the rematerialized instruction up to a certain depth. This
        // would affect, e.g., adds-with-one-constant-arg, which are
        // currently rematerialized. Right now we don't do this, to
        // avoid the need for another fixpoint loop here.
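        //
        // Note that a copy is created at most once per (block, value)
        // pair: `remat_copies` memoizes the clone, so repeated uses of,
        // e.g., the same `iconst` within one block all share one copy.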
        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
            let new_value = match remat_copies.entry((insert_block, arg.value)) {
                HashEntry::Occupied(o) => *o.get(),
                HashEntry::Vacant(v) => {
                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
                    let new_inst = func.dfg.clone_inst(inst);
                    func.layout.insert_inst(new_inst, before);
                    let new_result = func.dfg.inst_results(new_inst)[0];
                    *v.insert(new_result)
                }
            };
            trace!("rematerialized {} as {}", arg.value, new_value);
            arg.value = new_value;
            stats.elaborate_remat += 1;
            true
        } else {
            false
        }
    }

    fn process_elab_stack(&mut self) {
        while let Some(entry) = self.elab_stack.pop() {
            match entry {
                ElabStackEntry::Start { value, before } => {
                    debug_assert!(self.func.dfg.value_is_real(value));

                    self.stats.elaborate_visit_node += 1;

                    // Get the best option; we use `value` (latest
                    // value) here so we have a full view of the
                    // eclass.
                    trace!("looking up best value for {}", value);
                    let BestEntry(_, best_value) = self.value_to_best_value[value];
                    trace!("elaborate: value {} -> best {}", value, best_value);
                    debug_assert_ne!(best_value, Value::reserved_value());

                    if let Some(elab_val) = self.value_to_elaborated_value.get(&best_value) {
                        // Value is available; use it.
                        trace!("elaborate: value {} -> {:?}", value, elab_val);
                        self.stats.elaborate_memoize_hit += 1;
                        self.elab_result_stack.push(*elab_val);
                        continue;
                    }

                    self.stats.elaborate_memoize_miss += 1;

                    // Now resolve the value to its definition to see
                    // how we can compute it.
                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
                        ValueDef::Result(inst, result_idx) => {
                            trace!(
                                " -> value {} is result {} of {}",
                                best_value,
                                result_idx,
                                inst
                            );
                            (inst, result_idx)
                        }
                        ValueDef::Param(in_block, _) => {
                            // We don't need to do anything to compute
                            // this value; just push its result on the
                            // result stack (blockparams are already
                            // available).
                            trace!(" -> value {} is a blockparam", best_value);
                            self.elab_result_stack.push(ElaboratedValue {
                                in_block,
                                value: best_value,
                            });
                            continue;
                        }
                        ValueDef::Union(_, _) => {
                            panic!("Should never have a Union value as the best value");
                        }
                    };

                    trace!(
                        " -> result {} of inst {:?}",
                        result_idx,
                        self.func.dfg.insts[inst]
                    );

                    // We're going to need to use this instruction
                    // result, placing the instruction into the
                    // layout. First, enqueue all args to be
                    // elaborated. Push state to receive the results
                    // and later elab this inst.
                    let num_args = self.func.dfg.inst_values(inst).count();
                    self.elab_stack.push(ElabStackEntry::PendingInst {
                        inst,
                        result_idx,
                        num_args,
                        before,
                    });

                    // Push args in reverse order so we process the
                    // first arg first.
                    for arg in self.func.dfg.inst_values(inst).rev() {
                        debug_assert_ne!(arg, Value::reserved_value());
                        self.elab_stack
                            .push(ElabStackEntry::Start { value: arg, before });
                    }
                }

                ElabStackEntry::PendingInst {
                    inst,
                    result_idx,
                    num_args,
                    before,
                } => {
                    trace!(
                        "PendingInst: {} result {} args {} before {}",
                        inst,
                        result_idx,
                        num_args,
                        before
                    );

                    // All of this inst's args should be resolved by
                    // now; they sit at the tail of the elab-result
                    // stack. Borrow them here; the stack is truncated
                    // once they have been consumed below.
                    let arg_idx = self.elab_result_stack.len() - num_args;
                    let arg_values = &mut self.elab_result_stack[arg_idx..];

                    // Compute max loop depth.
                    //
                    // Note that if there are no arguments then this instruction
                    // is allowed to get hoisted up one loop. This is not
                    // usually used since no-argument values are things like
                    // constants which are typically rematerialized, but for the
                    // `vconst` instruction 128-bit constants aren't as easily
                    // rematerialized. They're hoisted out of inner loops but
                    // not to the function entry which may run the risk of
                    // placing too much register pressure on the entire
                    // function. This is modeled with the `.saturating_sub(1)`
                    // as the default if there's otherwise no maximum.
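                    //
                    // For example (illustrative): with `loop_stack`
                    // holding [outer, inner] and an arg defined in a
                    // block that is in `outer` but not `inner`, the
                    // `position` below returns 1, so the inst hoists to
                    // `inner`'s hoist block; an arg defined inside
                    // `inner` yields 2 (== len), i.e., no hoisting.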
                    let loop_hoist_level = arg_values
                        .iter()
                        .map(|&value| {
                            // Find the outermost loop level at which
                            // the value's defining block *is not* a
                            // member. This is the loop-nest level
                            // whose hoist-block we hoist to.
                            let hoist_level = self
                                .loop_stack
                                .iter()
                                .position(|loop_entry| {
                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
                                })
                                .unwrap_or(self.loop_stack.len());
                            trace!(
                                " -> arg: elab_value {:?} hoist level {:?}",
                                value,
                                hoist_level
                            );
                            hoist_level
                        })
                        .max()
                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
                    trace!(
                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                        loop_hoist_level,
                        self.loop_stack.len(),
                        self.loop_stack,
                    );

                    // We know that this is a pure inst, because
                    // non-pure roots have already been placed in the
                    // value-to-elab'd-value map, so they will not
                    // reach this stage of processing.
                    //
                    // We now must determine the location at which we
                    // place the instruction. This is the current
                    // block *unless* we hoist above a loop when all
                    // args are loop-invariant (and this op is pure).
                    let (scope_depth, before, insert_block) =
                        if loop_hoist_level == self.loop_stack.len() {
                            // Depends on some value at the current
                            // loop depth, or remat forces it here:
                            // place it at the current location.
                            (
                                self.value_to_elaborated_value.depth(),
                                before,
                                self.func.layout.inst_block(before).unwrap(),
                            )
                        } else {
                            // Does not depend on any args at current
                            // loop depth: hoist out of loop.
                            self.stats.elaborate_licm_hoist += 1;
                            let data = &self.loop_stack[loop_hoist_level];
                            // `data.hoist_block` should dominate `before`'s block.
                            let before_block = self.func.layout.inst_block(before).unwrap();
                            debug_assert!(self.domtree.dominates(data.hoist_block, before_block));
                            // Determine the instruction at which we
                            // insert in `data.hoist_block`.
                            let before = self.func.layout.last_inst(data.hoist_block).unwrap();
                            (data.scope_depth as usize, before, data.hoist_block)
                        };

                    trace!(
                        " -> decided to place: before {} insert_block {}",
                        before,
                        insert_block
                    );

                    // Now that we have the location for the
                    // instruction, check if any of its args are remat
                    // values. If so, and if we don't have a copy of
                    // the rematerializing instruction for this block
                    // yet, create one.
                    let mut remat_arg = false;
                    for arg_value in arg_values.iter_mut() {
                        if Self::maybe_remat_arg(
                            &self.remat_values,
                            &mut self.func,
                            &mut self.remat_copies,
                            insert_block,
                            before,
                            arg_value,
                            &mut self.stats,
                        ) {
                            remat_arg = true;
                        }
                    }

                    // Now we need to place `inst` at the computed
                    // location (just before `before`). Note that
                    // `inst` may already have been placed somewhere
                    // else, because a pure node may be elaborated at
                    // more than one place. In this case, we need to
                    // duplicate the instruction (and return the
                    // `Value`s for that duplicated instance instead).
                    //
                    // Also clone if we rematerialized, because we
                    // don't want to rewrite the args in the original
                    // copy.
                    trace!("need inst {} before {}", inst, before);
                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
                        // Clone the inst!
                        let new_inst = self.func.dfg.clone_inst(inst);
                        trace!(
                            " -> inst {} already has a location; cloned to {}",
                            inst,
                            new_inst
                        );
                        // Create mappings in the
                        // value-to-elab'd-value map from original
                        // results to cloned results.
                        for (&result, &new_result) in self
                            .func
                            .dfg
                            .inst_results(inst)
                            .iter()
                            .zip(self.func.dfg.inst_results(new_inst).iter())
                        {
                            let elab_value = ElaboratedValue {
                                value: new_result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );

                            self.value_to_best_value[new_result] = best_result;

                            trace!(
                                " -> cloned inst has new result {} for orig {}",
                                new_result,
                                result
                            );
                        }
                        new_inst
                    } else {
                        trace!(" -> no location; using original inst");
                        // Create identity mappings from result values
                        // to themselves in this scope, since we're
                        // using the original inst.
                        for &result in self.func.dfg.inst_results(inst) {
                            let elab_value = ElaboratedValue {
                                value: result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );
                            trace!(" -> inserting identity mapping for {}", result);
                        }
                        inst
                    };

                    // Place the inst just before `before`.
                    assert!(
                        is_pure_for_egraph(self.func, inst),
                        "something has gone very wrong if we are elaborating effectful \
                         instructions, they should have remained in the skeleton"
                    );
                    self.func.layout.insert_inst(inst, before);

                    // Update the inst's arguments.
                    self.func
                        .dfg
                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));

                    // Now that we've consumed the arg values, pop
                    // them off the stack.
                    self.elab_result_stack.truncate(arg_idx);

                    // Push the requested result index of the
                    // instruction onto the elab-results stack.
                    self.elab_result_stack.push(ElaboratedValue {
                        in_block: insert_block,
                        value: self.func.dfg.inst_results(inst)[result_idx],
                    });
                }
            }
        }
    }

    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
        trace!("elaborate_block: block {}", block);
        self.start_block(idom, block);

        // Iterate over the side-effecting skeleton using the linked
        // list in Layout. We will insert instructions that are
        // elaborated *before* `inst`, so we can always use its
        // next-link to continue the iteration.
        let mut next_inst = self.func.layout.first_inst(block);
        let mut first_branch = None;
        while let Some(inst) = next_inst {
            trace!(
                "elaborating inst {} with results {:?}",
                inst,
                self.func.dfg.inst_results(inst)
            );
            // Record the first branch we see in the block; all
            // elaboration for args of *any* branch must be inserted
            // before the *first* branch, because the branch group
            // must remain contiguous at the end of the block.
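            // (For example, if a block ends in a conditional branch
            // followed by an unconditional jump, insts elaborated for
            // the jump's args must still land before the conditional
            // branch, not between the two branches.)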
            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch.is_none() {
                first_branch = Some(inst);
            }

            // Determine where elaboration inserts insts.
            let before = first_branch.unwrap_or(inst);
            trace!(" -> inserting before {}", before);

            elab_values.extend(self.func.dfg.inst_values(inst));
            for arg in elab_values.iter_mut() {
                trace!(" -> arg {}", *arg);
                // Elaborate the arg, placing any newly-inserted insts
                // before `before`. Get the updated value, which may
                // be different than the original.
                let mut new_arg = self.elaborate_eclass_use(*arg, before);
                Self::maybe_remat_arg(
                    &self.remat_values,
                    &mut self.func,
                    &mut self.remat_copies,
                    block,
                    inst,
                    &mut new_arg,
                    &mut self.stats,
                );
                trace!(" -> rewrote arg to {:?}", new_arg);
                *arg = new_arg.value;
            }
            self.func
                .dfg
                .overwrite_inst_values(inst, elab_values.drain(..));

            // We need to put the results of this instruction in the
            // map now.
            for &result in self.func.dfg.inst_results(inst) {
                trace!(" -> result {}", result);
                let best_result = self.value_to_best_value[result];
                self.value_to_elaborated_value.insert_if_absent(
                    best_result.1,
                    ElaboratedValue {
                        in_block: block,
                        value: result,
                    },
                );
            }

            next_inst = self.func.layout.next_inst(inst);
        }
    }

    fn elaborate_domtree(&mut self, domtree: &DominatorTreePreorder) {
        self.block_stack.push(BlockStackEntry::Elaborate {
            block: self.func.layout.entry_block().unwrap(),
            idom: None,
        });

        // A temporary workspace for `elaborate_block`, allocated here
        // so that the allocation is reused across all blocks.
        let mut elab_values = Vec::new();

        while let Some(top) = self.block_stack.pop() {
            match top {
                BlockStackEntry::Elaborate { block, idom } => {
                    self.block_stack.push(BlockStackEntry::Pop);
                    self.value_to_elaborated_value.increment_depth();

                    self.elaborate_block(&mut elab_values, idom, block);

                    // Push children. We are doing a preorder
                    // traversal so we do this after processing this
                    // block above.
                    let block_stack_end = self.block_stack.len();
                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
                        self.block_stack.push(BlockStackEntry::Elaborate {
                            block: child,
                            idom: Some(block),
                        });
                    }
                    // Reverse what we just pushed so we elaborate in
                    // original block order. (The domtree iter is a
                    // single-ended iter over a singly-linked list so
                    // we can't `.rev()` above.)
                    self.block_stack[block_stack_end..].reverse();
                }
                BlockStackEntry::Pop => {
                    self.value_to_elaborated_value.decrement_depth();
                }
            }
        }
    }

    pub(crate) fn elaborate(&mut self) {
        self.stats.elaborate_func += 1;
        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
        self.compute_best_values();
        self.elaborate_domtree(&self.domtree);
        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
    }
}
835}