cranelift_codegen/egraph/
elaborate.rs

1//! Elaboration phase: lowers EGraph back to sequences of operations
2//! in CFG nodes.
3
4use super::cost::Cost;
5use super::Stats;
6use crate::dominator_tree::DominatorTreePreorder;
7use crate::hash_map::Entry as HashEntry;
8use crate::inst_predicates::is_pure_for_egraph;
9use crate::ir::{Block, Function, Inst, Value, ValueDef};
10use crate::loop_analysis::{Loop, LoopAnalysis};
11use crate::scoped_hash_map::ScopedHashMap;
12use crate::trace;
13use alloc::vec::Vec;
14use cranelift_control::ControlPlane;
15use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
16use rustc_hash::{FxHashMap, FxHashSet};
17use smallvec::{smallvec, SmallVec};
18
/// State for the elaboration pass: lowers the e-graph back into
/// linear sequences of instructions in CFG blocks, choosing the
/// best-cost representative of each eclass and placing (or cloning)
/// pure instructions as needed.
pub(crate) struct Elaborator<'a> {
    /// The function being elaborated; mutated in place as
    /// instructions are placed into (or cloned within) its layout.
    func: &'a mut Function,
    /// Dominator-tree preorder, used for dominance queries (e.g.
    /// validating loop hoist points).
    domtree: &'a DominatorTreePreorder,
    /// Loop-nesting analysis, used to drive LICM-style hoisting and
    /// the `loop_stack` below.
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}
72
/// A `(cost, value)` pair recording the best (lowest-cost) value
/// found so far for an eclass. `Cost` is the first field so that the
/// derived/implemented ordering compares by cost before value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);
75
76impl PartialOrd for BestEntry {
77    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
78        Some(self.cmp(other))
79    }
80}
81
82impl Ord for BestEntry {
83    #[inline]
84    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
85        self.0.cmp(&other.0).then_with(|| {
86            // Note that this comparison is reversed. When costs are equal,
87            // prefer the value with the bigger index. This is a heuristic that
88            // prefers results of rewrites to the original value, since we
89            // expect that our rewrites are generally improvements.
90            self.1.cmp(&other.1).reverse()
91        })
92    }
93}
94
/// The result of elaborating a value: the concrete `Value` to use in
/// final code, plus the block in which its definition lives.
#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    /// Block containing the elaborated definition (an inst result or
    /// a blockparam).
    in_block: Block,
    /// The value to reference in place of the original eclass value.
    value: Value,
}
100
/// Per-loop state pushed onto `Elaborator::loop_stack` while the
/// elaboration path is inside that loop; used to decide where
/// loop-invariant instructions may be hoisted.
#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}
114
/// Work items for the explicitly-unrolled (non-recursive) value
/// elaboration state machine in `process_elab_stack`.
#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        /// The instruction whose args are being elaborated.
        inst: Inst,
        /// Which result of `inst` was originally demanded.
        result_idx: usize,
        /// How many elaborated args to take from the top of
        /// `elab_result_stack` when this entry is processed.
        num_args: usize,
        /// Insertion point: new instructions are placed before this
        /// instruction (unless hoisted out of a loop).
        before: Inst,
    },
}
132
/// Work items for the explicitly-unrolled domtree traversal of
/// blocks (`Elaborator::block_stack`).
#[derive(Clone, Debug)]
enum BlockStackEntry {
    /// Elaborate `block`, whose immediate dominator is `idom`
    /// (`None` for the domtree root / entry block).
    Elaborate { block: Block, idom: Option<Block> },
    /// Unwind state when leaving a domtree subtree — presumably pops
    /// a scope opened by the matching `Elaborate`; the handler is in
    /// the block-stack driver, outside this chunk (TODO confirm).
    Pop,
}
138
139impl<'a> Elaborator<'a> {
140    pub(crate) fn new(
141        func: &'a mut Function,
142        domtree: &'a DominatorTreePreorder,
143        loop_analysis: &'a LoopAnalysis,
144        remat_values: &'a FxHashSet<Value>,
145        stats: &'a mut Stats,
146        ctrl_plane: &'a mut ControlPlane,
147    ) -> Self {
148        let num_values = func.dfg.num_values();
149        let mut value_to_best_value =
150            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
151        value_to_best_value.resize(num_values);
152        Self {
153            func,
154            domtree,
155            loop_analysis,
156            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
157            value_to_best_value,
158            loop_stack: smallvec![],
159            cur_block: Block::reserved_value(),
160            remat_values,
161            elab_stack: vec![],
162            elab_result_stack: vec![],
163            block_stack: vec![],
164            remat_copies: FxHashMap::default(),
165            stats,
166            ctrl_plane,
167        }
168    }
169
170    fn start_block(&mut self, idom: Option<Block>, block: Block) {
171        trace!(
172            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
173            block,
174            idom,
175            self.loop_stack.len(),
176            self.value_to_elaborated_value.depth()
177        );
178
179        // Pop any loop levels we're no longer in.
180        while let Some(inner_loop) = self.loop_stack.last() {
181            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
182                break;
183            }
184            self.loop_stack.pop();
185        }
186
187        // Note that if the *entry* block is a loop header, we will
188        // not make note of the loop here because it will not have an
189        // immediate dominator. We must disallow this case because we
190        // will skip adding the `LoopStackEntry` here but our
191        // `LoopAnalysis` will otherwise still make note of this loop
192        // and loop depths will not match.
193        if let Some(idom) = idom {
194            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
195                self.loop_stack.push(LoopStackEntry {
196                    lp,
197                    // Any code hoisted out of this loop will have code
198                    // placed in `idom`, and will have def mappings
199                    // inserted in to the scoped hashmap at that block's
200                    // level.
201                    hoist_block: idom,
202                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
203                });
204                trace!(
205                    " -> loop header, pushing; depth now {}",
206                    self.loop_stack.len()
207                );
208            }
209        } else {
210            debug_assert!(
211                self.loop_analysis.is_loop_header(block).is_none(),
212                "Entry block (domtree root) cannot be a loop header!"
213            );
214        }
215
216        trace!("block {}: loop stack is {:?}", block, self.loop_stack);
217
218        self.cur_block = block;
219    }
220
    /// Fill `self.value_to_best_value` with the lowest-cost (or, in
    /// chaos-testing mode, deterministically worst) representative of
    /// each eclass, via a fixpoint loop over the dataflow graph.
    fn compute_best_values(&mut self) {
        let best = &mut self.value_to_best_value;

        // We can't make random decisions inside the fixpoint loop below because
        // that could cause values to change on every iteration of the loop,
        // which would make the loop never terminate. So in chaos testing
        // mode we need a form of making suboptimal decisions that is fully
        // deterministic. We choose to simply make the worst decision we know
        // how to do instead of the best.
        let use_worst = self.ctrl_plane.get_decision();

        // Do a fixpoint loop to compute the best value for each eclass.
        //
        // The maximum number of iterations is the length of the longest chain
        // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
        // is *technically* quadratic, but `cranelift-frontend` won't construct
        // any such edges. NaN canonicalization will introduce some of these
        // edges, but they are chains of only two or three edges. So in
        // practice, we *never* do more than a handful of iterations here unless
        // (a) we parsed the CLIF from text and the text was funkily numbered,
        // which we don't really care about, or (b) the CLIF producer did
        // something weird, in which case it is their responsibility to stop
        // doing that.
        trace!(
            "Entering fixpoint loop to compute the {} values for each eclass",
            if use_worst {
                "worst (chaos mode)"
            } else {
                "best"
            }
        );
        let mut keep_going = true;
        while keep_going {
            keep_going = false;
            trace!(
                "fixpoint iteration {}",
                self.stats.elaborate_best_cost_fixpoint_iters
            );
            self.stats.elaborate_best_cost_fixpoint_iters += 1;

            for (value, def) in self.func.dfg.values_and_defs() {
                trace!("computing best for value {:?} def {:?}", value, def);
                let orig_best_value = best[value];

                match def {
                    ValueDef::Union(x, y) => {
                        // Pick the best of the two options based on
                        // min-cost. This works because each element of `best`
                        // is a `(cost, value)` tuple; `cost` comes first so
                        // the natural comparison works based on cost, and
                        // breaks ties based on value number.
                        best[value] = if use_worst {
                            // A reserved value means "not yet computed";
                            // never prefer it over a computed entry.
                            if best[x].1.is_reserved_value() {
                                best[y]
                            } else if best[y].1.is_reserved_value() {
                                best[x]
                            } else {
                                std::cmp::max(best[x], best[y])
                            }
                        } else {
                            std::cmp::min(best[x], best[y])
                        };
                        trace!(
                            " -> best of union({:?}, {:?}) = {:?}",
                            best[x],
                            best[y],
                            best[value]
                        );
                    }
                    ValueDef::Param(_, _) => {
                        // Blockparams are always available: zero cost.
                        best[value] = BestEntry(Cost::zero(), value);
                    }
                    // If the Inst is inserted into the layout (which is,
                    // at this point, only the side-effecting skeleton),
                    // then it must be computed and thus we give it zero
                    // cost.
                    ValueDef::Result(inst, _) => {
                        if let Some(_) = self.func.layout.inst_block(inst) {
                            best[value] = BestEntry(Cost::zero(), value);
                        } else {
                            let inst_data = &self.func.dfg.insts[inst];
                            // N.B.: at this point we know that the opcode is
                            // pure, so `pure_op_cost`'s precondition is
                            // satisfied.
                            let cost = Cost::of_pure_op(
                                inst_data.opcode(),
                                self.func.dfg.inst_values(inst).map(|value| best[value].0),
                            );
                            best[value] = BestEntry(cost, value);
                            trace!(" -> cost of value {} = {:?}", value, cost);
                        }
                    }
                };

                // Keep on iterating the fixpoint loop while we are finding new
                // best values.
                keep_going |= orig_best_value != best[value];
            }
        }

        if cfg!(any(feature = "trace-log", debug_assertions)) {
            trace!("finished fixpoint loop to compute best value for each eclass");
            for value in self.func.dfg.values() {
                trace!("-> best for eclass {:?}: {:?}", value, best[value]);
                debug_assert_ne!(best[value].1, Value::reserved_value());
                // You might additionally be expecting an assert that the best
                // cost is not infinity, however infinite cost *can* happen in
                // practice. First, note that our cost function doesn't know
                // about any shared structure in the dataflow graph, it only
                // sums operand costs. (And trying to avoid that by deduping a
                // single operation's operands is a losing game because you can
                // always just add one indirection and go from `add(x, x)` to
                // `add(foo(x), bar(x))` to hide the shared structure.) Given
                // that blindness to sharing, we can make cost grow
                // exponentially with a linear sequence of operations:
                //
                //     v0 = iconst.i32 1    ;; cost = 1
                //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
                //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
                //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
                //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
                //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
                //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
                //     ;; etc...
                //
                // Such a chain can cause cost to saturate to infinity. How do
                // we choose which e-node is best when there are multiple that
                // have saturated to infinity? It doesn't matter. As long as
                // invariant (2) for optimization rules is upheld by our rule
                // set (see `cranelift/codegen/src/opts/README.md`) it is safe
                // to choose *any* e-node in the e-class. At worst we will
                // produce suboptimal code, but never an incorrectness.
            }
        }
    }
356
357    /// Elaborate use of an eclass, inserting any needed new
358    /// instructions before the given inst `before`. Should only be
359    /// given values corresponding to results of instructions or
360    /// blockparams.
361    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
362        debug_assert_ne!(value, Value::reserved_value());
363
364        // Kick off the process by requesting this result
365        // value.
366        self.elab_stack
367            .push(ElabStackEntry::Start { value, before });
368
369        // Now run the explicit-stack recursion until we reach
370        // the root.
371        self.process_elab_stack();
372        debug_assert_eq!(self.elab_result_stack.len(), 1);
373        self.elab_result_stack.pop().unwrap()
374    }
375
376    /// Possibly rematerialize the instruction producing the value in
377    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
378    /// `true` if a rewrite occurred.
379    fn maybe_remat_arg(
380        remat_values: &FxHashSet<Value>,
381        func: &mut Function,
382        remat_copies: &mut FxHashMap<(Block, Value), Value>,
383        insert_block: Block,
384        before: Inst,
385        arg: &mut ElaboratedValue,
386        stats: &mut Stats,
387    ) -> bool {
388        // TODO (#7313): we may want to consider recursive
389        // rematerialization as well. We could process the arguments of
390        // the rematerialized instruction up to a certain depth. This
391        // would affect, e.g., adds-with-one-constant-arg, which are
392        // currently rematerialized. Right now we don't do this, to
393        // avoid the need for another fixpoint loop here.
394        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
395            let new_value = match remat_copies.entry((insert_block, arg.value)) {
396                HashEntry::Occupied(o) => *o.get(),
397                HashEntry::Vacant(v) => {
398                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
399                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
400                    let new_inst = func.dfg.clone_inst(inst);
401                    func.layout.insert_inst(new_inst, before);
402                    let new_result = func.dfg.inst_results(new_inst)[0];
403                    *v.insert(new_result)
404                }
405            };
406            trace!("rematerialized {} as {}", arg.value, new_value);
407            arg.value = new_value;
408            stats.elaborate_remat += 1;
409            true
410        } else {
411            false
412        }
413    }
414
415    fn process_elab_stack(&mut self) {
416        while let Some(entry) = self.elab_stack.pop() {
417            match entry {
418                ElabStackEntry::Start { value, before } => {
419                    debug_assert!(self.func.dfg.value_is_real(value));
420
421                    self.stats.elaborate_visit_node += 1;
422
423                    // Get the best option; we use `value` (latest
424                    // value) here so we have a full view of the
425                    // eclass.
426                    trace!("looking up best value for {}", value);
427                    let BestEntry(_, best_value) = self.value_to_best_value[value];
428                    trace!("elaborate: value {} -> best {}", value, best_value);
429                    debug_assert_ne!(best_value, Value::reserved_value());
430
431                    if let Some(elab_val) = self.value_to_elaborated_value.get(&best_value) {
432                        // Value is available; use it.
433                        trace!("elaborate: value {} -> {:?}", value, elab_val);
434                        self.stats.elaborate_memoize_hit += 1;
435                        self.elab_result_stack.push(*elab_val);
436                        continue;
437                    }
438
439                    self.stats.elaborate_memoize_miss += 1;
440
441                    // Now resolve the value to its definition to see
442                    // how we can compute it.
443                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
444                        ValueDef::Result(inst, result_idx) => {
445                            trace!(
446                                " -> value {} is result {} of {}",
447                                best_value,
448                                result_idx,
449                                inst
450                            );
451                            (inst, result_idx)
452                        }
453                        ValueDef::Param(in_block, _) => {
454                            // We don't need to do anything to compute
455                            // this value; just push its result on the
456                            // result stack (blockparams are already
457                            // available).
458                            trace!(" -> value {} is a blockparam", best_value);
459                            self.elab_result_stack.push(ElaboratedValue {
460                                in_block,
461                                value: best_value,
462                            });
463                            continue;
464                        }
465                        ValueDef::Union(_, _) => {
466                            panic!("Should never have a Union value as the best value");
467                        }
468                    };
469
470                    trace!(
471                        " -> result {} of inst {:?}",
472                        result_idx,
473                        self.func.dfg.insts[inst]
474                    );
475
476                    // We're going to need to use this instruction
477                    // result, placing the instruction into the
478                    // layout. First, enqueue all args to be
479                    // elaborated. Push state to receive the results
480                    // and later elab this inst.
481                    let num_args = self.func.dfg.inst_values(inst).count();
482                    self.elab_stack.push(ElabStackEntry::PendingInst {
483                        inst,
484                        result_idx,
485                        num_args,
486                        before,
487                    });
488
489                    // Push args in reverse order so we process the
490                    // first arg first.
491                    for arg in self.func.dfg.inst_values(inst).rev() {
492                        debug_assert_ne!(arg, Value::reserved_value());
493                        self.elab_stack
494                            .push(ElabStackEntry::Start { value: arg, before });
495                    }
496                }
497
498                ElabStackEntry::PendingInst {
499                    inst,
500                    result_idx,
501                    num_args,
502                    before,
503                } => {
504                    trace!(
505                        "PendingInst: {} result {} args {} before {}",
506                        inst,
507                        result_idx,
508                        num_args,
509                        before
510                    );
511
512                    // We should have all args resolved at this
513                    // point. Grab them and drain them out, removing
514                    // them.
515                    let arg_idx = self.elab_result_stack.len() - num_args;
516                    let arg_values = &mut self.elab_result_stack[arg_idx..];
517
518                    // Compute max loop depth.
519                    //
520                    // Note that if there are no arguments then this instruction
521                    // is allowed to get hoisted up one loop. This is not
522                    // usually used since no-argument values are things like
523                    // constants which are typically rematerialized, but for the
524                    // `vconst` instruction 128-bit constants aren't as easily
525                    // rematerialized. They're hoisted out of inner loops but
526                    // not to the function entry which may run the risk of
527                    // placing too much register pressure on the entire
528                    // function. This is modeled with the `.saturating_sub(1)`
529                    // as the default if there's otherwise no maximum.
530                    let loop_hoist_level = arg_values
531                        .iter()
532                        .map(|&value| {
533                            // Find the outermost loop level at which
534                            // the value's defining block *is not* a
535                            // member. This is the loop-nest level
536                            // whose hoist-block we hoist to.
537                            let hoist_level = self
538                                .loop_stack
539                                .iter()
540                                .position(|loop_entry| {
541                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
542                                })
543                                .unwrap_or(self.loop_stack.len());
544                            trace!(
545                                " -> arg: elab_value {:?} hoist level {:?}",
546                                value,
547                                hoist_level
548                            );
549                            hoist_level
550                        })
551                        .max()
552                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
553                    trace!(
554                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
555                        loop_hoist_level,
556                        self.loop_stack.len(),
557                        self.loop_stack,
558                    );
559
560                    // We know that this is a pure inst, because
561                    // non-pure roots have already been placed in the
562                    // value-to-elab'd-value map, so they will not
563                    // reach this stage of processing.
564                    //
565                    // We now must determine the location at which we
566                    // place the instruction. This is the current
567                    // block *unless* we hoist above a loop when all
568                    // args are loop-invariant (and this op is pure).
569                    let (scope_depth, before, insert_block) =
570                        if loop_hoist_level == self.loop_stack.len() {
571                            // Depends on some value at the current
572                            // loop depth, or remat forces it here:
573                            // place it at the current location.
574                            (
575                                self.value_to_elaborated_value.depth(),
576                                before,
577                                self.func.layout.inst_block(before).unwrap(),
578                            )
579                        } else {
580                            // Does not depend on any args at current
581                            // loop depth: hoist out of loop.
582                            self.stats.elaborate_licm_hoist += 1;
583                            let data = &self.loop_stack[loop_hoist_level];
584                            // `data.hoist_block` should dominate `before`'s block.
585                            let before_block = self.func.layout.inst_block(before).unwrap();
586                            debug_assert!(self.domtree.dominates(data.hoist_block, before_block));
587                            // Determine the instruction at which we
588                            // insert in `data.hoist_block`.
589                            let before = self.func.layout.last_inst(data.hoist_block).unwrap();
590                            (data.scope_depth as usize, before, data.hoist_block)
591                        };
592
593                    trace!(
594                        " -> decided to place: before {} insert_block {}",
595                        before,
596                        insert_block
597                    );
598
599                    // Now that we have the location for the
600                    // instruction, check if any of its args are remat
601                    // values. If so, and if we don't have a copy of
602                    // the rematerializing instruction for this block
603                    // yet, create one.
604                    let mut remat_arg = false;
605                    for arg_value in arg_values.iter_mut() {
606                        if Self::maybe_remat_arg(
607                            &self.remat_values,
608                            &mut self.func,
609                            &mut self.remat_copies,
610                            insert_block,
611                            before,
612                            arg_value,
613                            &mut self.stats,
614                        ) {
615                            remat_arg = true;
616                        }
617                    }
618
619                    // Now we need to place `inst` at the computed
620                    // location (just before `before`). Note that
621                    // `inst` may already have been placed somewhere
622                    // else, because a pure node may be elaborated at
623                    // more than one place. In this case, we need to
624                    // duplicate the instruction (and return the
625                    // `Value`s for that duplicated instance instead).
626                    //
627                    // Also clone if we rematerialized, because we
628                    // don't want to rewrite the args in the original
629                    // copy.
630                    trace!("need inst {} before {}", inst, before);
631                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
632                        // Clone the inst!
633                        let new_inst = self.func.dfg.clone_inst(inst);
634                        trace!(
635                            " -> inst {} already has a location; cloned to {}",
636                            inst,
637                            new_inst
638                        );
639                        // Create mappings in the
640                        // value-to-elab'd-value map from original
641                        // results to cloned results.
642                        for (&result, &new_result) in self
643                            .func
644                            .dfg
645                            .inst_results(inst)
646                            .iter()
647                            .zip(self.func.dfg.inst_results(new_inst).iter())
648                        {
649                            let elab_value = ElaboratedValue {
650                                value: new_result,
651                                in_block: insert_block,
652                            };
653                            let best_result = self.value_to_best_value[result];
654                            self.value_to_elaborated_value.insert_if_absent_with_depth(
655                                best_result.1,
656                                elab_value,
657                                scope_depth,
658                            );
659
660                            self.value_to_best_value[new_result] = best_result;
661
662                            trace!(
663                                " -> cloned inst has new result {} for orig {}",
664                                new_result,
665                                result
666                            );
667                        }
668                        new_inst
669                    } else {
670                        trace!(" -> no location; using original inst");
671                        // Create identity mappings from result values
672                        // to themselves in this scope, since we're
673                        // using the original inst.
674                        for &result in self.func.dfg.inst_results(inst) {
675                            let elab_value = ElaboratedValue {
676                                value: result,
677                                in_block: insert_block,
678                            };
679                            let best_result = self.value_to_best_value[result];
680                            self.value_to_elaborated_value.insert_if_absent_with_depth(
681                                best_result.1,
682                                elab_value,
683                                scope_depth,
684                            );
685                            trace!(" -> inserting identity mapping for {}", result);
686                        }
687                        inst
688                    };
689
690                    // Place the inst just before `before`.
691                    assert!(
692                        is_pure_for_egraph(self.func, inst),
693                        "something has gone very wrong if we are elaborating effectful \
694                         instructions, they should have remained in the skeleton"
695                    );
696                    self.func.layout.insert_inst(inst, before);
697
698                    // Update the inst's arguments.
699                    self.func
700                        .dfg
701                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));
702
703                    // Now that we've consumed the arg values, pop
704                    // them off the stack.
705                    self.elab_result_stack.truncate(arg_idx);
706
707                    // Push the requested result index of the
708                    // instruction onto the elab-results stack.
709                    self.elab_result_stack.push(ElaboratedValue {
710                        in_block: insert_block,
711                        value: self.func.dfg.inst_results(inst)[result_idx],
712                    });
713                }
714            }
715        }
716    }
717
718    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
719        trace!("elaborate_block: block {}", block);
720        self.start_block(idom, block);
721
722        // Iterate over the side-effecting skeleton using the linked
723        // list in Layout. We will insert instructions that are
724        // elaborated *before* `inst`, so we can always use its
725        // next-link to continue the iteration.
726        let mut next_inst = self.func.layout.first_inst(block);
727        let mut first_branch = None;
728        while let Some(inst) = next_inst {
729            trace!(
730                "elaborating inst {} with results {:?}",
731                inst,
732                self.func.dfg.inst_results(inst)
733            );
734            // Record the first branch we see in the block; all
735            // elaboration for args of *any* branch must be inserted
736            // before the *first* branch, because the branch group
737            // must remain contiguous at the end of the block.
738            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
739                first_branch = Some(inst);
740            }
741
742            // Determine where elaboration inserts insts.
743            let before = first_branch.unwrap_or(inst);
744            trace!(" -> inserting before {}", before);
745
746            elab_values.extend(self.func.dfg.inst_values(inst));
747            for arg in elab_values.iter_mut() {
748                trace!(" -> arg {}", *arg);
749                // Elaborate the arg, placing any newly-inserted insts
750                // before `before`. Get the updated value, which may
751                // be different than the original.
752                let mut new_arg = self.elaborate_eclass_use(*arg, before);
753                Self::maybe_remat_arg(
754                    &self.remat_values,
755                    &mut self.func,
756                    &mut self.remat_copies,
757                    block,
758                    inst,
759                    &mut new_arg,
760                    &mut self.stats,
761                );
762                trace!("   -> rewrote arg to {:?}", new_arg);
763                *arg = new_arg.value;
764            }
765            self.func
766                .dfg
767                .overwrite_inst_values(inst, elab_values.drain(..));
768
769            // We need to put the results of this instruction in the
770            // map now.
771            for &result in self.func.dfg.inst_results(inst) {
772                trace!(" -> result {}", result);
773                let best_result = self.value_to_best_value[result];
774                self.value_to_elaborated_value.insert_if_absent(
775                    best_result.1,
776                    ElaboratedValue {
777                        in_block: block,
778                        value: result,
779                    },
780                );
781            }
782
783            next_inst = self.func.layout.next_inst(inst);
784        }
785    }
786
787    fn elaborate_domtree(&mut self, domtree: &DominatorTreePreorder) {
788        self.block_stack.push(BlockStackEntry::Elaborate {
789            block: self.func.layout.entry_block().unwrap(),
790            idom: None,
791        });
792
793        // A temporary workspace for elaborate_block, allocated here to maximize the use of the
794        // allocation.
795        let mut elab_values = Vec::new();
796
797        while let Some(top) = self.block_stack.pop() {
798            match top {
799                BlockStackEntry::Elaborate { block, idom } => {
800                    self.block_stack.push(BlockStackEntry::Pop);
801                    self.value_to_elaborated_value.increment_depth();
802
803                    self.elaborate_block(&mut elab_values, idom, block);
804
805                    // Push children. We are doing a preorder
806                    // traversal so we do this after processing this
807                    // block above.
808                    let block_stack_end = self.block_stack.len();
809                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
810                        self.block_stack.push(BlockStackEntry::Elaborate {
811                            block: child,
812                            idom: Some(block),
813                        });
814                    }
815                    // Reverse what we just pushed so we elaborate in
816                    // original block order. (The domtree iter is a
817                    // single-ended iter over a singly-linked list so
818                    // we can't `.rev()` above.)
819                    self.block_stack[block_stack_end..].reverse();
820                }
821                BlockStackEntry::Pop => {
822                    self.value_to_elaborated_value.decrement_depth();
823                }
824            }
825        }
826    }
827
828    pub(crate) fn elaborate(&mut self) {
829        self.stats.elaborate_func += 1;
830        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
831        self.compute_best_values();
832        self.elaborate_domtree(&self.domtree);
833        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
834    }
835}