// cranelift_codegen/egraph/elaborate.rs
//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.

4use super::Stats;
5use super::cost::Cost;
6use crate::ctxhash::NullCtx;
7use crate::dominator_tree::DominatorTree;
8use crate::hash_map::Entry as HashEntry;
9use crate::inst_predicates::is_pure_for_egraph;
10use crate::ir::{Block, Function, Inst, Value, ValueDef};
11use crate::loop_analysis::{Loop, LoopAnalysis};
12use crate::scoped_hash_map::ScopedHashMap;
13use crate::trace;
14use crate::{FxHashMap, FxHashSet};
15use alloc::vec::Vec;
16use cranelift_control::ControlPlane;
17use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue};
18use smallvec::{SmallVec, smallvec};
19
/// Driver for the elaboration phase: walks the domtree and places
/// (pure) instructions back into the layout, on demand, at the uses
/// of their results.
pub(crate) struct Elaborator<'a> {
    /// The function being elaborated (provides the DFG and layout we
    /// read and mutate).
    func: &'a mut Function,
    /// Dominator tree; used to validate that hoist blocks dominate
    /// insertion points.
    domtree: &'a DominatorTree,
    /// Loop analysis; used for loop-membership queries when hoisting
    /// loop-invariant ops.
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}
73
/// A `(cost, value)` pair: the currently-known best representative of
/// an eclass, together with its cost. The cost field comes first so
/// that the derived/implemented orderings compare by cost before
/// breaking ties on the value number.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);
76
impl PartialOrd for BestEntry {
    // Delegate to the total order in `Ord` so the two orderings can
    // never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
82
83impl Ord for BestEntry {
84    #[inline]
85    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
86        self.0.cmp(&other.0).then_with(|| {
87            // Note that this comparison is reversed. When costs are equal,
88            // prefer the value with the bigger index. This is a heuristic that
89            // prefers results of rewrites to the original value, since we
90            // expect that our rewrites are generally improvements.
91            self.1.cmp(&other.1).reverse()
92        })
93    }
94}
95
/// An already-elaborated value: the concrete `Value` to reference in
/// the final code, plus the block in which it was placed.
#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    // Block in which this value is defined/available.
    in_block: Block,
    // The value to use at reference sites.
    value: Value,
}
101
/// One level of the loop-nest stack maintained while walking the
/// domtree; records where loop-invariant code for this loop may be
/// hoisted to.
#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map (`value_to_elaborated_value`) at
    /// which mappings for hoisted defs are inserted.
    scope_depth: u32,
}
115
/// One frame of the explicitly-unrolled value-elaboration recursion.
#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        /// The instruction whose args are being elaborated.
        inst: Inst,
        /// Which result of `inst` the original `Start` request wanted.
        result_idx: usize,
        /// Number of argument results to pop off `elab_result_stack`.
        num_args: usize,
        /// Insertion point: place `inst` (or its clone) before this.
        before: Inst,
    },
}
133
/// One frame of the explicitly-unrolled domtree-walk recursion.
#[derive(Clone, Debug)]
enum BlockStackEntry {
    /// Elaborate `block`, whose immediate dominator is `idom`
    /// (`None` for the entry block; see `start_block`).
    Elaborate { block: Block, idom: Option<Block> },
    /// Marker to unwind state pushed for a block's subtree —
    /// presumably pops a `value_to_elaborated_value` scope level;
    /// confirm against the (not-in-view) block-stack processing loop.
    Pop,
}
139
140impl<'a> Elaborator<'a> {
141    pub(crate) fn new(
142        func: &'a mut Function,
143        domtree: &'a DominatorTree,
144        loop_analysis: &'a LoopAnalysis,
145        remat_values: &'a FxHashSet<Value>,
146        stats: &'a mut Stats,
147        ctrl_plane: &'a mut ControlPlane,
148    ) -> Self {
149        let num_values = func.dfg.num_values();
150        let mut value_to_best_value =
151            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
152        value_to_best_value.resize(num_values);
153        Self {
154            func,
155            domtree,
156            loop_analysis,
157            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
158            value_to_best_value,
159            loop_stack: smallvec![],
160            cur_block: Block::reserved_value(),
161            remat_values,
162            elab_stack: vec![],
163            elab_result_stack: vec![],
164            block_stack: vec![],
165            remat_copies: FxHashMap::default(),
166            stats,
167            ctrl_plane,
168        }
169    }
170
    /// Update per-block state on entering `block` (with immediate
    /// dominator `idom`): maintain the loop stack to reflect the
    /// loops containing `block`, and set `cur_block`.
    fn start_block(&mut self, idom: Option<Block>, block: Block) {
        trace!(
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
            block,
            idom,
            self.loop_stack.len(),
            self.value_to_elaborated_value.depth()
        );

        // Pop any loop levels we're no longer in.
        while let Some(inner_loop) = self.loop_stack.last() {
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
                break;
            }
            self.loop_stack.pop();
        }

        // Note that if the *entry* block is a loop header, we will
        // not make note of the loop here because it will not have an
        // immediate dominator. We must disallow this case because we
        // will skip adding the `LoopStackEntry` here but our
        // `LoopAnalysis` will otherwise still make note of this loop
        // and loop depths will not match.
        if let Some(idom) = idom {
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                self.loop_stack.push(LoopStackEntry {
                    lp,
                    // Any code hoisted out of this loop will have code
                    // placed in `idom`, and will have def mappings
                    // inserted in to the scoped hashmap at that block's
                    // level.
                    hoist_block: idom,
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                });
                trace!(
                    " -> loop header, pushing; depth now {}",
                    self.loop_stack.len()
                );
            }
        } else {
            debug_assert!(
                self.loop_analysis.is_loop_header(block).is_none(),
                "Entry block (domtree root) cannot be a loop header!"
            );
        }

        trace!("block {}: loop stack is {:?}", block, self.loop_stack);

        self.cur_block = block;
    }
221
    /// Return all values reachable from instruction operands in the
    /// layout, topologically sorted so that each value's definition
    /// (its defining inst's operands, or its union's members) appears
    /// before the value itself. Implemented as an explicit-stack DFS
    /// over the dataflow graph with enter/exit events; exit-event
    /// order yields the post-order.
    fn topo_sorted_values(&self) -> Vec<Value> {
        #[derive(Debug)]
        enum Event {
            Enter,
            Exit,
        }
        let mut stack = Vec::<(Event, Value)>::new();

        // Traverse the CFG in pre-order so that, when we look at the
        // instructions and operands inside each block, we see value defs before
        // uses.
        for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) {
            for inst in self.func.layout.block_insts(block) {
                stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v)));
            }
        }

        // We pushed in the desired order, so popping would implicitly reverse
        // that. Avoid that by reversing the initial stack before we start
        // traversing the DFG.
        stack.reverse();

        let mut sorted = Vec::with_capacity(self.func.dfg.values().len());
        let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.values().len());

        // Post-order traversal of the DFG, visiting value defs before uses.
        while let Some((event, value)) = stack.pop() {
            match event {
                Event::Enter => {
                    // First visit: schedule the exit event, then push
                    // this value's dependencies so they are processed
                    // (and thus emitted) before it.
                    if seen.insert(value) {
                        stack.push((Event::Exit, value));
                        match self.func.dfg.value_def(value) {
                            ValueDef::Result(inst, _) => {
                                // Push args reversed so the first arg
                                // is popped (visited) first.
                                stack.extend(
                                    self.func
                                        .dfg
                                        .inst_values(inst)
                                        .rev()
                                        .filter(|v| !seen.contains(*v))
                                        .map(|v| (Event::Enter, v)),
                                );
                            }
                            ValueDef::Union(a, b) => {
                                if !seen.contains(b) {
                                    stack.push((Event::Enter, b));
                                }
                                if !seen.contains(a) {
                                    stack.push((Event::Enter, a));
                                }
                            }
                            // Blockparams have no dataflow dependencies.
                            ValueDef::Param(..) => {}
                        }
                    }
                }
                Event::Exit => {
                    sorted.push(value);
                }
            }
        }

        sorted
    }
284
285    fn compute_best_values(&mut self) {
286        let sorted_values = self.topo_sorted_values();
287
288        let best = &mut self.value_to_best_value;
289
290        // We can't make random decisions inside the fixpoint loop below because
291        // that could cause values to change on every iteration of the loop,
292        // which would make the loop never terminate. So in chaos testing
293        // mode we need a form of making suboptimal decisions that is fully
294        // deterministic. We choose to simply make the worst decision we know
295        // how to do instead of the best.
296        let use_worst = self.ctrl_plane.get_decision();
297
298        trace!(
299            "Computing the {} values for each eclass",
300            if use_worst {
301                "worst (chaos mode)"
302            } else {
303                "best"
304            }
305        );
306
307        // Because the values are topologically sorted, we know that we will see
308        // defs before uses, so an instruction's operands' costs will already be
309        // computed by the time we are computing the cost for the current value
310        // and its instruction.
311        for value in sorted_values.iter().copied() {
312            let def = self.func.dfg.value_def(value);
313            trace!("computing best for value {:?} def {:?}", value, def);
314
315            match def {
316                // Pick the best of the two options based on min-cost. This
317                // works because each element of `best` is a `(cost, value)`
318                // tuple; `cost` comes first so the natural comparison works
319                // based on cost, and breaks ties based on value number.
320                ValueDef::Union(x, y) => {
321                    debug_assert!(!best[x].1.is_reserved_value());
322                    debug_assert!(!best[y].1.is_reserved_value());
323                    best[value] = if use_worst {
324                        core::cmp::max(best[x], best[y])
325                    } else {
326                        core::cmp::min(best[x], best[y])
327                    };
328                    trace!(
329                        " -> best of union({:?}, {:?}) = {:?}",
330                        best[x], best[y], best[value]
331                    );
332                }
333
334                ValueDef::Param(_, _) => {
335                    best[value] = BestEntry(Cost::zero(), value);
336                }
337
338                // If the Inst is inserted into the layout (which is,
339                // at this point, only the side-effecting skeleton),
340                // then it must be computed and thus we give it zero
341                // cost.
342                ValueDef::Result(inst, _) => {
343                    if let Some(_) = self.func.layout.inst_block(inst) {
344                        best[value] = BestEntry(Cost::zero(), value);
345                    } else {
346                        let inst_data = &self.func.dfg.insts[inst];
347                        // N.B.: at this point we know that the opcode is
348                        // pure, so `pure_op_cost`'s precondition is
349                        // satisfied.
350                        let cost = Cost::of_pure_op(
351                            inst_data.opcode(),
352                            self.func.dfg.inst_values(inst).map(|value| {
353                                debug_assert!(!best[value].1.is_reserved_value());
354                                best[value].0
355                            }),
356                        );
357                        best[value] = BestEntry(cost, value);
358                        trace!(" -> cost of value {} = {:?}", value, cost);
359                    }
360                }
361            };
362
363            // You might be expecting an assert that the best cost we just
364            // computed is not infinity, however infinite cost *can* happen in
365            // practice. First, note that our cost function doesn't know about
366            // any shared structure in the dataflow graph, it only sums operand
367            // costs. (And trying to avoid that by deduping a single operation's
368            // operands is a losing game because you can always just add one
369            // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to
370            // hide the shared structure.) Given that blindness to sharing, we
371            // can make cost grow exponentially with a linear sequence of
372            // operations:
373            //
374            //     v0 = iconst.i32 1    ;; cost = 1
375            //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
376            //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
377            //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
378            //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
379            //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
380            //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
381            //     ;; etc...
382            //
383            // Such a chain can cause cost to saturate to infinity. How do we
384            // choose which e-node is best when there are multiple that have
385            // saturated to infinity? It doesn't matter. As long as invariant
386            // (2) for optimization rules is upheld by our rule set (see
387            // `cranelift/codegen/src/opts/README.md`) it is safe to choose
388            // *any* e-node in the e-class. At worst we will produce suboptimal
389            // code, but never an incorrectness.
390        }
391    }
392
    /// Elaborate use of an eclass, inserting any needed new
    /// instructions before the given inst `before`. Should only be
    /// given values corresponding to results of instructions or
    /// blockparams.
    ///
    /// Invariant: `elab_result_stack` must be empty on entry; the
    /// explicit-stack recursion leaves exactly one result, which is
    /// popped and returned.
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
        debug_assert_ne!(value, Value::reserved_value());

        // Kick off the process by requesting this result
        // value.
        self.elab_stack
            .push(ElabStackEntry::Start { value, before });

        // Now run the explicit-stack recursion until we reach
        // the root.
        self.process_elab_stack();
        debug_assert_eq!(self.elab_result_stack.len(), 1);
        self.elab_result_stack.pop().unwrap()
    }
411
412    /// Possibly rematerialize the instruction producing the value in
413    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
414    /// `true` if a rewrite occurred.
415    fn maybe_remat_arg(
416        remat_values: &FxHashSet<Value>,
417        func: &mut Function,
418        remat_copies: &mut FxHashMap<(Block, Value), Value>,
419        insert_block: Block,
420        before: Inst,
421        arg: &mut ElaboratedValue,
422        stats: &mut Stats,
423    ) -> bool {
424        // TODO (#7313): we may want to consider recursive
425        // rematerialization as well. We could process the arguments of
426        // the rematerialized instruction up to a certain depth. This
427        // would affect, e.g., adds-with-one-constant-arg, which are
428        // currently rematerialized. Right now we don't do this, to
429        // avoid the need for another fixpoint loop here.
430        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
431            let new_value = match remat_copies.entry((insert_block, arg.value)) {
432                HashEntry::Occupied(o) => *o.get(),
433                HashEntry::Vacant(v) => {
434                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
435                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
436                    let new_inst = func.dfg.clone_inst(inst);
437                    func.layout.insert_inst(new_inst, before);
438                    let new_result = func.dfg.inst_results(new_inst)[0];
439                    *v.insert(new_result)
440                }
441            };
442            trace!("rematerialized {} as {}", arg.value, new_value);
443            arg.value = new_value;
444            stats.elaborate_remat += 1;
445            true
446        } else {
447            false
448        }
449    }
450
451    fn process_elab_stack(&mut self) {
452        while let Some(entry) = self.elab_stack.pop() {
453            match entry {
454                ElabStackEntry::Start { value, before } => {
455                    debug_assert!(self.func.dfg.value_is_real(value));
456
457                    self.stats.elaborate_visit_node += 1;
458
459                    // Get the best option; we use `value` (latest
460                    // value) here so we have a full view of the
461                    // eclass.
462                    trace!("looking up best value for {}", value);
463                    let BestEntry(_, best_value) = self.value_to_best_value[value];
464                    trace!("elaborate: value {} -> best {}", value, best_value);
465                    debug_assert_ne!(best_value, Value::reserved_value());
466
467                    if let Some(elab_val) =
468                        self.value_to_elaborated_value.get(&NullCtx, &best_value)
469                    {
470                        // Value is available; use it.
471                        trace!("elaborate: value {} -> {:?}", value, elab_val);
472                        self.stats.elaborate_memoize_hit += 1;
473                        self.elab_result_stack.push(*elab_val);
474                        continue;
475                    }
476
477                    self.stats.elaborate_memoize_miss += 1;
478
479                    // Now resolve the value to its definition to see
480                    // how we can compute it.
481                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
482                        ValueDef::Result(inst, result_idx) => {
483                            trace!(
484                                " -> value {} is result {} of {}",
485                                best_value, result_idx, inst
486                            );
487                            (inst, result_idx)
488                        }
489                        ValueDef::Param(in_block, _) => {
490                            // We don't need to do anything to compute
491                            // this value; just push its result on the
492                            // result stack (blockparams are already
493                            // available).
494                            trace!(" -> value {} is a blockparam", best_value);
495                            self.elab_result_stack.push(ElaboratedValue {
496                                in_block,
497                                value: best_value,
498                            });
499                            continue;
500                        }
501                        ValueDef::Union(_, _) => {
502                            panic!("Should never have a Union value as the best value");
503                        }
504                    };
505
506                    trace!(
507                        " -> result {} of inst {:?}",
508                        result_idx, self.func.dfg.insts[inst]
509                    );
510
511                    // We're going to need to use this instruction
512                    // result, placing the instruction into the
513                    // layout. First, enqueue all args to be
514                    // elaborated. Push state to receive the results
515                    // and later elab this inst.
516                    let num_args = self.func.dfg.inst_values(inst).count();
517                    self.elab_stack.push(ElabStackEntry::PendingInst {
518                        inst,
519                        result_idx,
520                        num_args,
521                        before,
522                    });
523
524                    // Push args in reverse order so we process the
525                    // first arg first.
526                    for arg in self.func.dfg.inst_values(inst).rev() {
527                        debug_assert_ne!(arg, Value::reserved_value());
528                        self.elab_stack
529                            .push(ElabStackEntry::Start { value: arg, before });
530                    }
531                }
532
533                ElabStackEntry::PendingInst {
534                    inst,
535                    result_idx,
536                    num_args,
537                    before,
538                } => {
539                    trace!(
540                        "PendingInst: {} result {} args {} before {}",
541                        inst, result_idx, num_args, before
542                    );
543
544                    // We should have all args resolved at this
545                    // point. Grab them and drain them out, removing
546                    // them.
547                    let arg_idx = self.elab_result_stack.len() - num_args;
548                    let arg_values = &mut self.elab_result_stack[arg_idx..];
549
550                    // Compute max loop depth.
551                    //
552                    // Note that if there are no arguments then this instruction
553                    // is allowed to get hoisted up one loop. This is not
554                    // usually used since no-argument values are things like
555                    // constants which are typically rematerialized, but for the
556                    // `vconst` instruction 128-bit constants aren't as easily
557                    // rematerialized. They're hoisted out of inner loops but
558                    // not to the function entry which may run the risk of
559                    // placing too much register pressure on the entire
560                    // function. This is modeled with the `.saturating_sub(1)`
561                    // as the default if there's otherwise no maximum.
562                    let loop_hoist_level = arg_values
563                        .iter()
564                        .map(|&value| {
565                            // Find the outermost loop level at which
566                            // the value's defining block *is not* a
567                            // member. This is the loop-nest level
568                            // whose hoist-block we hoist to.
569                            let hoist_level = self
570                                .loop_stack
571                                .iter()
572                                .position(|loop_entry| {
573                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
574                                })
575                                .unwrap_or(self.loop_stack.len());
576                            trace!(
577                                " -> arg: elab_value {:?} hoist level {:?}",
578                                value, hoist_level
579                            );
580                            hoist_level
581                        })
582                        .max()
583                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
584                    trace!(
585                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
586                        loop_hoist_level,
587                        self.loop_stack.len(),
588                        self.loop_stack,
589                    );
590
591                    // We know that this is a pure inst, because
592                    // non-pure roots have already been placed in the
593                    // value-to-elab'd-value map, so they will not
594                    // reach this stage of processing.
595                    //
596                    // We now must determine the location at which we
597                    // place the instruction. This is the current
598                    // block *unless* we hoist above a loop when all
599                    // args are loop-invariant (and this op is pure).
600                    let (scope_depth, before, insert_block) = if loop_hoist_level
601                        == self.loop_stack.len()
602                    {
603                        // Depends on some value at the current
604                        // loop depth, or remat forces it here:
605                        // place it at the current location.
606                        (
607                            self.value_to_elaborated_value.depth(),
608                            before,
609                            self.func.layout.inst_block(before).unwrap(),
610                        )
611                    } else {
612                        // Does not depend on any args at current
613                        // loop depth: hoist out of loop.
614                        self.stats.elaborate_licm_hoist += 1;
615                        let data = &self.loop_stack[loop_hoist_level];
616                        // `data.hoist_block` should dominate `before`'s block.
617                        let before_block = self.func.layout.inst_block(before).unwrap();
618                        debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));
619                        // Determine the instruction at which we
620                        // insert in `data.hoist_block`.
621                        let before = self.func.layout.last_inst(data.hoist_block).unwrap();
622                        (data.scope_depth as usize, before, data.hoist_block)
623                    };
624
625                    trace!(
626                        " -> decided to place: before {} insert_block {}",
627                        before, insert_block
628                    );
629
630                    // Now that we have the location for the
631                    // instruction, check if any of its args are remat
632                    // values. If so, and if we don't have a copy of
633                    // the rematerializing instruction for this block
634                    // yet, create one.
635                    let mut remat_arg = false;
636                    for arg_value in arg_values.iter_mut() {
637                        if Self::maybe_remat_arg(
638                            &self.remat_values,
639                            &mut self.func,
640                            &mut self.remat_copies,
641                            insert_block,
642                            before,
643                            arg_value,
644                            &mut self.stats,
645                        ) {
646                            remat_arg = true;
647                        }
648                    }
649
650                    // Now we need to place `inst` at the computed
651                    // location (just before `before`). Note that
652                    // `inst` may already have been placed somewhere
653                    // else, because a pure node may be elaborated at
654                    // more than one place. In this case, we need to
655                    // duplicate the instruction (and return the
656                    // `Value`s for that duplicated instance instead).
657                    //
658                    // Also clone if we rematerialized, because we
659                    // don't want to rewrite the args in the original
660                    // copy.
661                    trace!("need inst {} before {}", inst, before);
662                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
663                        // Clone the inst!
664                        let new_inst = self.func.dfg.clone_inst(inst);
665                        trace!(
666                            " -> inst {} already has a location; cloned to {}",
667                            inst, new_inst
668                        );
669                        // Create mappings in the
670                        // value-to-elab'd-value map from original
671                        // results to cloned results.
672                        for (&result, &new_result) in self
673                            .func
674                            .dfg
675                            .inst_results(inst)
676                            .iter()
677                            .zip(self.func.dfg.inst_results(new_inst).iter())
678                        {
679                            let elab_value = ElaboratedValue {
680                                value: new_result,
681                                in_block: insert_block,
682                            };
683                            let best_result = self.value_to_best_value[result];
684                            self.value_to_elaborated_value.insert_if_absent_with_depth(
685                                &NullCtx,
686                                best_result.1,
687                                elab_value,
688                                scope_depth,
689                            );
690
691                            self.value_to_best_value[new_result] = best_result;
692
693                            trace!(
694                                " -> cloned inst has new result {} for orig {}",
695                                new_result, result
696                            );
697                        }
698                        new_inst
699                    } else {
700                        trace!(" -> no location; using original inst");
701                        // Create identity mappings from result values
702                        // to themselves in this scope, since we're
703                        // using the original inst.
704                        for &result in self.func.dfg.inst_results(inst) {
705                            let elab_value = ElaboratedValue {
706                                value: result,
707                                in_block: insert_block,
708                            };
709                            let best_result = self.value_to_best_value[result];
710                            self.value_to_elaborated_value.insert_if_absent_with_depth(
711                                &NullCtx,
712                                best_result.1,
713                                elab_value,
714                                scope_depth,
715                            );
716                            trace!(" -> inserting identity mapping for {}", result);
717                        }
718                        inst
719                    };
720
721                    // Place the inst just before `before`.
722                    assert!(
723                        is_pure_for_egraph(self.func, inst),
724                        "something has gone very wrong if we are elaborating effectful \
725                         instructions, they should have remained in the skeleton"
726                    );
727                    self.func.layout.insert_inst(inst, before);
728
729                    // Update the inst's arguments.
730                    self.func
731                        .dfg
732                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));
733
734                    // Now that we've consumed the arg values, pop
735                    // them off the stack.
736                    self.elab_result_stack.truncate(arg_idx);
737
738                    // Push the requested result index of the
739                    // instruction onto the elab-results stack.
740                    self.elab_result_stack.push(ElaboratedValue {
741                        in_block: insert_block,
742                        value: self.func.dfg.inst_results(inst)[result_idx],
743                    });
744                }
745            }
746        }
747    }
748
749    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
750        trace!("elaborate_block: block {}", block);
751        self.start_block(idom, block);
752
753        // Iterate over the side-effecting skeleton using the linked
754        // list in Layout. We will insert instructions that are
755        // elaborated *before* `inst`, so we can always use its
756        // next-link to continue the iteration.
757        let mut next_inst = self.func.layout.first_inst(block);
758        let mut first_branch = None;
759        while let Some(inst) = next_inst {
760            trace!(
761                "elaborating inst {} with results {:?}",
762                inst,
763                self.func.dfg.inst_results(inst)
764            );
765            // Record the first branch we see in the block; all
766            // elaboration for args of *any* branch must be inserted
767            // before the *first* branch, because the branch group
768            // must remain contiguous at the end of the block.
769            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
770                first_branch = Some(inst);
771            }
772
773            // Determine where elaboration inserts insts.
774            let before = first_branch.unwrap_or(inst);
775            trace!(" -> inserting before {}", before);
776
777            elab_values.extend(self.func.dfg.inst_values(inst));
778            for arg in elab_values.iter_mut() {
779                trace!(" -> arg {}", *arg);
780                // Elaborate the arg, placing any newly-inserted insts
781                // before `before`. Get the updated value, which may
782                // be different than the original.
783                let mut new_arg = self.elaborate_eclass_use(*arg, before);
784                Self::maybe_remat_arg(
785                    &self.remat_values,
786                    &mut self.func,
787                    &mut self.remat_copies,
788                    block,
789                    inst,
790                    &mut new_arg,
791                    &mut self.stats,
792                );
793                trace!("   -> rewrote arg to {:?}", new_arg);
794                *arg = new_arg.value;
795            }
796            self.func
797                .dfg
798                .overwrite_inst_values(inst, elab_values.drain(..));
799
800            // We need to put the results of this instruction in the
801            // map now.
802            for &result in self.func.dfg.inst_results(inst) {
803                trace!(" -> result {}", result);
804                let best_result = self.value_to_best_value[result];
805                self.value_to_elaborated_value.insert_if_absent(
806                    &NullCtx,
807                    best_result.1,
808                    ElaboratedValue {
809                        in_block: block,
810                        value: result,
811                    },
812                );
813            }
814
815            next_inst = self.func.layout.next_inst(inst);
816        }
817    }
818
819    fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
820        self.block_stack.push(BlockStackEntry::Elaborate {
821            block: self.func.layout.entry_block().unwrap(),
822            idom: None,
823        });
824
825        // A temporary workspace for elaborate_block, allocated here to maximize the use of the
826        // allocation.
827        let mut elab_values = Vec::new();
828
829        while let Some(top) = self.block_stack.pop() {
830            match top {
831                BlockStackEntry::Elaborate { block, idom } => {
832                    self.block_stack.push(BlockStackEntry::Pop);
833                    self.value_to_elaborated_value.increment_depth();
834
835                    self.elaborate_block(&mut elab_values, idom, block);
836
837                    // Push children. We are doing a preorder
838                    // traversal so we do this after processing this
839                    // block above.
840                    let block_stack_end = self.block_stack.len();
841                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
842                        self.block_stack.push(BlockStackEntry::Elaborate {
843                            block: child,
844                            idom: Some(block),
845                        });
846                    }
847                    // Reverse what we just pushed so we elaborate in
848                    // original block order. (The domtree iter is a
849                    // single-ended iter over a singly-linked list so
850                    // we can't `.rev()` above.)
851                    self.block_stack[block_stack_end..].reverse();
852                }
853                BlockStackEntry::Pop => {
854                    self.value_to_elaborated_value.decrement_depth();
855                }
856            }
857        }
858    }
859
860    pub(crate) fn elaborate(&mut self) {
861        self.stats.elaborate_func += 1;
862        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
863        self.compute_best_values();
864        self.elaborate_domtree(&self.domtree);
865        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
866    }
867}