cranelift_codegen/
remove_constant_phis.rs

1//! A Constant-Phi-Node removal pass.
2
3use crate::dominator_tree::DominatorTree;
4use crate::ir;
5use crate::ir::Function;
6use crate::ir::{Block, BlockCall, Inst, Value};
7use crate::timing;
8use bumpalo::Bump;
9use cranelift_entity::SecondaryMap;
10use rustc_hash::{FxHashMap, FxHashSet};
11use smallvec::SmallVec;
12
13// A note on notation.  For the sake of clarity, this file uses the phrase
14// "formal parameters" to mean the `Value`s listed in the block head, and
15// "actual parameters" to mean the `Value`s passed in a branch or a jump:
16//
17// block4(v16: i32, v18: i32):            <-- formal parameters
18//   ...
19//   brif v27, block7(v22, v24), block6   <-- actual parameters
20
21// This transformation pass (conceptually) partitions all values in the
22// function into two groups:
23//
24// * Group A: values defined by block formal parameters, except for the entry block.
25//
26// * Group B: All other values: that is, values defined by instructions,
27//   and the formals of the entry block.
28//
29// For each value in Group A, it attempts to establish whether it will have
30// the value of exactly one member of Group B.  If so, the formal parameter is
31// deleted, all corresponding actual parameters (in jumps/branches to the
32// defining block) are deleted, and a rename is inserted.
33//
34// The entry block is special-cased because (1) we don't know what values flow
35// to its formals and (2) in any case we can't change its formals.
36//
37// Work proceeds in three phases.
38//
39// * Phase 1: examine all instructions.  For each block, make up a useful
40//   grab-bag of information, `BlockSummary`, that summarises the block's
41//   formals and jump/branch instruction.  This is used by Phases 2 and 3.
42//
43// * Phase 2: for each value in Group A, try to find a single Group B value
44//   that flows to it.  This is done using a classical iterative forward
45//   dataflow analysis over a simple constant-propagation style lattice.  It
46//   converges quickly in practice -- I have seen at most 4 iterations.  This
47//   is relatively cheap because the iteration is done over the
48//   `BlockSummary`s, and does not visit each instruction.  The resulting
49//   fixed point is stored in a `SolverState`.
50//
51// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
52//   remove redundant formals and actuals, and to insert suitable renames.
53//
54// Note that the effectiveness of the analysis depends on on the fact that
55// there are no copy instructions in Cranelift's IR.  If there were, the
56// computation of `actual_absval` in Phase 2 would have to be extended to
57// chase through such copies.
58//
59// For large functions, the analysis cost using the new AArch64 backend is about
60// 0.6% of the non-optimising compile time, as measured by instruction counts.
61// This transformation usually pays for itself several times over, though, by
62// reducing the isel/regalloc cost downstream.  Gains of up to 7% have been
63// seen for large functions.
64
65/// The `Value`s (Group B) that can flow to a formal parameter (Group A).
66#[derive(Clone, Copy, Debug, PartialEq)]
67enum AbstractValue {
68    /// Two or more values flow to this formal.
69    Many,
70
71    /// Exactly one value, as stated, flows to this formal.  The `Value`s that
72    /// can appear here are exactly: `Value`s defined by `Inst`s, plus the
73    /// `Value`s defined by the formals of the entry block.  Note that this is
74    /// exactly the set of `Value`s that are *not* tracked in the solver below
75    /// (see `SolverState`).
76    One(Value /*Group B*/),
77
78    /// No value flows to this formal.
79    None,
80}
81
82impl AbstractValue {
83    fn join(self, other: AbstractValue) -> AbstractValue {
84        match (self, other) {
85            // Joining with `None` has no effect
86            (AbstractValue::None, p2) => p2,
87            (p1, AbstractValue::None) => p1,
88            // Joining with `Many` produces `Many`
89            (AbstractValue::Many, _p2) => AbstractValue::Many,
90            (_p1, AbstractValue::Many) => AbstractValue::Many,
91            // The only interesting case
92            (AbstractValue::One(v1), AbstractValue::One(v2)) => {
93                if v1 == v2 {
94                    AbstractValue::One(v1)
95                } else {
96                    AbstractValue::Many
97                }
98            }
99        }
100    }
101
102    fn is_one(self) -> bool {
103        matches!(self, AbstractValue::One(_))
104    }
105}
106
107#[derive(Clone, Copy, Debug)]
108struct OutEdge<'a> {
109    /// An instruction that transfers control.
110    inst: Inst,
111    /// The index into branch_destinations for this instruction that corresponds
112    /// to this edge.
113    branch_index: u32,
114    /// The block that control is transferred to.
115    block: Block,
116    /// The arguments to that block.
117    ///
118    /// These values can be from both groups A and B.
119    args: &'a [Value],
120}
121
122impl<'a> OutEdge<'a> {
123    /// Construct a new `OutEdge` for the given instruction.
124    ///
125    /// Returns `None` if this is an edge without any block arguments, which
126    /// means we can ignore it for this analysis's purposes.
127    #[inline]
128    fn new(
129        bump: &'a Bump,
130        dfg: &ir::DataFlowGraph,
131        inst: Inst,
132        branch_index: usize,
133        block: BlockCall,
134    ) -> Option<Self> {
135        let inst_var_args = block.args_slice(&dfg.value_lists);
136
137        // Skip edges without params.
138        if inst_var_args.is_empty() {
139            return None;
140        }
141
142        Some(OutEdge {
143            inst,
144            branch_index: branch_index as u32,
145            block: block.block(&dfg.value_lists),
146            args: bump.alloc_slice_fill_iter(
147                inst_var_args
148                    .iter()
149                    .map(|value| dfg.resolve_aliases(*value)),
150            ),
151        })
152    }
153}
154
155/// For some block, a useful bundle of info.  The `Block` itself is not stored
156/// here since it will be the key in the associated `FxHashMap` -- see
157/// `summaries` below.  For the `SmallVec` tuning params: most blocks have
158/// few parameters, hence `4`.  And almost all blocks have either one or two
159/// successors, hence `2`.
160#[derive(Clone, Debug, Default)]
161struct BlockSummary<'a> {
162    /// Formal parameters for this `Block`.
163    ///
164    /// These values are from group A.
165    formals: &'a [Value],
166
167    /// Each outgoing edge from this block.
168    ///
169    /// We don't bother to include transfers that pass zero parameters
170    /// since that makes more work for the solver for no purpose.
171    ///
172    /// We optimize for the case where a branch instruction has up to two
173    /// outgoing edges, as unconditional jumps and conditional branches are
174    /// more prominent than br_table.
175    dests: SmallVec<[OutEdge<'a>; 2]>,
176}
177
178impl<'a> BlockSummary<'a> {
179    /// Construct a new `BlockSummary`, using `values` as its backing storage.
180    #[inline]
181    fn new(bump: &'a Bump, formals: &[Value]) -> Self {
182        Self {
183            formals: bump.alloc_slice_copy(formals),
184            dests: Default::default(),
185        }
186    }
187}
188
189/// Solver state.  This holds a AbstractValue for each formal parameter, except
190/// for those from the entry block.
191struct SolverState {
192    absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
193}
194
195impl SolverState {
196    fn new() -> Self {
197        Self {
198            absvals: FxHashMap::default(),
199        }
200    }
201
202    fn get(&self, actual: Value) -> AbstractValue {
203        *self
204            .absvals
205            .get(&actual)
206            .unwrap_or_else(|| panic!("SolverState::get: formal param {actual:?} is untracked?!"))
207    }
208
209    fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
210        self.absvals.get(&actual)
211    }
212
213    fn set(&mut self, actual: Value, lp: AbstractValue) {
214        match self.absvals.insert(actual, lp) {
215            Some(_old_lp) => {}
216            None => panic!("SolverState::set: formal param {actual:?} is untracked?!"),
217        }
218    }
219}
220
221/// Detect phis in `func` that will only ever produce one value, using a
222/// classic forward dataflow analysis.  Then remove them.
223#[inline(never)]
224pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
225    let _tt = timing::remove_constant_phis();
226    debug_assert!(domtree.is_valid());
227
228    // Phase 1 of 3: for each block, make a summary containing all relevant
229    // info.  The solver will iterate over the summaries, rather than having
230    // to inspect each instruction in each block.
231    let bump =
232        Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
233    let mut summaries =
234        SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
235
236    for b in domtree.cfg_rpo().copied() {
237        let formals = func.dfg.block_params(b);
238        let mut summary = BlockSummary::new(&bump, formals);
239
240        for inst in func.layout.block_insts(b) {
241            for (ix, dest) in func.dfg.insts[inst]
242                .branch_destination(&func.dfg.jump_tables)
243                .iter()
244                .enumerate()
245            {
246                if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) {
247                    summary.dests.push(edge);
248                }
249            }
250        }
251
252        // Ensure the invariant that all blocks (except for the entry) appear
253        // in the summary, *unless* they have neither formals nor any
254        // param-carrying branches/jumps.
255        if formals.len() > 0 || summary.dests.len() > 0 {
256            summaries[b] = summary;
257        }
258    }
259
260    // Phase 2 of 3: iterate over the summaries in reverse postorder,
261    // computing new `AbstractValue`s for each tracked `Value`.  The set of
262    // tracked `Value`s is exactly Group A as described above.
263
264    let entry_block = func
265        .layout
266        .entry_block()
267        .expect("remove_constant_phis: entry block unknown");
268
269    // Set up initial solver state
270    let mut state = SolverState::new();
271
272    for b in domtree.cfg_rpo().copied() {
273        // For each block, get the formals
274        if b == entry_block {
275            continue;
276        }
277        let formals = func.dfg.block_params(b);
278        for formal in formals {
279            let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
280            assert!(mb_old_absval.is_none());
281        }
282    }
283
284    // Solve: repeatedly traverse the blocks in reverse postorder, until there
285    // are no changes.
286    let mut iter_no = 0;
287    loop {
288        iter_no += 1;
289        let mut changed = false;
290
291        for src in domtree.cfg_rpo().copied() {
292            let src_summary = &summaries[src];
293            for edge in &src_summary.dests {
294                assert!(edge.block != entry_block);
295                // By contrast, the dst block must have a summary.  Phase 1
296                // will have only included an entry in `src_summary.dests` if
297                // that branch/jump carried at least one parameter.  So the
298                // dst block does take parameters, so it must have a summary.
299                let dst_summary = &summaries[edge.block];
300                let dst_formals = &dst_summary.formals;
301                assert_eq!(edge.args.len(), dst_formals.len());
302                for (formal, actual) in dst_formals.iter().zip(edge.args) {
303                    // Find the abstract value for `actual`.  If it is a block
304                    // formal parameter then the most recent abstract value is
305                    // to be found in the solver state.  If not, then it's a
306                    // real value defining point (not a phi), in which case
307                    // return it itself.
308                    let actual_absval = match state.maybe_get(*actual) {
309                        Some(pt) => *pt,
310                        None => AbstractValue::One(*actual),
311                    };
312
313                    // And `join` the new value with the old.
314                    let formal_absval_old = state.get(*formal);
315                    let formal_absval_new = formal_absval_old.join(actual_absval);
316                    if formal_absval_new != formal_absval_old {
317                        changed = true;
318                        state.set(*formal, formal_absval_new);
319                    }
320                }
321            }
322        }
323
324        if !changed {
325            break;
326        }
327    }
328
329    let mut n_consts = 0;
330    for absval in state.absvals.values() {
331        if absval.is_one() {
332            n_consts += 1;
333        }
334    }
335
336    // Phase 3 of 3: edit the function to remove constant formals, using the
337    // summaries and the final solver state as a guide.
338
339    // Make up a set of blocks that need editing.
340    let mut need_editing = FxHashSet::<Block>::default();
341    for (block, summary) in summaries.iter() {
342        if block == entry_block {
343            continue;
344        }
345        for formal in summary.formals {
346            let formal_absval = state.get(*formal);
347            if formal_absval.is_one() {
348                need_editing.insert(block);
349                break;
350            }
351        }
352    }
353
354    // Firstly, deal with the formals.  For each formal which is redundant,
355    // remove it, and also add a reroute from it to the constant value which
356    // it we know it to be.
357    for b in &need_editing {
358        let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
359        let formals: &[Value] = func.dfg.block_params(*b);
360        for formal in formals {
361            // The state must give an absval for `formal`.
362            if let AbstractValue::One(replacement_val) = state.get(*formal) {
363                del_these.push((*formal, replacement_val));
364            }
365        }
366        // We can delete the formals in any order.  However,
367        // `remove_block_param` works by sliding backwards all arguments to
368        // the right of the value it is asked to delete.  Hence when removing more
369        // than one formal, it is significantly more efficient to ask it to
370        // remove the rightmost formal first, and hence this `rev()`.
371        for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
372            func.dfg.remove_block_param(redundant_formal);
373            func.dfg.change_to_alias(redundant_formal, replacement_val);
374        }
375    }
376
377    // Secondly, visit all branch insns.  If the destination has had its
378    // formals changed, change the actuals accordingly.  Don't scan all insns,
379    // rather just visit those as listed in the summaries we prepared earlier.
380    let mut old_actuals = alloc::vec::Vec::new();
381    for summary in summaries.values() {
382        for edge in &summary.dests {
383            if !need_editing.contains(&edge.block) {
384                continue;
385            }
386
387            let dfg = &mut func.dfg;
388            let dests = dfg.insts[edge.inst].branch_destination_mut(&mut dfg.jump_tables);
389            let block = &mut dests[edge.branch_index as usize];
390
391            old_actuals.extend(block.args_slice(&dfg.value_lists));
392
393            // Check that the numbers of arguments make sense.
394            let formals = &summaries[edge.block].formals;
395            assert_eq!(formals.len(), old_actuals.len());
396
397            // Filter out redundant block arguments.
398            let mut formals = formals.iter();
399            old_actuals.retain(|_| {
400                let formal_i = formals.next().unwrap();
401                !state.get(*formal_i).is_one()
402            });
403
404            // Replace the block with a new one that only includes the non-redundant arguments.
405            // This leaks the value list from the old block,
406            // https://github.com/bytecodealliance/wasmtime/issues/5451 for more information.
407            let destination = block.block(&dfg.value_lists);
408            *block = BlockCall::new(destination, &old_actuals, &mut dfg.value_lists);
409            old_actuals.clear();
410        }
411    }
412
413    log::debug!(
414        "do_remove_constant_phis: done, {} iters.   {} formals, of which {} const.",
415        iter_no,
416        state.absvals.len(),
417        n_consts
418    );
419}