cranelift_codegen/machinst/blockorder.rs
1//! Computation of basic block order in emitted code.
2//!
3//! This module handles the translation from CLIF BBs to VCode BBs.
4//!
5//! The basic idea is that we compute a sequence of "lowered blocks" that
6//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
7//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
8//! moves for phi-nodes on every block-to-block transfer; these blocks always
9//! conceptually exist, but may be merged with an "original" CLIF block (and
10//! hence not actually exist; this is equivalent to inserting the blocks only on
11//! critical edges).
12//!
13//! In other words, starting from a CFG like this (where each "CLIF block" and
14//! "(edge N->M)" is a separate basic block):
15//!
16//! ```plain
17//!
18//! CLIF block 0
19//! / \
20//! (edge 0->1) (edge 0->2)
21//! | |
22//! CLIF block 1 CLIF block 2
23//! \ /
24//! (edge 1->3) (edge 2->3)
25//! \ /
26//! CLIF block 3
27//! ```
28//!
29//! We can produce a CFG of lowered blocks like so:
30//!
31//! ```plain
32//! +--------------+
33//! | CLIF block 0 |
34//! +--------------+
35//! / \
36//! +--------------+ +--------------+
37//! | (edge 0->1) | | (edge 0->2) |
38//! | CLIF block 1 | | CLIF block 2 |
39//! | (edge 1->3) | | (edge 2->3) |
40//! +--------------+ +--------------+
41//! \ /
42//! \ /
43//! +------------+
44//! |CLIF block 3|
45//! +------------+
46//! ```
47//!
48//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
49//!
50//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
51//! (never actually materialized, just defined by a "successors" function), and
52//! compute the reverse postorder.
53//!
54//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
55//! example, consider any information about whether edge blocks will actually
56//! have content, because this computation happens as part of lowering *before*
57//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
58//! particular edge. But it works relatively well and is conceptually simple.
59//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
60//! branch editing that in practice elides empty blocks and simplifies some of
61//! the other redundancies that this scheme produces.
62
63use crate::dominator_tree::DominatorTree;
64use crate::entity::SecondaryMap;
65use crate::inst_predicates::visit_block_succs;
66use crate::ir::{Block, Function, Inst, Opcode};
67use crate::{machinst::*, trace};
68use rustc_hash::{FxHashMap, FxHashSet};
69
/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
    /// Lowered blocks, in BlockIndex order. Each block is some combination of
    /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
    /// see [LoweredBlock] for details.
    lowered_order: Vec<LoweredBlock>,
    /// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
    /// Successor lists for all lowered blocks are concatenated here; use
    /// `lowered_succ_ranges` to find the slice belonging to one block.
    lowered_succ_indices: Vec<BlockIndex>,
    /// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
    /// block. Indexed by lowering-order index (`BlockIndex`). The `Option<Inst>`
    /// is the block's terminating branch instruction when it ends in a branch,
    /// and `None` otherwise (critical-edge blocks and non-branch terminators).
    lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
    /// Cold blocks. These blocks are not reordered in the
    /// `lowered_order` above; the lowered order must respect RPO
    /// (uses after defs) in order for lowering to be
    /// correct. Instead, this set is used to provide `is_cold()`,
    /// which is used by VCode emission to sink the blocks at the last
    /// moment (when we actually emit bytes into the MachBuffer).
    cold_blocks: FxHashSet<BlockIndex>,
    /// Lowered blocks that are indirect branch targets.
    indirect_branch_targets: FxHashSet<BlockIndex>,
}
92
/// One block in the lowered order: either a block from the original CLIF, or a
/// block inserted to split a critical edge between two CLIF blocks.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
    /// Block in original CLIF.
    Orig {
        /// Original CLIF block.
        block: Block,
    },

    /// Critical edge between two CLIF blocks.
    CriticalEdge {
        /// The predecessor block.
        pred: Block,

        /// The successor block.
        succ: Block,

        /// The index of this branch in the successor edges from `pred`, following the same
        /// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
        /// multiple edges between the same CLIF blocks.
        succ_idx: u32,
    },
}
115
116impl LoweredBlock {
117 /// Unwrap an `Orig` block.
118 pub fn orig_block(&self) -> Option<Block> {
119 match self {
120 &LoweredBlock::Orig { block } => Some(block),
121 &LoweredBlock::CriticalEdge { .. } => None,
122 }
123 }
124
125 /// The associated in-edge predecessor, if this is a critical edge.
126 #[cfg(test)]
127 pub fn in_edge(&self) -> Option<Block> {
128 match self {
129 &LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
130 &LoweredBlock::Orig { .. } => None,
131 }
132 }
133
134 /// The associated out-edge successor, if this is a critical edge.
135 #[cfg(test)]
136 pub fn out_edge(&self) -> Option<Block> {
137 match self {
138 &LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
139 &LoweredBlock::Orig { .. } => None,
140 }
141 }
142}
143
impl BlockLoweringOrder {
    /// Compute and return a lowered block order for `f`.
    ///
    /// The base order is the CFG reverse postorder supplied by `domtree`
    /// (via `cfg_rpo()`); inserted critical-edge blocks are placed
    /// immediately after their predecessor block. `ctrl_plane` is used only
    /// for chaos-mode shuffling of successor visit order, which may permute
    /// where edge blocks land in the order but not correctness.
    pub fn new(
        f: &Function,
        domtree: &DominatorTree,
        ctrl_plane: &mut ControlPlane,
    ) -> BlockLoweringOrder {
        trace!("BlockLoweringOrder: function body {:?}", f);

        // Step 1: compute the in-edge and out-edge count of every block.
        let mut block_in_count = SecondaryMap::with_default(0);
        let mut block_out_count = SecondaryMap::with_default(0);

        // Block successors are stored as `LoweredBlocks` to simplify the construction of
        // `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
        // updated to be `CriticalEdge` when those cases are identified in step 2 below.
        let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
        let mut block_succ_range = SecondaryMap::with_default(0..0);

        // CLIF blocks reached through a jump-table entry; carried over to the
        // lowered-block domain in step 3.
        let mut indirect_branch_target_clif_blocks = FxHashSet::default();

        for block in f.layout.blocks() {
            let start = block_succs.len();
            visit_block_succs(f, block, |_, succ, from_table| {
                block_out_count[block] += 1;
                block_in_count[succ] += 1;
                block_succs.push(LoweredBlock::Orig { block: succ });

                if from_table {
                    indirect_branch_target_clif_blocks.insert(succ);
                }
            });

            // Ensure that blocks terminated by br_table instructions with an empty jump table are
            // still treated like conditional blocks from the point of view of critical edge
            // splitting.
            if let Some(inst) = f.layout.last_inst(block) {
                if Opcode::BrTable == f.dfg.insts[inst].opcode() {
                    block_out_count[block] = block_out_count[block].max(2);
                }
            }

            let end = block_succs.len();
            block_succ_range[block] = start..end;
        }

        // Step 2: walk the postorder from the domtree in reverse to produce our desired node
        // lowering order, identifying critical edges to split along the way.

        let mut lowered_order = Vec::new();

        for &block in domtree.cfg_rpo() {
            lowered_order.push(LoweredBlock::Orig { block });

            // Only a block with multiple out-edges can be the source of a
            // critical edge (pred out-count > 1 && succ in-count > 1).
            if block_out_count[block] > 1 {
                let range = block_succ_range[block].clone();

                // If chaos-mode is enabled in the control plane, iterate over
                // the successors in an arbitrary order, which should have no
                // impact on correctness. The order of the blocks is generally
                // relevant: Uses must be seen before defs for dead-code
                // elimination.
                let succs = ctrl_plane.shuffled(block_succs[range].iter_mut().enumerate());

                for (succ_ix, lb) in succs {
                    let succ = lb.orig_block().unwrap();
                    if block_in_count[succ] > 1 {
                        // Mutate the successor to be a critical edge, as `block` has multiple
                        // edges leaving it, and `succ` has multiple edges entering it.
                        *lb = LoweredBlock::CriticalEdge {
                            pred: block,
                            succ,
                            succ_idx: succ_ix as u32,
                        };
                        lowered_order.push(*lb);
                    }
                }
            }
        }

        // Map each lowered block to its final index so that step 3 can
        // translate `LoweredBlock` successor values into `BlockIndex`es.
        let lb_to_bindex = FxHashMap::from_iter(
            lowered_order
                .iter()
                .enumerate()
                .map(|(i, &lb)| (lb, BlockIndex::new(i))),
        );

        // Step 3: build the successor tables given the lowering order. We can't perform this step
        // during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
        // first.
        let mut lowered_succ_indices = Vec::new();
        let mut cold_blocks = FxHashSet::default();
        let mut indirect_branch_targets = FxHashSet::default();
        let lowered_succ_ranges =
            Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
                let bindex = BlockIndex::new(ix);
                let start = lowered_succ_indices.len();
                let opt_inst = match lb {
                    // Block successors are pulled directly over, as they'll have been mutated when
                    // determining the block order already.
                    &LoweredBlock::Orig { block } => {
                        let range = block_succ_range[block].clone();
                        lowered_succ_indices
                            .extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));

                        if f.layout.is_cold(block) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&block) {
                            indirect_branch_targets.insert(bindex);
                        }

                        let last = f.layout.last_inst(block).unwrap();
                        let opcode = f.dfg.insts[last].opcode();

                        assert!(opcode.is_terminator());

                        // Record the branch instruction only; non-branch
                        // terminators (e.g. returns) yield `None`.
                        opcode.is_branch().then_some(last)
                    }

                    // Critical edges won't have successor information in block_succ_range, but
                    // they only have a single known successor to record anyway.
                    &LoweredBlock::CriticalEdge { succ, .. } => {
                        let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
                        lowered_succ_indices.push(succ_index);

                        // Edges inherit indirect branch and cold block metadata from their
                        // successor.

                        if f.layout.is_cold(succ) {
                            cold_blocks.insert(bindex);
                        }

                        if indirect_branch_target_clif_blocks.contains(&succ) {
                            indirect_branch_targets.insert(bindex);
                        }

                        None
                    }
                };
                let end = lowered_succ_indices.len();
                (opt_inst, start..end)
            }));

        let result = BlockLoweringOrder {
            lowered_order,
            lowered_succ_indices,
            lowered_succ_ranges,
            cold_blocks,
            indirect_branch_targets,
        };

        trace!("BlockLoweringOrder: {:#?}", result);
        result
    }

    /// Get the lowered order of blocks.
    pub fn lowered_order(&self) -> &[LoweredBlock] {
        &self.lowered_order[..]
    }

    /// Get the successor indices for a lowered block, along with the block's
    /// terminating branch instruction if it ends in a branch (`None` for
    /// critical-edge blocks and non-branch terminators).
    pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
        let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
        (*opt_inst, &self.lowered_succ_indices[range.clone()])
    }

    /// Determine whether the given lowered-block index is cold.
    pub fn is_cold(&self, block: BlockIndex) -> bool {
        self.cold_blocks.contains(&block)
    }

    /// Determine whether the given lowered block index is an indirect branch
    /// target.
    pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
        self.indirect_branch_targets.contains(&block)
    }
}
323
#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::types::*;
    use crate::ir::UserFuncName;
    use crate::ir::{AbiParam, InstBuilder, Signature};
    use crate::isa::CallConv;

    /// Build a one-function test CFG with `n_blocks` blocks and the given
    /// `(pred, succ)` edges, then compute its lowered block order.
    ///
    /// `edges` must be sorted by predecessor index. A block with no out-edges
    /// gets a `return`, one out-edge a `jump`, and two out-edges a `brif`;
    /// more than two panics.
    fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
        assert!(n_blocks > 0);

        let name = UserFuncName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);
        let blocks = (0..n_blocks)
            .map(|i| {
                let bb = func.dfg.make_block();
                assert_eq!(bb.as_u32(), i as u32);
                bb
            })
            .collect::<Vec<_>>();

        let arg0 = func.dfg.append_block_param(blocks[0], I32);

        let mut pos = FuncCursor::new(&mut func);

        // Consume `edges` in order as we visit each block; this relies on the
        // edge list being sorted by predecessor.
        let mut edge = 0;
        for (i, &block) in blocks.iter().enumerate() {
            pos.insert_block(block);
            let mut succs = vec![];
            while edge < edges.len() && edges[edge].0 == i {
                succs.push(edges[edge].1);
                edge += 1;
            }
            // Choose a terminator matching the successor count.
            match succs.as_slice() {
                [] => {
                    pos.ins().return_(&[arg0]);
                }
                &[succ] => {
                    pos.ins().jump(blocks[succ], &[]);
                }
                &[succ0, succ1] => {
                    pos.ins()
                        .brif(arg0, blocks[succ0], &[], blocks[succ1], &[]);
                }
                _ => panic!("Too many successors"),
            }
        }

        let mut cfg = ControlFlowGraph::new();
        cfg.compute(&func);
        let dom_tree = DominatorTree::with_function(&func, &cfg);

        BlockLoweringOrder::new(&func, &dom_tree, &mut Default::default())
    }

    #[test]
    fn test_blockorder_diamond() {
        let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);

        // This test case doesn't need to introduce any critical edges, as all regalloc allocations
        // can sit on either the entry or exit of blocks 1 and 2.
        assert_eq!(order.lowered_order.len(), 4);
    }

    #[test]
    fn test_blockorder_critedge() {
        //            0
        //          /   \
        //         1     2
        //        / \     \
        //       3   4     |
        //       |\  _|____|
        //       | \/ |
        //       | /\ |
        //       5    6
        //
        // (3 -> 5, and 3 -> 6 are critical edges and must be split)
        //
        let order = build_test_func(
            7,
            &[
                (0, 1),
                (0, 2),
                (1, 3),
                (1, 4),
                (2, 5),
                (3, 5),
                (3, 6),
                (4, 6),
            ],
        );

        assert_eq!(order.lowered_order.len(), 9);
        println!("ordered = {:?}", order.lowered_order);

        // block 0
        assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
        assert!(order.lowered_order[0].in_edge().is_none());
        assert!(order.lowered_order[0].out_edge().is_none());

        // block 2
        assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
        assert!(order.lowered_order[1].in_edge().is_none());
        assert!(order.lowered_order[1].out_edge().is_none());

        // block 1
        assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
        assert!(order.lowered_order[2].in_edge().is_none());
        assert!(order.lowered_order[2].out_edge().is_none());

        // block 4
        assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
        assert!(order.lowered_order[3].in_edge().is_none());
        assert!(order.lowered_order[3].out_edge().is_none());

        // block 3
        assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
        assert!(order.lowered_order[4].in_edge().is_none());
        assert!(order.lowered_order[4].out_edge().is_none());

        // critical edge 3 -> 5
        assert!(order.lowered_order[5].orig_block().is_none());
        assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);

        // critical edge 3 -> 6
        assert!(order.lowered_order[6].orig_block().is_none());
        assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
        assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);

        // block 6
        assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
        assert!(order.lowered_order[7].in_edge().is_none());
        assert!(order.lowered_order[7].out_edge().is_none());

        // block 5
        assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
        assert!(order.lowered_order[8].in_edge().is_none());
        assert!(order.lowered_order[8].out_edge().is_none());
    }
}
465}