cranelift_codegen/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
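//!
//! An illustrative sketch (not actual backend syntax): a two-target VCode
//! terminator `brif v0, block2, block3` collapses to a single machine branch
//! when one target is the fallthrough block, and becomes a branch-cond /
//! branch-uncond pair when neither target follows:
//!
//! ```text
//! brif v0, block2, block3  =>  b.cond block3            ; block2 falls through
//! brif v0, block2, block3  =>  b.cond block3; b block2  ; neither falls through
//! ```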
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir::pcc::*;
use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
use crate::machinst::*;
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::CodegenError;
use crate::{LabelValueLoc, ValueLocRange};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
    OperandConstraint, OperandKind, PRegSet, RegClass,
};
use rustc_hash::FxHashMap;

use core::mem::take;
use cranelift_entity::{entity_impl, Keys};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Extension trait for `InsnIndex` to allow conversion to a
/// `BackwardsInsnIndex`.
trait ToBackwardsInsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
}

impl ToBackwardsInsnIndex for InsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
        BackwardsInsnIndex::new(num_insts - self.index() - 1)
    }
}

/// An index referring to an instruction in the VCode when it is backwards,
/// during VCode construction.
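///
/// An illustrative example of the mapping (assuming five instructions), per
/// `to_backwards_insn_index` above:
///
/// ```text
/// forward InsnIndex:                      0  1  2  3  4
/// BackwardsInsnIndex = 5 - forward - 1:   4  3  2  1  0
/// ```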
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "enable-serde",
    derive(::serde::Serialize, ::serde::Deserialize)
)]
pub struct BackwardsInsnIndex(InsnIndex);

impl BackwardsInsnIndex {
    pub fn new(i: usize) -> Self {
        BackwardsInsnIndex(InsnIndex::new(i))
    }
}

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering. This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`. `emit` in turn does not
/// modify the vcode, but produces an `EmitResult`, which contains the
/// machine code itself, and the associated disassembly and/or
/// metadata as requested.
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// A map from backwards instruction index to the user stack map for that
    /// instruction.
    ///
    /// This is a sparse side table that only has entries for instructions that
    /// are safepoints, and only for a subset of those that have an associated
    /// user stack map.
    user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Ranges,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Ranges,

    /// Block successors: index range in the `block_succs` list.
    block_succ_range: Ranges,

    /// Block successor lists, concatenated into one vec. The
    /// `block_succ_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// successors.
    block_succs: Vec<regalloc2::Block>,

    /// Block predecessors: index range in the `block_preds` list.
    block_pred_range: Ranges,

    /// Block predecessor lists, concatenated into one vec. The
    /// `block_pred_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// predecessors.
    block_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Ranges,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor. We flatten those three dimensions
    /// into this 1D vec, then store index ranges in two levels of
    /// indirection.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
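    ///
    /// Illustrative sketch: a block with two successors taking args
    /// `[v0, v1]` and `[v2]` contributes `[v0, v1, v2]` here;
    /// `branch_block_arg_range` then holds the ranges `(0, 2)` and
    /// `(2, 3)`, and `branch_block_arg_succ_range` maps the block to
    /// that pair of entries.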
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Ranges,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Ranges,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Constants.
    pub(crate) constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    pub(crate) sigs: SigSet,

    /// Facts on VRegs, for proof-carrying code verification.
    facts: Vec<Option<Fact>>,

    log2_min_function_alignment: u8,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBufferFinalized<Stencil>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `machine_code_cfg_info` is enabled.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// Final length of function body.
    pub func_body_len: CodeOffset,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Offsets of sized stackslots.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,

    /// Offsets of dynamic stackslots.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,

    /// Stack frame size.
    pub frame_size: u32,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
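///
/// An illustrative sketch of the backward-build flow (assuming a single
/// block whose final machine order is `inst0, inst1, inst2`):
///
/// ```text
/// push(inst2, loc); push(inst1, loc); push(inst0, loc); end_bb();
/// // After build(), the block's instructions are inst0, inst1, inst2.
/// ```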
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
    /// this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
        log2_min_function_alignment: u8,
    ) -> Self {
        let vcode = VCode::new(
            sigs,
            abi,
            emit_info,
            block_order,
            constants,
            log2_min_function_alignment,
        );

        VCodeBuilder {
            vcode,
            direction,
            debug_info: FxHashMap::default(),
        }
    }

    pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
        self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access to the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
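    ///
    /// Illustrative sketch: after pushing three insts and calling
    /// `end_bb()`, then two more insts and `end_bb()` again,
    /// `block_ranges` holds the ranges `[0..3)` and `[3..5)` (in build
    /// order; `reverse_and_finalize` later flips these into machine
    /// order).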
    pub fn end_bb(&mut self) {
        let end_idx = self.vcode.insts.len();
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push_end(end_idx);
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode.block_succ_range.push_end(succ_end);
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode.block_params_range.push_end(block_params_end);
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode
            .branch_block_arg_succ_range
            .push_end(branch_block_arg_succ_end);
    }

    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode.branch_block_arg_range.push_end(end);
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
        assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(loc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Add a debug value label to a register.
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // We'll fix up labels in reverse(). Because we're generating
        // code bottom-to-top, the liverange of the label goes *from*
        // the last index at which it was defined (or 0, which is the
        // end of the eventual function) *to* just this instruction,
        // and no further.
        let inst = InsnIndex::new(self.vcode.insts.len());
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, inst, reg.into()));
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

    fn compute_preds_from_succs(&mut self) {
        // Do a linear-time counting sort: first determine how many
        // times each block appears as a successor.
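        //
        // Illustrative example (hypothetical tiny CFG): with successor
        // lists b0 -> [b1, b2] and b1 -> [b2], the loop below yields
        // `starts = [0, 1, 2]` (b1 appears once as a successor, b2
        // twice, and b0 never).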
        let mut starts = vec![0u32; self.vcode.num_blocks()];
        for succ in &self.vcode.block_succs {
            starts[succ.index()] += 1;
        }

        // Determine for each block the starting index where that
        // block's predecessors should go. This is equivalent to the
        // ranges we need to store in block_pred_range.
        self.vcode.block_pred_range.reserve(starts.len());
        let mut end = 0;
        for count in starts.iter_mut() {
            let start = end;
            end += *count;
            *count = start;
            self.vcode.block_pred_range.push_end(end as usize);
        }
        let end = end as usize;
        debug_assert_eq!(end, self.vcode.block_succs.len());

        // Walk over the successors again, this time grouped by
        // predecessor, and push the predecessor at the current
        // starting position of each of its successors. We build
        // each group of predecessors in whatever order Ranges::iter
        // returns them; regalloc2 doesn't care.
        self.vcode.block_preds.resize(end, BlockIndex::invalid());
        for (pred, range) in self.vcode.block_succ_range.iter() {
            let pred = BlockIndex::new(pred);
            for succ in &self.vcode.block_succs[range] {
                let pos = &mut starts[succ.index()];
                self.vcode.block_preds[*pos as usize] = pred;
                *pos += 1;
            }
        }
        debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
    }

    /// Called once, when a build in Backward order is complete, to
    /// perform the overall reversal (into final forward order) and
    /// finalize metadata accordingly.
    fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
        let n_insts = self.vcode.insts.len();
        if n_insts == 0 {
            return;
        }

        // Reverse the per-block and per-inst sequences.
        self.vcode.block_ranges.reverse_index();
        self.vcode.block_ranges.reverse_target(n_insts);
        // block_params_range is indexed by block (and blocks were
        // traversed in reverse) so we reverse it; but block-param
        // sequences in the concatenated vec can remain in reverse
        // order (it is effectively an arena of arbitrarily-placed
        // referenced sequences).
        self.vcode.block_params_range.reverse_index();
        // Likewise, we reverse block_succ_range, but the block_succ
        // concatenated array can remain as-is.
        self.vcode.block_succ_range.reverse_index();
        self.vcode.insts.reverse();
        self.vcode.srclocs.reverse();
        // Likewise, branch_block_arg_succ_range is indexed by block
        // so must be reversed.
        self.vcode.branch_block_arg_succ_range.reverse_index();

        // To translate an instruction index *endpoint* in reversed
        // order to forward order, compute `n_insts - i`.
        //
        // Why not `n_insts - 1 - i`? That would be correct to
        // translate an individual instruction index (for ten insts 0
        // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
        // 0). But for the usual inclusive-start, exclusive-end range
        // idiom, inclusive starts become exclusive ends and
        // vice-versa, so e.g. an (inclusive) start of 0 becomes an
        // (exclusive) end of 10.
        let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());

        // Generate debug-value labels based on per-label maps.
        for (label, tuples) in &self.debug_info {
            for &(start, end, vreg) in tuples {
                let vreg = vregs.resolve_vreg_alias(vreg);
                let fwd_start = translate(end);
                let fwd_end = translate(start);
                self.vcode
                    .debug_value_labels
                    .push((vreg, fwd_start, fwd_end, label.as_u32()));
            }
        }

        // Now sort debug value labels by VReg, as required
        // by regalloc2.
        self.vcode
            .debug_value_labels
            .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
    }

    fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
        let allocatable = PRegSet::from(self.vcode.machine_env());
        for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
            // Push operands from the instruction onto the operand list.
            //
            // We rename through the vreg alias table as we collect
            // the operands. This is better than a separate post-pass
            // over operands, because it has more cache locality:
            // operands only need to pass through L1 once. This is
            // also better than renaming instructions'
            // operands/registers while lowering, because here we only
            // need to do the `match` over the instruction to visit
            // its register fields (which is slow, branchy code) once.

            let mut op_collector =
                OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
                    vregs.resolve_vreg_alias(vreg)
                });
            insn.get_operands(&mut op_collector);
            let (ops, clobbers) = op_collector.finish();
            self.vcode.operand_ranges.push_end(ops);

            if clobbers != PRegSet::default() {
                self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
            }

            if let Some((dst, src)) = insn.is_move() {
                // We should never see non-virtual registers present in move
                // instructions.
                assert!(
                    src.is_virtual(),
                    "the real register {src:?} was used as the source of a move instruction"
                );
                assert!(
                    dst.to_reg().is_virtual(),
                    "the real register {:?} was used as the destination of a move instruction",
                    dst.to_reg()
                );
            }
        }

        // Translate blockparam args via the vreg aliases table as well.
        for arg in &mut self.vcode.branch_block_args {
            let new_arg = vregs.resolve_vreg_alias(*arg);
            trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
            *arg = new_arg;
        }
    }

    /// Build the final VCode.
    pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
        self.vcode.vreg_types = take(&mut vregs.vreg_types);
        self.vcode.facts = take(&mut vregs.facts);

        if self.direction == VCodeBuildDirection::Backward {
            self.reverse_and_finalize(&vregs);
        }
        self.collect_operands(&vregs);

        self.compute_preds_from_succs();
        self.vcode.debug_value_labels.sort_unstable();

        // At this point, nothing in the vcode should mention any
        // VReg which has been aliased. All the appropriate rewriting
        // should have happened above. Just to be sure, let's
        // double-check each field which has vregs.
        // Note: can't easily check vcode.insts, resolved in collect_operands.
        // Operands are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
        // Currently block params are never aliased to another vreg.
        vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
        // Branch block args are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
        // Debug value labels are resolved in reverse_and_finalize.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
        );
        // Facts are resolved eagerly during set_vreg_alias.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode
                .facts
                .iter()
                .zip(&vregs.vreg_types)
                .enumerate()
                .filter(|(_, (fact, _))| fact.is_some())
                .map(|(vreg, (_, &ty))| {
                    let (regclasses, _) = I::rc_for_type(ty).unwrap();
                    VReg::new(vreg, regclasses[0])
                }),
        );

        self.vcode
    }

    /// Add a user stack map for the associated instruction.
    pub fn add_user_stack_map(
        &mut self,
        inst: BackwardsInsnIndex,
        entries: &[ir::UserStackMapEntry],
    ) {
        let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
        let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
        debug_assert!(old_entry.is_none());
    }
}

const NO_INST_OFFSET: CodeOffset = u32::MAX;

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        log2_min_function_alignment: u8,
    ) -> Self {
        let n_blocks = block_order.lowered_order().len();
        VCode {
            sigs,
            vreg_types: vec![],
            insts: Vec::with_capacity(10 * n_blocks),
            user_stack_maps: FxHashMap::default(),
            operands: Vec::with_capacity(30 * n_blocks),
            operand_ranges: Ranges::with_capacity(10 * n_blocks),
            clobbers: FxHashMap::default(),
            srclocs: Vec::with_capacity(10 * n_blocks),
            entry: BlockIndex::new(0),
            block_ranges: Ranges::with_capacity(n_blocks),
            block_succ_range: Ranges::with_capacity(n_blocks),
            block_succs: Vec::with_capacity(n_blocks),
            block_pred_range: Ranges::default(),
            block_preds: Vec::new(),
            block_params_range: Ranges::with_capacity(n_blocks),
            block_params: Vec::with_capacity(5 * n_blocks),
            branch_block_args: Vec::with_capacity(10 * n_blocks),
            branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
            branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
            block_order,
            abi,
            emit_info,
            constants,
            debug_value_labels: vec![],
            facts: vec![],
            log2_min_function_alignment,
        }
    }

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    pub fn machine_env(&self) -> &MachineEnv {
        self.abi.machine_env(&self.sigs)
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// (self.num_blocks() - 1)`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// The number of lowered instructions.
    pub fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
        let mut clobbered = PRegSet::default();

        // All moves are included in clobbers.
        for (_, Edit::Move { to, .. }) in &regalloc.edits {
            if let Some(preg) = to.as_reg() {
                clobbered.add(preg);
            }
        }

        for (i, range) in self.operand_ranges.iter() {
            // Skip this instruction if not "included in clobbers" as
            // per the MachInst. (Some backends use this to implement
            // ABI specifics; e.g., excluding calls of the same ABI as
            // the current function from clobbers, because by
            // definition everything clobbered by the call can be
            // clobbered by this function without saving as well.)
            if !self.insts[i].is_included_in_clobbers() {
                continue;
            }

            let operands = &self.operands[range.clone()];
            let allocs = &regalloc.allocs[range];
            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
                if operand.kind() == OperandKind::Def {
                    if let Some(preg) = alloc.as_reg() {
                        clobbered.add(preg);
                    }
                }
            }

            // Also add explicitly-clobbered registers.
            if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
                clobbered.union_from(inst_clobbered);
            }
        }

        clobbered
            .into_iter()
            .map(|preg| Writable::from_reg(RealReg::from(preg)))
            .collect()
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up
    /// code and external reloc/trap/etc. records ready for use. Takes
    /// the regalloc results as well.
    ///
    /// Returns the machine code itself, and optionally metadata
    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
    /// is consumed by the emission process.
    pub fn emit(
        mut self,
        regalloc: &regalloc2::Output,
        want_disasm: bool,
        flags: &settings::Flags,
        ctrl_plane: &mut ControlPlane,
    ) -> EmitResult
    where
        I: VCodeInst,
    {
        // To write into disasm string.
        use core::fmt::Write;

        let _tt = timing::vcode_emit();
        let mut buffer = MachBuffer::new();
        buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
        let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

        // The first M MachLabels are reserved for block indices.
        buffer.reserve_labels_for_blocks(self.num_blocks());

        // Register all allocated constants with the `MachBuffer` to ensure that
        // any references to the constants during instructions can be handled
        // correctly.
        buffer.register_constants(&self.constants);

        // Construct the final order we emit code in: cold blocks at the end.
        let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
        let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            if self.block_order.is_cold(block) {
                cold_blocks.push(block);
            } else {
                final_order.push(block);
            }
        }
        final_order.extend(cold_blocks.clone());

        // Compute/save info we need for the prologue: clobbers and
        // number of spillslots.
        //
        // We mutate `self.abi` here as we compute the frame layout;
        // this is fine because `emit` consumes `self`. The info the
        // ABI object carries prior to setting clobbers is fairly
        // minimal, so this computation is relatively cheap.
        let clobbers = self.compute_clobbers(regalloc);
        self.abi
            .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);

        // Emit blocks.
        let mut cur_srcloc = None;
        let mut last_offset = None;
        let mut inst_offsets = vec![];
        let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));

        let mut disasm = String::new();

        if !self.debug_value_labels.is_empty() {
            inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
        }

        // Count edits per block ahead of time; this is needed for
        // lookahead island emission. (We could derive it per-block
        // with binary search in the edit list, but it's more
        // efficient to do it in one pass here.)
        let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
        let mut edit_idx = 0;
        for block in 0..self.num_blocks() {
            let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
            let start_edit_idx = edit_idx;
            while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
                edit_idx += 1;
            }
            let end_edit_idx = edit_idx;
            ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
        }

        let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
        let mut bb_padding = match flags.bb_padding_log2_minus_one() {
            0 => Vec::new(),
            n => vec![0; 1 << (n - 1)],
        };
        let mut total_bb_padding = 0;

        for (block_order_idx, &block) in final_order.iter().enumerate() {
            trace!("emitting block {:?}", block);

            // Call the new block hook for state
            state.on_new_block();

            // Emit NOPs to align the block.
            let new_offset = I::align_basic_block(buffer.cur_offset());
            while new_offset > buffer.cur_offset() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
                nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
            }
            assert_eq!(buffer.cur_offset(), new_offset);

            let do_emit = |inst: &I,
                           disasm: &mut String,
                           buffer: &mut MachBuffer<I>,
                           state: &mut I::State| {
                if want_disasm && !inst.is_args() {
                    let mut s = state.clone();
                    writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();
                }
                inst.emit(buffer, &self.emit_info, state);
            };

            // Is this the first block? Emit the prologue directly if so.
            if block == self.entry {
                trace!(" -> entry block");
                buffer.start_srcloc(Default::default());
                for inst in &self.abi.gen_prologue() {
                    do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                }
                buffer.end_srcloc();
            }

            // Now emit the regular block body.

            buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());

            if want_disasm {
                writeln!(&mut disasm, "block{}:", block.index()).unwrap();
            }

            if flags.machine_code_cfg_info() {
                // Track BB starts. If we have backed up due to MachBuffer
                // branch opts, note that the removed blocks were removed.
                let cur_offset = buffer.cur_offset();
                if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
                    for i in (0..bb_starts.len()).rev() {
                        if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
                            break;
                        }
                        bb_starts[i] = None;
                    }
                }
                bb_starts.push(Some(cur_offset));
                last_offset = Some(cur_offset);
            }

            if let Some(block_start) = I::gen_block_start(
                self.block_order.is_indirect_branch_target(block),
                is_forward_edge_cfi_enabled,
            ) {
                do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
            }

            for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
                match inst_or_edit {
                    InstOrEdit::Inst(iix) => {
                        if !self.debug_value_labels.is_empty() {
                            // If we need to produce debug info,
                            // record the offset of each instruction
                            // so that we can translate value-label
                            // ranges to machine-code offsets.

                            // Cold blocks violate monotonicity
                            // assumptions elsewhere (that
                            // instructions in inst-index order are in
                            // order in machine code), so we omit
                            // their offsets here. Value-label range
                            // generation below will skip empty ranges
                            // and ranges with to-offsets of zero.
                            if !self.block_order.is_cold(block) {
                                inst_offsets[iix.index()] = buffer.cur_offset();
                            }
                        }

                        // Update the srcloc at this point in the buffer.
                        let srcloc = self.srclocs[iix.index()];
                        if cur_srcloc != Some(srcloc) {
                            if cur_srcloc.is_some() {
                                buffer.end_srcloc();
                            }
                            buffer.start_srcloc(srcloc);
                            cur_srcloc = Some(srcloc);
                        }

                        // If this is a safepoint, compute a stack map
                        // and pass it to the emit state.
                        let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
                            let (user_stack_map, user_stack_map_disasm) = {
                                // The `user_stack_maps` is keyed by reverse
                                // instruction index, so we must flip the
                                // index. We can't put this into a helper method
                                // due to borrowck issues because parts of
                                // `self` are borrowed mutably elsewhere in this
                                // function.
                                let index = iix.to_backwards_insn_index(self.num_insts());
                                let user_stack_map = self.user_stack_maps.remove(&index);
                                let user_stack_map_disasm =
                                    user_stack_map.as_ref().map(|m| format!(" ; {m:?}"));
                                (user_stack_map, user_stack_map_disasm)
                            };

                            state.pre_safepoint(user_stack_map);

                            user_stack_map_disasm
                        } else {
                            None
                        };

                        // If the instruction we are about to emit is
                        // a return, place an epilogue at this point
                        // (and don't emit the return; the actual
                        // epilogue will contain it).
                        if self.insts[iix.index()].is_term() == MachTerminator::Ret {
                            for inst in self.abi.gen_epilogue() {
                                do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                            }
                        } else {
                            // Update the operands for this inst using the
                            // allocations from the regalloc result.
                            let mut allocs = regalloc.inst_allocs(iix).iter();
                            self.insts[iix.index()].get_operands(
                                &mut |reg: &mut Reg, constraint, _kind, _pos| {
                                    let alloc = allocs
                                        .next()
                                        .expect("enough allocations for all operands")
                                        .as_reg()
                                        .expect("only register allocations, not stack allocations")
                                        .into();

                                    if let OperandConstraint::FixedReg(rreg) = constraint {
                                        debug_assert_eq!(Reg::from(rreg), alloc);
                                    }
                                    *reg = alloc;
                                },
                            );
                            debug_assert!(allocs.next().is_none());

                            // Emit the instruction!
                            do_emit(
                                &self.insts[iix.index()],
                                &mut disasm,
                                &mut buffer,
                                &mut state,
                            );
                            if let Some(stack_map_disasm) = stack_map_disasm {
                                disasm.push_str(&stack_map_disasm);
                                disasm.push('\n');
                            }
                        }
                    }

                    InstOrEdit::Edit(Edit::Move { from, to }) => {
                        // Create a move/spill/reload instruction and
                        // immediately emit it.
                        match (from.as_reg(), to.as_reg()) {
                            (Some(from), Some(to)) => {
                                // Reg-to-reg move.
                                let from_rreg = Reg::from(from);
                                let to_rreg = Writable::from_reg(Reg::from(to));
                                debug_assert_eq!(from.class(), to.class());
                                let ty = I::canonical_type_for_rc(from.class());
                                let mv = I::gen_move(to_rreg, from_rreg, ty);
                                do_emit(&mv, &mut disasm, &mut buffer, &mut state);
                            }
                            (Some(from), None) => {
                                // Spill from register to spillslot.
                                let to = to.as_stack().unwrap();
                                let from_rreg = RealReg::from(from);
                                let spill = self.abi.gen_spill(to, from_rreg);
                                do_emit(&spill, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, Some(to)) => {
                                // Load from spillslot to register.
                                let from = from.as_stack().unwrap();
                                let to_rreg = Writable::from_reg(RealReg::from(to));
                                let reload = self.abi.gen_reload(to_rreg, from);
                                do_emit(&reload, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, None) => {
                                panic!("regalloc2 should have eliminated stack-to-stack moves!");
                            }
                        }
                    }
                }
            }

            if cur_srcloc.is_some() {
                buffer.end_srcloc();
                cur_srcloc = None;
            }

            // Do we need an island? Get the worst-case size of the next BB, add
            // it to the optional padding behind the block, and pass this to the
            // `MachBuffer` to determine if an island is necessary.
            let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
                let next_block = final_order[block_order_idx + 1];
                let next_block_range = self.block_ranges.get(next_block.index());
                let next_block_size = next_block_range.len() as u32;
                let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
                I::worst_case_size() * (next_block_size + next_block_ra_insertions)
            } else {
                0
            };
            let padding = if bb_padding.is_empty() {
                0
            } else {
                bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
            };
            if buffer.island_needed(padding + worst_case_next_bb) {
                buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
            }

            // Insert padding, if configured, to stress the `MachBuffer`'s
            // relocation and island calculations.
            //
            // Padding can get quite large during fuzzing, though, so place
            // a total cap on it: when a per-function threshold is exceeded,
            // the padding is turned back down to zero. This avoids a
            // small-ish test case generating a GB+ memory footprint in
            // Cranelift, for example.
            if !bb_padding.is_empty() {
                buffer.put_data(&bb_padding);
                buffer.align_to(I::LabelUse::ALIGN);
                total_bb_padding += bb_padding.len();
                if total_bb_padding > (150 << 20) {
                    bb_padding = Vec::new();
                }
            }
        }

        debug_assert!(
            self.user_stack_maps.is_empty(),
            "any stack maps should have been consumed by instruction emission, still have: {:#?}",
            self.user_stack_maps,
        );

        // Do any optimizations on branches at tail of buffer, as if we had
        // bound one last label.
        buffer.optimize_branches(ctrl_plane);

        // emission state is not needed anymore, move control plane back out
        *ctrl_plane = state.take_ctrl_plane();

        let func_body_len = buffer.cur_offset();

        // Create `bb_edges` and final (filtered) `bb_starts`.
        let mut bb_edges = vec![];
        let mut bb_offsets = vec![];
        if flags.machine_code_cfg_info() {
            for block in 0..self.num_blocks() {
                if bb_starts[block].is_none() {
                    // Block was deleted by MachBuffer; skip.
                    continue;
                }
                let from = bb_starts[block].unwrap();

                bb_offsets.push(from);
                // Resolve each `succ` label and add edges.
                let succs = self.block_succs(BlockIndex::new(block));
                for &succ in succs.iter() {
                    let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
                    bb_edges.push((from, to));
                }
            }
        }

        self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
        let value_labels_ranges =
            self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
        let frame_size = self.abi.frame_size();

        EmitResult {
            buffer: buffer.finish(&self.constants, ctrl_plane),
            bb_offsets,
            bb_edges,
            func_body_len,
            disasm: if want_disasm { Some(disasm) } else { None },
            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
            value_labels_ranges,
            frame_size,
        }
    }

    fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
        if self.debug_value_labels.is_empty() {
            return;
        }

        // During emission, branch removal can make offsets of instructions incorrect.
        // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
        // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
        // When the jumps get removed we are left with (in "inst_offsets"):
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [38]  [42]  [34]
        // Which violates the monotonicity invariant. This method sets offsets of these
        // removed instructions such as to make them appear zero-sized:
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [34]  [34]  [34]
        //
        let mut next_offset = func_body_len;
        for inst_index in (0..(inst_offsets.len() - 1)).rev() {
            let inst_offset = inst_offsets[inst_index];

            // Not all instructions get their offsets recorded.
            if inst_offset == NO_INST_OFFSET {
                continue;
            }

            if inst_offset > next_offset {
                trace!(
                    "Fixing code offset of the removed Inst {}: {} -> {}",
                    inst_index,
                    inst_offset,
                    next_offset
                );
                inst_offsets[inst_index] = next_offset;
                continue;
            }

            next_offset = inst_offset;
        }
    }

    fn compute_value_labels_ranges(
        &self,
        regalloc: &regalloc2::Output,
        inst_offsets: &[CodeOffset],
        func_body_len: u32,
    ) -> ValueLabelsRanges {
        if self.debug_value_labels.is_empty() {
            return ValueLabelsRanges::default();
        }

        let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
        for &(label, from, to, alloc) in &regalloc.debug_locations {
            let ranges = value_labels_ranges
                .entry(ValueLabel::from_u32(label))
                .or_insert_with(|| vec![]);
            let from_offset = inst_offsets[from.inst().index()];
            let to_offset = if to.inst().index() == inst_offsets.len() {
                func_body_len
            } else {
                inst_offsets[to.inst().index()]
            };

            // Empty ranges or unavailable offsets can happen
            // due to cold blocks and branch removal (see above).
            if from_offset == NO_INST_OFFSET
                || to_offset == NO_INST_OFFSET
                || from_offset == to_offset
            {
                continue;
            }

            let loc = if let Some(preg) = alloc.as_reg() {
                LabelValueLoc::Reg(Reg::from(preg))
            } else {
                let slot = alloc.as_stack().unwrap();
                let slot_offset = self.abi.get_spillslot_offset(slot);
                let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
                let caller_sp_to_cfa_offset =
                    crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
                // NOTE: this is a negative offset because it's relative to the caller's SP
                let cfa_to_sp_offset =
                    -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
                LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
            };

            // ValueLocRanges are recorded by *instruction-end
            // offset*. `from_offset` is the *start* of the
            // instruction; that is the same as the end of another
            // instruction, so we only want to begin coverage once
            // we are past the previous instruction's end.
            let start = from_offset + 1;

            // Likewise, `end` is exclusive, but we want to
            // *include* the end of the last
            // instruction. `to_offset` is the start of the
            // `to`-instruction, which is the exclusive end, i.e.,
            // the first instruction not covered. That
            // instruction's start is the same as the end of the
            // last instruction that is included, so we go one
            // byte further to be sure to include it.
            let end = to_offset + 1;

            // Coalesce adjacent ranges that are for the same location,
            // to minimize output size here and for the consumers.
            if let Some(last_loc_range) = ranges.last_mut() {
                if last_loc_range.loc == loc && last_loc_range.end == start {
                    trace!(
                        "Extending debug range for VL{} in {:?} to {}",
                        label,
                        loc,
                        end
                    );
                    last_loc_range.end = end;
                    continue;
                }
            }

            trace!(
                "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
                label,
                loc,
                from.inst().index(),
                to.inst().index(),
                start,
                end
            );

            ranges.push(ValueLocRange { loc, start, end });
        }

        value_labels_ranges
    }

    /// Get the IR block for a BlockIndex, if one exists.
    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
        self.block_order.lowered_order()[block.index()].orig_block()
    }

    /// Get the type of a VReg.
    pub fn vreg_type(&self, vreg: VReg) -> Type {
        self.vreg_types[vreg.vreg()]
    }

    /// Get the fact, if any, for a given VReg.
    pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
        self.facts[vreg.vreg()].as_ref()
    }

    /// Set the fact for a given VReg.
    pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
        trace!("set fact on {}: {:?}", vreg, fact);
        self.facts[vreg.vreg()] = Some(fact);
    }

    /// Does a given instruction define any facts?
    pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
        self.inst_operands(inst)
            .iter()
            .filter(|o| o.kind() == OperandKind::Def)
            .map(|o| o.vreg())
            .any(|vreg| self.facts[vreg.vreg()].is_some())
    }

    /// Get the user stack map associated with the given forward instruction index.
    pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
        let index = inst.to_backwards_insn_index(self.num_insts());
        self.user_stack_maps.get(&index)
    }
}

impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
    type Output = I;
    fn index(&self, idx: InsnIndex) -> &Self::Output {
        &self.insts[idx.index()]
    }
}

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    fn entry_block(&self) -> BlockIndex {
        self.entry
    }

    fn block_insns(&self, block: BlockIndex) -> InstRange {
        let range = self.block_ranges.get(block.index());
        InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
    }

    fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_succ_range.get(block.index());
        &self.block_succs[range]
    }

    fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_pred_range.get(block.index());
        &self.block_preds[range]
    }

    fn block_params(&self, block: BlockIndex) -> &[VReg] {
        // As a special case we don't return block params for the entry block, as all the arguments
        // will be defined by the `Inst::Args` instruction.
        if block == self.entry {
            return &[];
        }

        let range = self.block_params_range.get(block.index());
        &self.block_params[range]
    }

    fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
        let succ_range = self.branch_block_arg_succ_range.get(block.index());
        debug_assert!(succ_idx < succ_range.len());
        let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
        &self.branch_block_args[branch_block_args]
    }

    fn is_ret(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            // We treat blocks terminated by an unconditional trap like a return for regalloc.
            MachTerminator::None => self.insts[insn.index()].is_trap(),
            MachTerminator::Ret | MachTerminator::RetCall => true,
            MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
        }
    }

    fn is_branch(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
            _ => false,
        }
    }

    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
        let range = self.operand_ranges.get(insn.index());
        &self.operands[range]
    }

    fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
        self.clobbers.get(&insn).cloned().unwrap_or_default()
    }

    fn num_vregs(&self) -> usize {
        self.vreg_types.len()
    }

    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
        &self.debug_value_labels
    }

    fn spillslot_size(&self, regclass: RegClass) -> usize {
        self.abi.get_spillslot_size(regclass) as usize
    }

    fn allow_multiple_vreg_defs(&self) -> bool {
        // At least the s390x backend requires this, because the
        // `Loop` pseudo-instruction aggregates all Operands so pinned
        // vregs (RealRegs) may occur more than once.
        true
    }
}
impl<I: VCodeInst> fmt::Debug for VRegAllocator<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VRegAllocator {{")?;

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for (vreg, fact) in self.facts.iter().enumerate() {
            if let Some(fact) = fact {
                writeln!(f, " v{vreg} ! {fact}")?;
            }
        }

        writeln!(f, "}}")
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, " Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(
                f,
                "Block {}({:?}):",
                block.index(),
                self.block_params(block)
            )?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, " (original IR block: {bb})")?;
            }
            for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
                writeln!(
                    f,
                    " (successor: Block {}({:?}))",
                    succ.index(),
                    self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
                )?;
            }
            for inst in self.block_ranges.get(block.index()) {
                writeln!(
                    f,
                    " Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&mut state)
                )?;
                if !self.operands.is_empty() {
                    for operand in self.inst_operands(InsnIndex::new(inst)) {
                        if operand.kind() == OperandKind::Def {
                            if let Some(fact) = &self.facts[operand.vreg().vreg()] {
                                writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;
                            }
                        }
                    }
                }
                if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
                    writeln!(f, " {user_stack_map:?}")?;
                }
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }

    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }

    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }

    /// Rewrite any mention of `from` into `to`.
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
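///
/// An illustrative sketch of the deduplication behavior (hypothetical values,
/// shown as non-doctested text):
///
/// ```text
/// let mut constants = VCodeConstants::with_capacity(2);
/// let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
/// assert_eq!(a, b); // U64 data is deduplicated
/// ```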
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown] but not
    /// [VCodeConstantData::Generated].
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction context
    /// which makes it difficult to de-duplicate--if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
        assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            3 * size_of::<usize>()
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}