cranelift_codegen/machinst/mod.rs

//! This module exposes the machine-specific backend definition pieces.
//!
//! The MachInst infrastructure is the compiler backend, from CLIF
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
//! high level, to do instruction selection/lowering (to machine instructions),
//! register allocation, and then perform all the fixups to branches, constant
//! data references, etc., needed to actually generate machine code.
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code".
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//!
//! ```plain
//!
//!     ir::Function                (SSA IR, machine-independent opcodes)
//!         |
//!         |  [lower]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - mostly virtual registers.
//!         |                        - cond branches in two-target form.
//!         |                        - branch targets are block indices.
//!         |                        - in-memory constants held by insns,
//!         |                          with unknown offsets.
//!         |                        - critical edges (actually all edges)
//!         |                          are split.)
//!         |
//!         | [regalloc --> `regalloc2::Output`; VCode is unchanged]
//!         |
//!         | [binary emission via MachBuffer]
//!         |
//!     Vec<u8>                     (machine code:
//!         |                        - two-dest branches resolved via
//!         |                          streaming branch resolution/simplification.
//!         |                        - regalloc `Allocation` results used directly
//!         |                          by instruction emission code.
//!         |                        - prologue and epilogue(s) built and emitted
//!         |                          directly during emission.
//!         |                        - SP-relative offsets resolved by tracking
//!         |                          EmitState.)
//!
//! ```
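//!
//! As a rough illustration of how these stages chain together (the function
//! names below are hypothetical placeholders, not APIs of this crate; the
//! real entry points live in the `lower`, `compile`, and `vcode` submodules):
//!
//! ```ignore
//! let vcode = lower_to_vcode(&clif_function);       // ir::Function -> VCode<Inst>
//! let ra_results = run_regalloc(&vcode);            // regalloc2::Output; VCode unchanged
//! let code: Vec<u8> = emit_machine_code(&vcode, &ra_results); // via MachBuffer
//! ```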

use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc};
use crate::ir::{
    self, DynamicStackSlot, RelSourceLoc, StackSlot, Type, function::FunctionParameters,
};
use crate::isa::FunctionAlignment;
use crate::result::CodegenResult;
use crate::settings;
use crate::settings::Flags;
use crate::value_label::ValueLabelsRanges;
use alloc::vec::Vec;
use core::fmt::Debug;
use cranelift_control::ControlPlane;
use cranelift_entity::PrimaryMap;
use regalloc2::VReg;
use smallvec::{SmallVec, smallvec};
use std::string::String;

#[cfg(feature = "enable-serde")]
use serde_derive::{Deserialize, Serialize};

#[macro_use]
pub mod isle;

pub mod lower;
pub use lower::*;
pub mod vcode;
pub use vcode::*;
pub mod compile;
pub use compile::*;
pub mod blockorder;
pub use blockorder::*;
pub mod abi;
pub use abi::*;
pub mod buffer;
pub use buffer::*;
pub mod helpers;
pub use helpers::*;
pub mod valueregs;
pub use reg::*;
pub use valueregs::*;
pub mod pcc;
pub mod reg;

/// A machine instruction.
pub trait MachInst: Clone + Debug {
    /// The ABI machine spec for this `MachInst`.
    type ABIMachineSpec: ABIMachineSpec<I = Self>;

    /// Return the registers referenced by this machine instruction along with
    /// the modes of reference (use, def, modify).
    fn get_operands(&mut self, collector: &mut impl OperandVisitor);

    /// If this is a simple move, return the (destination, source) tuple of registers.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;

    /// Is this a terminator (branch or ret)? If so, return its type
    /// (ret/uncond/cond) and target if applicable.
    fn is_term(&self) -> MachTerminator;

    /// Is this an unconditional trap?
    fn is_trap(&self) -> bool;

    /// Is this an "args" pseudoinst?
    fn is_args(&self) -> bool;

    /// Should this instruction's clobber-list be included in the
    /// clobber-set?
    fn is_included_in_clobbers(&self) -> bool;

    /// Does this instruction access memory?
    fn is_mem_access(&self) -> bool;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

    /// Generate a dummy instruction that will keep a value alive but
    /// has no other purpose.
    fn gen_dummy_use(reg: Reg) -> Self;

    /// Determine register class(es) to store the given Cranelift type, and the
    /// Cranelift type actually stored in the underlying register(s). May return
    /// an error if the type isn't supported by this backend.
    ///
    /// If the type requires multiple registers, then the list of registers is
    /// returned in little-endian order.
    ///
    /// Note that the type actually stored in the register(s) may differ in the
    /// case that a value is split across registers: for example, on a 32-bit
    /// target, an I64 may be stored in two registers, each of which holds an
    /// I32. The actually-stored types are used only to inform the backend when
    /// generating spills and reloads for individual registers.
    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;

    /// Get an appropriate type that can fully hold a value in a given
    /// register class. This may not be the only type that maps to
    /// that class, but when used with `gen_move()` or the ABI trait's
    /// load/spill constructors, it should produce instruction(s) that
    /// move the entire register contents.
    fn canonical_type_for_rc(rc: RegClass) -> Type;

    /// Generate a jump to another target. Used during lowering of
    /// control flow.
    fn gen_jump(target: MachLabel) -> Self;

    /// Generate an instruction that writes an immediate 64-bit integer to a
    /// register. Used by the control plane to generate random instructions.
    fn gen_imm_u64(_value: u64, _dst: Writable<Reg>) -> Option<Self> {
        None
    }

    /// Generate an instruction sequence that writes an immediate 64-bit
    /// floating-point value to a register. Used by the control plane to
    /// generate random instructions. The `tmp` register may be used by
    /// architectures which don't support writing immediate values to
    /// floating-point registers directly.
    fn gen_imm_f64(_value: f64, _tmp: Writable<Reg>, _dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        SmallVec::new()
    }

    /// Generate a NOP. The `preferred_size` parameter allows the caller to
    /// request a NOP of that size, or as close to it as possible. The machine
    /// backend may return a NOP whose binary encoding is smaller than the
    /// preferred size, but must not return a NOP that is larger. However,
    /// the instruction must have a nonzero size if `preferred_size` is nonzero.
    fn gen_nop(preferred_size: usize) -> Self;

    /// Align a basic block offset (from start of function). By default, no
    /// alignment occurs.
    fn align_basic_block(offset: CodeOffset) -> CodeOffset {
        offset
    }

    /// What is the worst-case instruction size emitted by this instruction type?
    fn worst_case_size() -> CodeOffset;

    /// What is the register class used for reference types (GC-observable pointers)? Can
    /// be dependent on compilation flags.
    fn ref_type_regclass(_flags: &Flags) -> RegClass;

    /// Is this a safepoint?
    fn is_safepoint(&self) -> bool;

    /// Generate an instruction that must appear at the beginning of a basic
    /// block, if any. Note that the return value must not be subject to
    /// register allocation.
    fn gen_block_start(
        _is_indirect_branch_target: bool,
        _is_forward_edge_cfi_enabled: bool,
    ) -> Option<Self> {
        None
    }

    /// Returns a description of the alignment required for functions for this
    /// architecture.
    fn function_alignment() -> FunctionAlignment;

    /// Is this a low-level, one-way branch, not meant for use in a
    /// VCode body? These instructions are meant to be used only when
    /// directly emitted, i.e. when `MachInst` is used as an assembler
    /// library.
    fn is_low_level_branch(&self) -> bool {
        false
    }

    /// A label-use kind: a type that describes the types of label references that
    /// can occur in an instruction.
    type LabelUse: MachInstLabelUse;

    /// Byte representation of a trap opcode which is inserted by `MachBuffer`
    /// during its `defer_trap` method.
    const TRAP_OPCODE: &'static [u8];
}
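
// Illustrative sketch (the helper below is hypothetical and not part of the
// backend API): `rc_for_type` and `gen_move` compose naturally when a value
// must be moved piecewise. A value of type `ty` may be split across several
// registers (listed in little-endian order); each part is then moved with the
// type actually stored in its register, e.g. two I32 moves for an I64 on a
// 32-bit target.
#[allow(dead_code)]
fn gen_moves_for_type<I: MachInst>(
    ty: Type,
    dsts: &[Writable<Reg>],
    srcs: &[Reg],
) -> CodegenResult<SmallVec<[I; 2]>> {
    let (_reg_classes, reg_tys) = I::rc_for_type(ty)?;
    debug_assert_eq!(reg_tys.len(), dsts.len());
    debug_assert_eq!(reg_tys.len(), srcs.len());
    let mut moves = SmallVec::new();
    for ((&reg_ty, &dst), &src) in reg_tys.iter().zip(dsts).zip(srcs) {
        moves.push(I::gen_move(dst, src, reg_ty));
    }
    Ok(moves)
}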

/// A descriptor of a label reference (use) in an instruction set.
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
    /// Required alignment for any veneer. Usually the required instruction
    /// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
    const ALIGN: CodeOffset;

    /// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
    /// label-reference fixup at offset `x` is valid if the label resolves to an
    /// offset no greater than `x + 1024`.
    fn max_pos_range(self) -> CodeOffset;
    /// What is the maximum PC-relative range (negative)? This is the absolute
    /// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
    /// valid if the label resolves to an offset no less than `x - 1024`.
    fn max_neg_range(self) -> CodeOffset;
    /// What is the size of code-buffer slice this label-use needs to patch in
    /// the label's value?
    fn patch_size(self) -> CodeOffset;
    /// Perform a code-patch, given the offset into the buffer of this label use
    /// and the offset into the buffer of the label's definition.
    /// It is guaranteed that, given `delta = label_offset - use_offset`, we
    /// will have `delta >= -self.max_neg_range()` and `delta <=
    /// self.max_pos_range()`.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
    /// Can the label-use be patched to a veneer that supports a longer range?
    /// Usually valid for jumps (a short-range jump can jump to a longer-range
    /// jump), but not for e.g. constant pool references, because the constant
    /// load would require different code (one more level of indirection).
    fn supports_veneer(self) -> bool;
    /// How many bytes are needed for a veneer?
    fn veneer_size(self) -> CodeOffset;
    /// What's the largest possible veneer that may be generated?
    fn worst_case_veneer_size() -> CodeOffset;
    /// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
    /// bytes long at offset `veneer_offset` in the buffer. The original
    /// label-use will be patched to refer to this veneer's offset. A new
    /// (offset, LabelUse) is returned that allows the veneer to use the actual
    /// label. For veneers to work properly, it is expected that the new veneer
    /// has a larger range; on most platforms this probably means either a
    /// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
    /// stage, a jump that supports a full 32-bit range, for example.
    fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);

    /// Returns the corresponding label-use for the relocation specified.
    ///
    /// This returns `None` if the relocation doesn't have a corresponding
    /// representation for the target architecture.
    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self>;
}
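
// Illustrative sketch (hypothetical helper, not part of this trait): deciding
// whether a label use at `use_offset` can directly reach a label bound at
// `label_offset`, or whether a veneer would be needed (and whether one is even
// possible, per `supports_veneer`).
#[allow(dead_code)]
fn reachable_without_veneer<U: MachInstLabelUse>(
    kind: U,
    use_offset: CodeOffset,
    label_offset: CodeOffset,
) -> bool {
    if label_offset >= use_offset {
        label_offset - use_offset <= kind.max_pos_range()
    } else {
        use_offset - label_offset <= kind.max_neg_range()
    }
}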

/// Describes a block terminator (not including calls) in the VCode.
///
/// Actual targets are not included: the single source of truth for
/// those is the VCode itself, which holds, for each block, successors
/// and outgoing branch args per successor.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MachTerminator {
    /// Not a terminator.
    None,
    /// A return instruction.
    Ret,
    /// A tail call.
    RetCall,
    /// A branch.
    Branch,
}
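
// Illustrative sketch (hypothetical helper): lowering and block-layout code can
// use `MachInst::is_term` to ask whether an instruction ends its basic block;
// any variant other than `MachTerminator::None` is a terminator.
#[allow(dead_code)]
fn ends_block<I: MachInst>(inst: &I) -> bool {
    !matches!(inst.is_term(), MachTerminator::None)
}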

/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
    /// Persistent state carried across `emit` invocations.
    type State: MachInstEmitState<Self>;

    /// Constant information used in `emit` invocations.
    type Info;

    /// Emit the instruction.
    fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);

    /// Pretty-print the instruction.
    fn pretty_print_inst(&self, state: &mut Self::State) -> String;
}
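
// Illustrative sketch (hypothetical helper): emitting a straight-line sequence
// of instructions into a `MachBuffer`, threading the per-function emission
// state through each `emit` call. The real emission logic (in the `vcode`
// module) additionally handles branch simplification, islands, and alignment.
#[allow(dead_code)]
fn emit_all<I: VCodeInst>(
    insts: &[I],
    buffer: &mut MachBuffer<I>,
    info: &I::Info,
    state: &mut I::State,
) {
    for inst in insts {
        inst.emit(buffer, info, state);
    }
}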

/// A trait describing the emission state carried between MachInsts when
/// emitting a function body.
pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {
    /// Create a new emission state given the ABI object.
    fn new(abi: &Callee<I::ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self;

    /// Update the emission state before emitting an instruction that is a
    /// safepoint.
    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>);

    /// The emission state holds ownership of a control plane, so it doesn't
    /// have to be passed around explicitly too much. `ctrl_plane_mut` may
    /// be used if temporary access to the control plane is needed by some
    /// other function that doesn't have access to the emission state.
    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane;

    /// Used to recover the control plane once the emission state is no
    /// longer needed.
    fn take_ctrl_plane(self) -> ControlPlane;

    /// A hook that triggers when first emitting a new block.
    /// It is guaranteed to be called before any instructions are emitted.
    fn on_new_block(&mut self) {}

    /// The [`FrameLayout`] for the function currently being compiled.
    fn frame_layout(&self) -> &FrameLayout;
}
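
// Illustrative sketch (hypothetical helper): the emission state owns the
// control plane for the duration of emission; once the last instruction has
// been emitted, the caller can recover it with `take_ctrl_plane` and reuse it
// for subsequent compilations.
#[allow(dead_code)]
fn recover_ctrl_plane<I: VCodeInst>(state: I::State) -> ControlPlane {
    state.take_ctrl_plane()
}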

/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
#[derive(PartialEq, Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct CompiledCodeBase<T: CompilePhase> {
    /// Machine code.
    pub buffer: MachBufferFinalized<T>,
    /// Size of stack frame, in bytes.
    pub frame_size: u32,
    /// Disassembly, if requested.
    pub vcode: Option<String>,
    /// Debug info: value labels to registers/stackslots at code offsets.
    pub value_labels_ranges: ValueLabelsRanges,
    /// Debug info: stackslots to stack pointer offsets.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
    /// Debug info: dynamic stackslots to stack pointer offsets.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
    /// Basic-block layout info: block start offsets.
    ///
    /// This info is generated only if the `machine_code_cfg_info`
    /// flag is set.
    pub bb_starts: Vec<CodeOffset>,
    /// Basic-block layout info: block edges. Each edge is `(from,
    /// to)`, where `from` and `to` are basic-block start offsets of
    /// the respective blocks.
    ///
    /// This info is generated only if the `machine_code_cfg_info`
    /// flag is set.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
}

impl CompiledCodeStencil {
    /// Apply function parameters to finalize a stencil into its final form.
    pub fn apply_params(self, params: &FunctionParameters) -> CompiledCode {
        CompiledCode {
            buffer: self.buffer.apply_base_srcloc(params.base_srcloc()),
            frame_size: self.frame_size,
            vcode: self.vcode,
            value_labels_ranges: self.value_labels_ranges,
            sized_stackslot_offsets: self.sized_stackslot_offsets,
            dynamic_stackslot_offsets: self.dynamic_stackslot_offsets,
            bb_starts: self.bb_starts,
            bb_edges: self.bb_edges,
        }
    }
}

impl<T: CompilePhase> CompiledCodeBase<T> {
    /// Get a `CodeInfo` describing section sizes from this compilation result.
    pub fn code_info(&self) -> CodeInfo {
        CodeInfo {
            total_size: self.buffer.total_size(),
        }
    }

    /// Returns a reference to the machine code generated for this function compilation.
    pub fn code_buffer(&self) -> &[u8] {
        self.buffer.data()
    }

    /// Get the disassembly of the buffer, using the given capstone context.
    #[cfg(feature = "disas")]
    pub fn disassemble(
        &self,
        params: Option<&crate::ir::function::FunctionParameters>,
        cs: &capstone::Capstone,
    ) -> Result<String, anyhow::Error> {
        use std::fmt::Write;

        let mut buf = String::new();

        let relocs = self.buffer.relocs();
        let traps = self.buffer.traps();

        // Normalize the block starts to include an initial block of offset 0.
        let mut block_starts = Vec::new();
        if self.bb_starts.first().copied() != Some(0) {
            block_starts.push(0);
        }
        block_starts.extend_from_slice(&self.bb_starts);
        block_starts.push(self.buffer.data().len() as u32);

        // Iterate over block regions, to ensure that we always produce block labels.
        for (n, (&start, &end)) in block_starts
            .iter()
            .zip(block_starts.iter().skip(1))
            .enumerate()
        {
            writeln!(buf, "block{n}: ; offset 0x{start:x}")?;

            let buffer = &self.buffer.data()[start as usize..end as usize];
            let insns = cs.disasm_all(buffer, start as u64).map_err(map_caperr)?;
            for i in insns.iter() {
                write!(buf, "  ")?;

                let op_str = i.op_str().unwrap_or("");
                if let Some(s) = i.mnemonic() {
                    write!(buf, "{s}")?;
                    if !op_str.is_empty() {
                        write!(buf, " ")?;
                    }
                }

                write!(buf, "{op_str}")?;

                let end = i.address() + i.bytes().len() as u64;
                let contains = |off| i.address() <= off && off < end;

                for reloc in relocs.iter().filter(|reloc| contains(reloc.offset as u64)) {
                    write!(
                        buf,
                        " ; reloc_external {} {} {}",
                        reloc.kind,
                        reloc.target.display(params),
                        reloc.addend,
                    )?;
                }

                if let Some(trap) = traps.iter().find(|trap| contains(trap.offset as u64)) {
                    write!(buf, " ; trap: {}", trap.code)?;
                }

                writeln!(buf)?;
            }
        }

        return Ok(buf);

        fn map_caperr(err: capstone::Error) -> anyhow::Error {
            anyhow::format_err!("{}", err)
        }
    }
}

/// Result of compiling a `FunctionStencil`, before applying `FunctionParameters` onto it.
///
/// Only used internally, in a transient manner, for the incremental compilation cache.
pub type CompiledCodeStencil = CompiledCodeBase<Stencil>;

/// `CompiledCode` in its final form (i.e. after `FunctionParameters` have been applied), ready for
/// consumption.
pub type CompiledCode = CompiledCodeBase<Final>;

impl CompiledCode {
    /// If available, return information about the code layout in the
    /// final machine code: the offsets (in bytes) of each basic-block
    /// start, and all basic-block edges.
    pub fn get_code_bb_layout(&self) -> (Vec<usize>, Vec<(usize, usize)>) {
        (
            self.bb_starts.iter().map(|&off| off as usize).collect(),
            self.bb_edges
                .iter()
                .map(|&(from, to)| (from as usize, to as usize))
                .collect(),
        )
    }

    /// Creates unwind information for the function.
    ///
    /// Returns `None` if the function has no unwind information.
    #[cfg(feature = "unwind")]
    pub fn create_unwind_info(
        &self,
        isa: &dyn crate::isa::TargetIsa,
    ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
        use crate::isa::unwind::UnwindInfoKind;
        let unwind_info_kind = match isa.triple().operating_system {
            target_lexicon::OperatingSystem::Windows => UnwindInfoKind::Windows,
            _ => UnwindInfoKind::SystemV,
        };
        self.create_unwind_info_of_kind(isa, unwind_info_kind)
    }

    /// Creates unwind information for the function using the supplied
    /// "kind". Supports cross-OS (but not cross-arch) generation.
    ///
    /// Returns `None` if the function has no unwind information.
    #[cfg(feature = "unwind")]
    pub fn create_unwind_info_of_kind(
        &self,
        isa: &dyn crate::isa::TargetIsa,
        unwind_info_kind: crate::isa::unwind::UnwindInfoKind,
    ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
        isa.emit_unwind_info(self, unwind_info_kind)
    }
}
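
// Illustrative sketch (hypothetical helper): mapping a code offset back to the
// index of the basic block that contains it, using the layout info exposed by
// `get_code_bb_layout`. This is only meaningful when the function was compiled
// with the `machine_code_cfg_info` flag set; otherwise the block-start list is
// empty and the helper returns `None`.
#[allow(dead_code)]
fn block_containing_offset(code: &CompiledCode, offset: usize) -> Option<usize> {
    let (starts, _edges) = code.get_code_bb_layout();
    // Block starts are in increasing offset order, so the containing block is
    // the last one whose start offset is <= `offset`.
    starts
        .iter()
        .enumerate()
        .rev()
        .find(|&(_, &start)| start <= offset)
        .map(|(idx, _)| idx)
}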

/// An object that can be used to create the text section of an executable.
///
/// This primarily handles resolving relative relocations at
/// text-section-assembly time rather than at load/link time. This
/// architecture-specific logic is sort of like a linker, but only for one
/// object file at a time.
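///
/// # Example
///
/// A hedged sketch of intended usage (the helper below is hypothetical and
/// assumes a concrete builder obtained elsewhere, e.g. from a `TargetIsa`):
///
/// ```ignore
/// fn link_two_functions(
///     builder: &mut dyn TextSectionBuilder,
///     func0: &[u8],
///     func1: &[u8],
///     call_reloc_offset_in_func1: u64,
///     reloc: Reloc,
///     ctrl_plane: &mut ControlPlane,
/// ) -> Vec<u8> {
///     // With `labeled: true`, each appended body is bound to the next label
///     // index in call order (assumed zero-based here).
///     let _off0 = builder.append(true, func0, 16, ctrl_plane);
///     let off1 = builder.append(true, func1, 16, ctrl_plane);
///     // Resolve a call site inside `func1` against the first appended
///     // function (target 0), with a zero addend.
///     let handled = builder.resolve_reloc(off1 + call_reloc_offset_in_func1, reloc, 0, 0);
///     assert!(handled, "builder does not understand this relocation kind");
///     builder.finish(ctrl_plane)
/// }
/// ```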
pub trait TextSectionBuilder {
    /// Appends `data` to the text section with the `align` specified.
    ///
    /// If `labeled` is `true` then the appended data is also bound to the next
    /// label index: the `n`th call with `labeled: true` defines label `n`. That
    /// label can later be passed as the `target` argument to `resolve_reloc`.
    ///
    /// This function returns the offset at which the data was placed in the
    /// text section.
    fn append(
        &mut self,
        labeled: bool,
        data: &[u8],
        align: u32,
        ctrl_plane: &mut ControlPlane,
    ) -> u64;

    /// Attempts to resolve a relocation for this function.
    ///
    /// The `offset` is the offset of the relocation, within the text section.
    /// The `reloc` is the kind of relocation.
    /// The `addend` is the value to add to the relocation.
    /// The `target` is the labeled function that is the target of this
    /// relocation.
    ///
    /// Labeled functions are created with the `append` function above by
    /// setting the `labeled` parameter to `true`.
    ///
    /// If this builder does not know how to handle `reloc` then this function
    /// will return `false`. Otherwise this function will return `true` and the
    /// relocation will be resolved in the final bytes returned by `finish`.
    fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: usize) -> bool;

    /// A debug-only option which forces the use of veneers for all label
    /// references, in order to exercise code paths that are otherwise rarely
    /// taken.
    fn force_veneers(&mut self);

    /// Write the `data` provided at `offset`, for example when resolving a
    /// relocation.
    fn write(&mut self, offset: u64, data: &[u8]);

    /// Completes this text section, filling out any final details, and returns
    /// the bytes of the text section.
    fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8>;