// cranelift_codegen/machinst/mod.rs

//! This module exposes the machine-specific backend definition pieces.
//!
//! The MachInst infrastructure is the compiler backend, from CLIF
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
//! high level, to do instruction selection/lowering (to machine instructions),
//! register allocation, and then perform all the fixups to branches, constant
//! data references, etc., needed to actually generate machine code.
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code".
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//!
//! ```plain
//!
//!     ir::Function                (SSA IR, machine-independent opcodes)
//!         |
//!         |  [lower]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - mostly virtual registers.
//!         |                        - cond branches in two-target form.
//!         |                        - branch targets are block indices.
//!         |                        - in-memory constants held by insns,
//!         |                          with unknown offsets.
//!         |                        - critical edges (actually all edges)
//!         |                          are split.)
//!         |
//!         |  [regalloc --> `regalloc2::Output`; VCode is unchanged]
//!         |
//!         |  [binary emission via MachBuffer]
//!         |
//!     Vec<u8>                     (machine code:
//!         |                        - two-dest branches resolved via
//!         |                          streaming branch resolution/simplification.
//!         |                        - regalloc `Allocation` results used directly
//!         |                          by instruction emission code.
//!         |                        - prologue and epilogue(s) built and emitted
//!         |                          directly during emission.
//!         |                        - SP-relative offsets resolved by tracking
//!         |                          EmitState.)
//!
//! ```
46
47use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc};
48use crate::ir::{
49 self, DynamicStackSlot, RelSourceLoc, StackSlot, Type, function::FunctionParameters,
50};
51use crate::isa::FunctionAlignment;
52use crate::result::CodegenResult;
53use crate::settings;
54use crate::settings::Flags;
55use crate::value_label::ValueLabelsRanges;
56use alloc::string::String;
57use alloc::vec::Vec;
58use core::fmt::Debug;
59use cranelift_control::ControlPlane;
60use cranelift_entity::PrimaryMap;
61use regalloc2::VReg;
62use smallvec::{SmallVec, smallvec};
63
64#[cfg(feature = "enable-serde")]
65use serde_derive::{Deserialize, Serialize};
66
67#[macro_use]
68pub mod isle;
69
70pub mod lower;
71pub use lower::*;
72pub mod vcode;
73pub use vcode::*;
74pub mod compile;
75pub use compile::*;
76pub mod blockorder;
77pub use blockorder::*;
78pub mod abi;
79pub use abi::*;
80pub mod buffer;
81pub use buffer::*;
82pub mod helpers;
83pub use helpers::*;
84pub mod valueregs;
85pub use reg::*;
86pub use valueregs::*;
87pub mod reg;
88
/// A machine instruction.
pub trait MachInst: Clone + Debug {
    /// The ABI machine spec for this `MachInst`.
    type ABIMachineSpec: ABIMachineSpec<I = Self>;

    /// Return the registers referenced by this machine instruction along with
    /// the modes of reference (use, def, modify).
    fn get_operands(&mut self, collector: &mut impl OperandVisitor);

    /// If this is a simple move, return the pair of registers. Note that the
    /// `Writable<Reg>` element is the move's written (destination) register;
    /// the bare `Reg` is the source.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;

    /// Is this a terminator (branch or ret)? If so, return its kind
    /// (ret/uncond/cond). Actual branch targets are not carried here; the
    /// `VCode` is the single source of truth for successors.
    fn is_term(&self) -> MachTerminator;

    /// Is this an unconditional trap?
    fn is_trap(&self) -> bool;

    /// Is this an "args" pseudoinst?
    fn is_args(&self) -> bool;

    /// Classify the type of call instruction this is.
    ///
    /// This enables more granular function type analysis and optimization.
    /// Returns `CallType::None` for non-call instructions, `CallType::Regular`
    /// for normal calls that return to the caller, and `CallType::TailCall`
    /// for tail calls that don't return to the caller.
    fn call_type(&self) -> CallType;

    /// Should this instruction's clobber-list be included in the
    /// clobber-set?
    fn is_included_in_clobbers(&self) -> bool;

    /// Does this instruction access memory?
    fn is_mem_access(&self) -> bool;

    /// Generate a move of the given type from `from_reg` into `to_reg`.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

    /// Generate a dummy instruction that will keep a value alive but
    /// has no other purpose.
    fn gen_dummy_use(reg: Reg) -> Self;

    /// Determine register class(es) to store the given Cranelift type, and the
    /// Cranelift type actually stored in the underlying register(s). May return
    /// an error if the type isn't supported by this backend.
    ///
    /// If the type requires multiple registers, then the list of registers is
    /// returned in little-endian order.
    ///
    /// Note that the type actually stored in the register(s) may differ in the
    /// case that a value is split across registers: for example, on a 32-bit
    /// target, an I64 may be stored in two registers, each of which holds an
    /// I32. The actually-stored types are used only to inform the backend when
    /// generating spills and reloads for individual registers.
    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;

    /// Get an appropriate type that can fully hold a value in a given
    /// register class. This may not be the only type that maps to
    /// that class, but when used with `gen_move()` or the ABI trait's
    /// load/spill constructors, it should produce instruction(s) that
    /// move the entire register contents.
    fn canonical_type_for_rc(rc: RegClass) -> Type;

    /// Generate a jump to another target. Used during lowering of
    /// control flow.
    fn gen_jump(target: MachLabel) -> Self;

    /// Generate a store of an immediate 64-bit integer to a register. Used by
    /// the control plane to generate random instructions. Returns `None` by
    /// default for backends that do not support this.
    fn gen_imm_u64(_value: u64, _dst: Writable<Reg>) -> Option<Self> {
        None
    }

    /// Generate a store of an immediate 64-bit float to a register. Used by
    /// the control plane to generate random instructions. The tmp register may
    /// be used by architectures which don't support writing immediate values to
    /// floating point registers directly. Returns an empty sequence by default
    /// for backends that do not support this.
    fn gen_imm_f64(_value: f64, _tmp: Writable<Reg>, _dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        SmallVec::new()
    }

    /// Generate a NOP. The `preferred_size` parameter allows the caller to
    /// request a NOP of that size, or as close to it as possible. The machine
    /// backend may return a NOP whose binary encoding is smaller than the
    /// preferred size, but must not return a NOP that is larger. However,
    /// the instruction must have a nonzero size if preferred_size is nonzero.
    fn gen_nop(preferred_size: usize) -> Self;

    /// The various kinds of NOP, as encoded bytes with their sizes implicit in
    /// each `Vec<u8>`'s length, sorted in ascending-size order.
    fn gen_nop_units() -> Vec<Vec<u8>>;

    /// Align a basic block offset (from start of function). By default, no
    /// alignment occurs (the offset is returned unchanged).
    fn align_basic_block(offset: CodeOffset) -> CodeOffset {
        offset
    }

    /// What is the worst-case instruction size emitted by this instruction type?
    fn worst_case_size() -> CodeOffset;

    /// What is the register class used for reference types (GC-observable pointers)? Can
    /// be dependent on compilation flags.
    fn ref_type_regclass(_flags: &Flags) -> RegClass;

    /// Is this a safepoint?
    fn is_safepoint(&self) -> bool;

    /// Generate an instruction that must appear at the beginning of a basic
    /// block, if any. Note that the return value must not be subject to
    /// register allocation. Returns `None` by default (no block-start
    /// instruction required).
    fn gen_block_start(
        _is_indirect_branch_target: bool,
        _is_forward_edge_cfi_enabled: bool,
    ) -> Option<Self> {
        None
    }

    /// Returns a description of the alignment required for functions for this
    /// architecture.
    fn function_alignment() -> FunctionAlignment;

    /// Is this a low-level, one-way branch, not meant for use in a
    /// VCode body? These instructions are meant to be used only when
    /// directly emitted, i.e. when `MachInst` is used as an assembler
    /// library. Defaults to `false`.
    fn is_low_level_branch(&self) -> bool {
        false
    }

    /// A label-use kind: a type that describes the types of label references that
    /// can occur in an instruction.
    type LabelUse: MachInstLabelUse;

    /// Byte representation of a trap opcode which is inserted by `MachBuffer`
    /// during its `defer_trap` method.
    const TRAP_OPCODE: &'static [u8];
}
229
/// A descriptor of a label reference (use) in an instruction set.
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
    /// Required alignment for any veneer. Usually the required instruction
    /// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
    const ALIGN: CodeOffset;

    /// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
    /// label-reference fixup at offset `x` is valid if the label resolves to `x
    /// + 1024`.
    fn max_pos_range(self) -> CodeOffset;
    /// What is the maximum PC-relative range (negative)? This is the absolute
    /// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
    /// valid if the label resolves to `x - 1024`.
    fn max_neg_range(self) -> CodeOffset;
    /// What is the size of code-buffer slice this label-use needs to patch in
    /// the label's value?
    fn patch_size(self) -> CodeOffset;
    /// Perform a code-patch, given the offset into the buffer of this label use
    /// and the offset into the buffer of the label's definition.
    /// It is guaranteed that, given `delta = offset - label_offset`, we will
    /// have `delta >= -self.max_neg_range()` and `delta <=
    /// self.max_pos_range()`.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
    /// Can the label-use be patched to a veneer that supports a longer range?
    /// Usually valid for jumps (a short-range jump can jump to a longer-range
    /// jump), but not for e.g. constant pool references, because the constant
    /// load would require different code (one more level of indirection).
    fn supports_veneer(self) -> bool;
    /// How many bytes are needed for a veneer?
    fn veneer_size(self) -> CodeOffset;
    /// What's the largest possible veneer that may be generated?
    fn worst_case_veneer_size() -> CodeOffset;
    /// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
    /// bytes long at offset `veneer_offset` in the buffer. The original
    /// label-use will be patched to refer to this veneer's offset. A new
    /// (offset, LabelUse) is returned that allows the veneer to use the actual
    /// label. For veneers to work properly, it is expected that the new veneer
    /// has a larger range; on most platforms this probably means either a
    /// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
    /// stage, a jump that supports a full 32-bit range, for example.
    fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);

    /// Returns the corresponding label-use for the relocation specified.
    ///
    /// This returns `None` if the relocation doesn't have a corresponding
    /// representation for the target architecture.
    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self>;
}
278
/// Classification of call instruction types for granular analysis.
///
/// Produced by [`MachInst::call_type`] and consumed by
/// [`FunctionCalls::update`] to classify whole functions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CallType {
    /// Not a call instruction.
    None,
    /// Regular call that returns to the caller.
    Regular,
    /// Tail call that doesn't return to the caller.
    TailCall,
}
289
/// Function classification based on call patterns.
///
/// This enum classifies functions based on their calling behavior to enable
/// targeted optimizations. Functions are categorized as:
/// - `None`: No calls at all (can use simplified calling conventions)
/// - `TailOnly`: Only tail calls (may skip frame setup in some cases)
/// - `Regular`: Has regular calls (requires full calling convention support)
///
/// The default is `None`; the classification is upgraded incrementally via
/// [`FunctionCalls::update`] as call instructions are observed.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum FunctionCalls {
    /// Function makes no calls at all.
    #[default]
    None,
    /// Function only makes tail calls (no regular calls).
    TailOnly,
    /// Function makes at least one regular call (may also have tail calls).
    Regular,
}
307
308impl FunctionCalls {
309 /// Update the function classification based on a new call instruction.
310 ///
311 /// This method implements the merge logic for accumulating call patterns:
312 /// - Any regular call makes the function Regular
313 /// - Tail calls upgrade None to TailOnly
314 /// - Regular always stays Regular
315 pub fn update(&mut self, call_type: CallType) {
316 *self = match (*self, call_type) {
317 // No call instruction - state unchanged
318 (current, CallType::None) => current,
319 // Regular call always results in Regular classification
320 (_, CallType::Regular) => FunctionCalls::Regular,
321 // Tail call: None becomes TailOnly, others unchanged
322 (FunctionCalls::None, CallType::TailCall) => FunctionCalls::TailOnly,
323 (current, CallType::TailCall) => current,
324 };
325 }
326}
327
/// Describes a block terminator (not call) in the VCode.
///
/// Actual targets are not included: the single-source-of-truth for
/// those is the VCode itself, which holds, for each block, successors
/// and outgoing branch args per successor.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MachTerminator {
    /// Not a terminator.
    None,
    /// A return instruction.
    Ret,
    /// A tail call (return-via-call; does not fall through or return here).
    RetCall,
    /// A branch (conditional or unconditional; targets live in the VCode).
    Branch,
}
344
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
    /// Persistent state carried across `emit` invocations.
    type State: MachInstEmitState<Self>;

    /// Constant information used in `emit` invocations.
    type Info;

    /// Emit the instruction: append its encoding to `code`, updating `state`
    /// as a side effect.
    fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);

    /// Pretty-print the instruction into a human-readable string.
    fn pretty_print_inst(&self, state: &mut Self::State) -> String;
}
359
/// A trait describing the emission state carried between MachInsts when
/// emitting a function body.
pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {
    /// Create a new emission state given the ABI object.
    fn new(abi: &Callee<I::ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self;

    /// Update the emission state before emitting an instruction that is a
    /// safepoint. `user_stack_map` is the stack map (if any) to associate
    /// with the upcoming safepoint.
    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>);

    /// The emission state holds ownership of a control plane, so it doesn't
    /// have to be passed around explicitly too much. `ctrl_plane_mut` may
    /// be used if temporary access to the control plane is needed by some
    /// other function that doesn't have access to the emission state.
    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane;

    /// Used to continue using a control plane after the emission state is
    /// not needed anymore. Consumes `self`.
    fn take_ctrl_plane(self) -> ControlPlane;

    /// A hook that triggers when first emitting a new block.
    /// It is guaranteed to be called before any instructions are emitted.
    /// The default implementation is a no-op.
    fn on_new_block(&mut self) {}

    /// The [`FrameLayout`] for the function currently being compiled.
    fn frame_layout(&self) -> &FrameLayout;
}
387
/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
///
/// Parameterized over the [`CompilePhase`]: see [`CompiledCodeStencil`]
/// (pre-parameter-application) and [`CompiledCode`] (final form).
#[derive(PartialEq, Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct CompiledCodeBase<T: CompilePhase> {
    /// Machine code.
    pub buffer: MachBufferFinalized<T>,
    /// Disassembly, if requested.
    pub vcode: Option<String>,
    /// Debug info: value labels to registers/stackslots at code offsets.
    pub value_labels_ranges: ValueLabelsRanges,
    /// Basic-block layout info: block start offsets.
    ///
    /// This info is generated only if the `machine_code_cfg_info`
    /// flag is set.
    pub bb_starts: Vec<CodeOffset>,
    /// Basic-block layout info: block edges. Each edge is `(from,
    /// to)`, where `from` and `to` are basic-block start offsets of
    /// the respective blocks.
    ///
    /// This info is generated only if the `machine_code_cfg_info`
    /// flag is set.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
}
412
413impl CompiledCodeStencil {
414 /// Apply function parameters to finalize a stencil into its final form.
415 pub fn apply_params(self, params: &FunctionParameters) -> CompiledCode {
416 CompiledCode {
417 buffer: self.buffer.apply_base_srcloc(params.base_srcloc()),
418 vcode: self.vcode,
419 value_labels_ranges: self.value_labels_ranges,
420 bb_starts: self.bb_starts,
421 bb_edges: self.bb_edges,
422 }
423 }
424}
425
impl<T: CompilePhase> CompiledCodeBase<T> {
    /// Get a `CodeInfo` describing section sizes from this compilation result.
    pub fn code_info(&self) -> CodeInfo {
        CodeInfo {
            total_size: self.buffer.total_size(),
        }
    }

    /// Returns a reference to the machine code generated for this function compilation.
    pub fn code_buffer(&self) -> &[u8] {
        self.buffer.data()
    }

    /// Get the disassembly of the buffer, using the given capstone context.
    ///
    /// `params` (if provided) is used to render relocation targets
    /// symbolically. Each basic block is printed with a `blockN:` label,
    /// and relocations, traps, and patchable call sites are annotated
    /// inline after the instruction they belong to.
    #[cfg(feature = "disas")]
    pub fn disassemble(
        &self,
        params: Option<&crate::ir::function::FunctionParameters>,
        cs: &capstone::Capstone,
    ) -> Result<String, anyhow::Error> {
        use core::fmt::Write;

        let mut buf = String::new();

        let relocs = self.buffer.relocs();
        let traps = self.buffer.traps();
        // Peekable so each patchable call site is consumed exactly when we
        // reach the instruction whose end matches its return address below.
        // NOTE(review): this assumes patchable call sites are ordered by
        // offset — confirm against `patchable_call_sites`' contract.
        let mut patchables = self.buffer.patchable_call_sites().peekable();

        // Normalize the block starts to include an initial block of offset 0.
        let mut block_starts = Vec::new();
        if self.bb_starts.first().copied() != Some(0) {
            block_starts.push(0);
        }
        block_starts.extend_from_slice(&self.bb_starts);
        // Sentinel: total code size, so the last block's region is bounded.
        block_starts.push(self.buffer.data().len() as u32);

        // Iterate over block regions, to ensure that we always produce block labels
        for (n, (&start, &end)) in block_starts
            .iter()
            .zip(block_starts.iter().skip(1))
            .enumerate()
        {
            writeln!(buf, "block{n}: ; offset 0x{start:x}")?;

            // Disassemble only this block's bytes, with addresses relative
            // to the start of the function.
            let buffer = &self.buffer.data()[start as usize..end as usize];
            let insns = cs.disasm_all(buffer, start as u64).map_err(map_caperr)?;
            for i in insns.iter() {
                write!(buf, " ")?;

                let op_str = i.op_str().unwrap_or("");
                if let Some(s) = i.mnemonic() {
                    write!(buf, "{s}")?;
                    if !op_str.is_empty() {
                        write!(buf, " ")?;
                    }
                }

                write!(buf, "{op_str}")?;

                // Half-open byte range of this instruction; used to attach
                // relocs/traps whose offsets fall inside it.
                let end = i.address() + i.bytes().len() as u64;
                let contains = |off| i.address() <= off && off < end;

                for reloc in relocs.iter().filter(|reloc| contains(reloc.offset as u64)) {
                    write!(
                        buf,
                        " ; reloc_external {} {} {}",
                        reloc.kind,
                        reloc.target.display(params),
                        reloc.addend,
                    )?;
                }

                if let Some(trap) = traps.iter().find(|trap| contains(trap.offset as u64)) {
                    write!(buf, " ; trap: {}", trap.code)?;
                }

                // A patchable call site is keyed by its return address (the
                // offset just past the call instruction); annotate it and
                // advance to the next one.
                if let Some(patchable) = patchables.peek()
                    && patchable.ret_addr == end as u32
                {
                    write!(
                        buf,
                        " ; patchable call: NOP out last {} bytes",
                        patchable.len
                    )?;
                    patchables.next();
                }

                writeln!(buf)?;
            }
        }

        return Ok(buf);

        // Adapt capstone's error type into an ad-hoc `anyhow::Error`.
        fn map_caperr(err: capstone::Error) -> anyhow::Error {
            anyhow::format_err!("{err}")
        }
    }
}
524
/// Result of compiling a `FunctionStencil`, before applying `FunctionParameters` onto it.
///
/// Only used internally, in a transient manner, for the incremental compilation cache.
/// Converted into a [`CompiledCode`] via `CompiledCodeStencil::apply_params`.
pub type CompiledCodeStencil = CompiledCodeBase<Stencil>;

/// `CompiledCode` in its final form (i.e. after `FunctionParameters` have been applied), ready for
/// consumption.
pub type CompiledCode = CompiledCodeBase<Final>;
533
534impl CompiledCode {
535 /// If available, return information about the code layout in the
536 /// final machine code: the offsets (in bytes) of each basic-block
537 /// start, and all basic-block edges.
538 pub fn get_code_bb_layout(&self) -> (Vec<usize>, Vec<(usize, usize)>) {
539 (
540 self.bb_starts.iter().map(|&off| off as usize).collect(),
541 self.bb_edges
542 .iter()
543 .map(|&(from, to)| (from as usize, to as usize))
544 .collect(),
545 )
546 }
547
548 /// Creates unwind information for the function.
549 ///
550 /// Returns `None` if the function has no unwind information.
551 #[cfg(feature = "unwind")]
552 pub fn create_unwind_info(
553 &self,
554 isa: &dyn crate::isa::TargetIsa,
555 ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
556 use crate::isa::unwind::UnwindInfoKind;
557 let unwind_info_kind = match isa.triple().operating_system {
558 target_lexicon::OperatingSystem::Windows => UnwindInfoKind::Windows,
559 _ => UnwindInfoKind::SystemV,
560 };
561 self.create_unwind_info_of_kind(isa, unwind_info_kind)
562 }
563
564 /// Creates unwind information for the function using the supplied
565 /// "kind". Supports cross-OS (but not cross-arch) generation.
566 ///
567 /// Returns `None` if the function has no unwind information.
568 #[cfg(feature = "unwind")]
569 pub fn create_unwind_info_of_kind(
570 &self,
571 isa: &dyn crate::isa::TargetIsa,
572 unwind_info_kind: crate::isa::unwind::UnwindInfoKind,
573 ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
574 isa.emit_unwind_info(self, unwind_info_kind)
575 }
576}
577
/// An object that can be used to create the text section of an executable.
///
/// This primarily handles resolving relative relocations at
/// text-section-assembly time rather than at load/link time. This
/// architecture-specific logic is sort of like a linker, but only for one
/// object file at a time.
pub trait TextSectionBuilder {
    /// Appends `data` to the text section with the `align` specified.
    ///
    /// If `labeled` is `true` then this also binds the appended data to the
    /// `n`th label for how many times this has been called with `labeled:
    /// true`. The label target can be passed as the `target` argument to
    /// `resolve_reloc`.
    ///
    /// This function returns the offset at which the data was placed in the
    /// text section.
    fn append(
        &mut self,
        labeled: bool,
        data: &[u8],
        align: u32,
        ctrl_plane: &mut ControlPlane,
    ) -> u64;

    /// Attempts to resolve a relocation for this function.
    ///
    /// The `offset` is the offset of the relocation, within the text section.
    /// The `reloc` is the kind of relocation.
    /// The `addend` is the value to add to the relocation.
    /// The `target` is the labeled function that is the target of this
    /// relocation.
    ///
    /// Labeled functions are created with the `append` function above by
    /// setting the `labeled` parameter to `true`.
    ///
    /// If this builder does not know how to handle `reloc` then this function
    /// will return `false`. Otherwise this function will return `true` and this
    /// relocation will be resolved in the final bytes returned by `finish`.
    fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: usize) -> bool;

    /// A debug-only option which is used to force the use of veneers even
    /// where they would not otherwise be required, to exercise
    /// veneer-generation code paths.
    fn force_veneers(&mut self);

    /// Write the `data` provided at `offset`, for example when resolving a
    /// relocation.
    fn write(&mut self, offset: u64, data: &[u8]);

    /// Completes this text section, filling out any final details, and returns
    /// the bytes of the text section.
    fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8>;
}
628}