cranelift_codegen/machinst/abi.rs
//! Implementation of a vanilla ABI, shared between several machines. The
//! implementation here assumes that arguments will be passed in registers
//! first, then additional args on the stack; that the stack grows downward,
//! contains a standard frame (return address and frame pointer), and the
//! compiler is otherwise free to allocate space below that with its choice of
//! layout; and that the machine has some notion of caller- and callee-save
//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this
//! mold and thus both of these backends use this shared implementation.
//!
//! See the documentation in specific machine backends for the "instantiation"
//! of this generic ABI, i.e., which registers are caller/callee-save, arguments
//! and return values, and any other special requirements.
//!
//! For now the implementation here assumes a 64-bit machine, but we intend to
//! make this 32/64-bit-generic shortly.
//!
//! # Vanilla ABI
//!
//! First, arguments and return values are passed in registers up to a certain
//! fixed count, after which they overflow onto the stack. Multiple return
//! values either fit in registers, or are returned in a separate return-value
//! area on the stack, given by a hidden extra parameter.
//!
//! Note that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need
//! to be able to generate instructions (or instruction sequences) to
//! access arguments, stack slots, and spill slots before we know how
//! many spill slots or clobber-saves there will be, because of our
//! pass structure. We also prefer positive offsets to negative
//! offsets because of an asymmetry in some machines' addressing modes
//! (e.g., on AArch64, positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary
//! offset). We also need clobber-save registers to be "near" the
//! frame pointer: Windows unwind information requires it to be within
//! 240 bytes of RBP. Finally, it is not allowed to access memory
//! below the current SP value.
//!
//! We assume that a prologue first pushes the frame pointer (and
//! return address above that, if the machine does not do that in
//! hardware). We set FP to point to this two-word frame record. We
//! store all other frame slots below this two-word frame record, as
//! well as enough space for arguments to the largest possible
//! function call. The stack pointer then remains at this position
//! for the duration of the function, allowing us to address all
//! frame storage at positive offsets from SP.
//!
//! Note that if we ever support dynamic stack-space allocation (for
//! `alloca`), we will need a way to reference spill slots and stack
//! slots relative to a dynamic SP, because we will no longer be able
//! to know a static offset from SP to the slots at any particular
//! program point. Probably the best solution at that point will be to
//! revert to using the frame pointer as the reference for all slots,
//! to allow generating spill/reload and stackslot accesses before we
//! know how large the clobber-saves will be.
//!
//! # Stack Layout
//!
//! The stack looks like:
//!
//! ```plain
//!   (high address)
//!                              |          ...              |
//!                              | caller frames             |
//!                              |          ...              |
//!                              +===========================+
//!                              |          ...              |
//!                              | stack args                |
//! Canonical Frame Address -->  | (accessed via FP)         |
//!                              +---------------------------+
//! SP at function entry ----->  | return address            |
//!                              +---------------------------+
//! FP after prologue -------->  | FP (pushed by prologue)   |
//!                              +---------------------------+           -----
//!                              |          ...              |             |
//!                              | clobbered callee-saves    |             |
//! unwind-frame base -------->  | (pushed by prologue)      |             |
//!                              +---------------------------+   -----     |
//!                              |          ...              |     |       |
//!                              | spill slots               |     |       |
//!                              | (accessed via SP)         |   fixed   active
//!                              |          ...              |   frame    size
//!                              | stack slots               |  storage    |
//!                              | (accessed via SP)         |    size     |
//!                              | (alloc'd by prologue)     |     |       |
//!                              +---------------------------+   -----     |
//!                              | [alignment as needed]     |             |
//!                              |          ...              |             |
//!                              | args for largest call     |             |
//! SP ----------------------->  | (alloc'd by prologue)     |             |
//!                              +===========================+           -----
//!
//!   (low address)
//! ```
//!
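//! For example, with this layout every piece of frame storage lives at a
//! positive offset from SP. Mirroring `FrameLayout::sp_to_sized_stack_slots`
//! below, a sketch of the address of a sized stack slot is:
//!
//! ```plain
//! slot_addr = SP + outgoing_args_size + offset_of_slot
//! ```
//!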
//! # Multi-value Returns
//!
//! We support multi-value returns by using multiple return-value
//! registers. In some cases this is an extension of the base system
//! ABI. See each platform's `abi.rs` implementation for details.

use crate::entity::SecondaryMap;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, ExceptionTag, Signature};
use crate::isa::TargetIsa;
use crate::settings::ProbestackStrategy;
use crate::CodegenError;
use crate::{ir, isa};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use cranelift_entity::packed_option::PackedOption;
use regalloc2::{MachineEnv, PReg, PRegSet};
use rustc_hash::FxHashMap;
use smallvec::smallvec;
use std::collections::HashMap;
use std::marker::PhantomData;

/// A small vector of instructions (with some reasonable size); appropriate for
/// a small fixed sequence implementing one operation.
pub type SmallInstVec<I> = SmallVec<[I; 4]>;

/// A type used by backends to track argument-binding info in the "args"
/// pseudoinst. The pseudoinst holds a vec of `ArgPair` structs.
#[derive(Clone, Debug)]
pub struct ArgPair {
    /// The vreg that is defined by this args pseudoinst.
    pub vreg: Writable<Reg>,
    /// The preg that the arg arrives in; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A type used by backends to track return register binding info in the "ret"
/// pseudoinst. The pseudoinst holds a vec of `RetPair` structs.
#[derive(Clone, Debug)]
pub struct RetPair {
    /// The vreg that is returned by this pseudoinst.
    pub vreg: Reg,
    /// The preg that the value is returned through; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A location for (part of) an argument or return value. These "storage slots"
/// are specified for each register-sized part of an argument.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ABIArgSlot {
    /// In a real register.
    Reg {
        /// Register that holds this arg.
        reg: RealReg,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
    /// Arguments only: on stack, at given offset from SP at entry.
    Stack {
        /// Offset of this arg relative to the base of stack args.
        offset: i64,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
}

impl ABIArgSlot {
    /// The type of the value that will be stored in this slot.
    pub fn get_type(&self) -> ir::Type {
        match self {
            ABIArgSlot::Reg { ty, .. } => *ty,
            ABIArgSlot::Stack { ty, .. } => *ty,
        }
    }
}

/// A vector of `ABIArgSlot`s. Inline capacity for one element because basically
/// 100% of values use one slot. Only `i128`s need multiple slots, and they are
/// super rare (and never happen with Wasm).
pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 1]>;

/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
/// Value to be passed with its parts in more than one location at the ABI
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
/// number of "parts" should correspond to the number of registers used to store
/// this type according to the machine backend.
///
/// As an invariant, the `purpose` for every part must match. As a further
/// invariant, a `StructArg` part cannot appear with any other part.
#[derive(Clone, Debug)]
pub enum ABIArg {
    /// Storage slots (registers or stack locations) for each part of the
    /// argument value. The number of slots must equal the number of register
    /// parts used to store a value of this type.
    Slots {
        /// Slots, one per register part.
        slots: ABIArgSlotVec,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Structure argument. We reserve stack space for it, but the CLIF-level
    /// semantics are a little weird: the value passed to the call instruction,
    /// and received in the corresponding block param, is a *pointer*. On the
    /// caller side, we memcpy the data from the passed-in pointer to the stack
    /// area; on the callee side, we compute a pointer to this stack area and
    /// provide that as the argument's value.
    StructArg {
        /// Offset of this arg relative to base of stack args.
        offset: i64,
        /// Size of this arg on the stack.
        size: u64,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Implicit argument. Similar to a StructArg, except that we have the
    /// target type, not a pointer type, at the CLIF level. The argument is
    /// still implicitly passed by reference.
    ImplicitPtrArg {
        /// Register or stack slot holding a pointer to the buffer.
        pointer: ABIArgSlot,
        /// Offset of the argument buffer.
        offset: i64,
        /// Type of the implicit argument.
        ty: Type,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
}

impl ABIArg {
    /// Create an ABIArg from one register.
    pub fn reg(
        reg: RealReg,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Reg { reg, ty, extension }],
            purpose,
        }
    }

    /// Create an ABIArg from one stack slot.
    pub fn stack(
        offset: i64,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Stack {
                offset,
                ty,
                extension,
            }],
            purpose,
        }
    }
}
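
// Usage sketch (hypothetical values, not from any particular backend): a
// 64-bit integer argument passed on the stack at offset 16 from the base of
// the stack-argument area, with no extension required:
//
//     let _arg = ABIArg::stack(16, I64, ir::ArgumentExtension::None,
//                              ir::ArgumentPurpose::Normal);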

/// Are we computing information about arguments or return values? Much of the
/// handling is factored out into common routines; this enum allows us to
/// distinguish which case we're handling.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgsOrRets {
    /// Arguments.
    Args,
    /// Return values.
    Rets,
}

/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackAMode {
    /// Offset into the current frame's argument area.
    IncomingArg(i64, u32),
    /// Offset within the stack slots in the current frame.
    Slot(i64),
    /// Offset into the callee frame's argument area.
    OutgoingArg(i64),
}

impl StackAMode {
    fn offset_by(&self, offset: u32) -> Self {
        match self {
            StackAMode::IncomingArg(off, size) => {
                StackAMode::IncomingArg(off.checked_add(i64::from(offset)).unwrap(), *size)
            }
            StackAMode::Slot(off) => StackAMode::Slot(off.checked_add(i64::from(offset)).unwrap()),
            StackAMode::OutgoingArg(off) => {
                StackAMode::OutgoingArg(off.checked_add(i64::from(offset)).unwrap())
            }
        }
    }
}
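
// For example, `StackAMode::Slot(8).offset_by(8)` evaluates to
// `StackAMode::Slot(16)`: offsetting a slot address by one 64-bit word.
// (A doc sketch; `offset_by` is internal to this module and panics on
// offset overflow via the `checked_add(..).unwrap()` above.)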

/// Trait implemented by machine-specific backend to represent ISA flags.
pub trait IsaFlags: Clone {
    /// Get a flag indicating whether forward-edge CFI is enabled.
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        false
    }
}

/// Used as an out-parameter to accumulate a sequence of `ABIArg`s in
/// `ABIMachineSpec::compute_arg_locs`. Wraps the shared allocation for all
/// `ABIArg`s in `SigSet` and exposes just the args for the current
/// `compute_arg_locs` call.
pub struct ArgsAccumulator<'a> {
    sig_set_abi_args: &'a mut Vec<ABIArg>,
    start: usize,
    non_formal_flag: bool,
}

impl<'a> ArgsAccumulator<'a> {
    fn new(sig_set_abi_args: &'a mut Vec<ABIArg>) -> Self {
        let start = sig_set_abi_args.len();
        ArgsAccumulator {
            sig_set_abi_args,
            start,
            non_formal_flag: false,
        }
    }

    /// Push a formal argument. Must not be called after any non-formal
    /// argument has been pushed.
    #[inline]
    pub fn push(&mut self, arg: ABIArg) {
        debug_assert!(!self.non_formal_flag);
        self.sig_set_abi_args.push(arg)
    }

    /// Push a non-formal (synthetic) argument, such as a return-area
    /// pointer; these must come after all formal arguments.
    #[inline]
    pub fn push_non_formal(&mut self, arg: ABIArg) {
        self.non_formal_flag = true;
        self.sig_set_abi_args.push(arg)
    }

    /// Get the arguments accumulated so far for this signature.
    #[inline]
    pub fn args(&self) -> &[ABIArg] {
        &self.sig_set_abi_args[self.start..]
    }

    /// Get a mutable view of the arguments accumulated so far.
    #[inline]
    pub fn args_mut(&mut self) -> &mut [ABIArg] {
        &mut self.sig_set_abi_args[self.start..]
    }
}
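
// Sketch of the intended protocol (hypothetical caller code inside a
// backend's `compute_arg_locs`): formal parameters are pushed in order, and
// any synthetic trailing argument, such as a return-area pointer, is pushed
// last via `push_non_formal`, after which `push` may no longer be used:
//
//     let mut acc = ArgsAccumulator::new(&mut abi_args);
//     acc.push(formal_arg);              // one per CLIF parameter, in order
//     acc.push_non_formal(ret_area_ptr); // synthetic args come last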

/// Trait implemented by machine-specific backend to provide information about
/// register assignments and to allow generating the specific instructions for
/// stack loads/saves, prologues/epilogues, etc.
pub trait ABIMachineSpec {
    /// The instruction type.
    type I: VCodeInst;

    /// The ISA flags type.
    type F: IsaFlags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic.
    const STACK_ARG_RET_SIZE_LIMIT: u32;

    /// Returns the number of bits in a word: that is, 32 or 64 for a 32- or
    /// 64-bit architecture.
    fn word_bits() -> u32;

    /// Returns the number of bytes in a word.
    fn word_bytes() -> u32 {
        Self::word_bits() / 8
    }

    /// Returns the word-size integer type.
    fn word_type() -> Type {
        match Self::word_bits() {
            32 => I32,
            64 => I64,
            _ => unreachable!(),
        }
    }

    /// Returns the word register class.
    fn word_reg_class() -> RegClass {
        RegClass::Int
    }

    /// Returns the required stack alignment in bytes.
    fn stack_align(call_conv: isa::CallConv) -> u32;

    /// Process a list of parameters or return values and allocate them to registers
    /// and stack slots.
    ///
    /// The argument locations should be pushed onto the given `ArgsAccumulator`
    /// in order. Any extra arguments added (such as return area pointers)
    /// should come at the end of the list so that the first N lowered
    /// parameters align with the N clif parameters.
    ///
    /// Returns the stack space used (rounded up as alignment requires) and,
    /// if `add_ret_area_ptr` was passed, the index of the extra synthetic arg
    /// that was added.
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)>;

    /// Generate a load from the stack.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;

    /// Generate a store to the stack.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I;

    /// Generate an "args" pseudo-instruction to capture input args in
    /// registers.
    fn gen_args(args: Vec<ArgPair>) -> Self::I;

    /// Generate a "rets" pseudo-instruction that moves vregs to return
    /// registers.
    fn gen_rets(rets: Vec<RetPair>) -> Self::I;

    /// Generate an add-with-immediate. Note that even if this uses a scratch
    /// register, it must satisfy two requirements:
    ///
    /// - The add-imm sequence must only clobber caller-save registers that are
    ///   not used for arguments, because it will be placed in the prologue
    ///   before the clobbered callee-save registers are saved.
    ///
    /// - The add-imm sequence must work correctly when `from_reg` and/or
    ///   `into_reg` are the register returned by `get_stacklimit_reg()`.
    fn gen_add_imm(
        call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I>;

    /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
    /// the stack pointer is less than the given limit register (assuming the
    /// stack grows downward).
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I>;

    /// Generate an instruction to compute an address of a stack slot (FP- or
    /// SP-based offset).
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I;

    /// Get a fixed register to use to compute a stack limit. This is needed for
    /// certain sequences generated after the register allocator has already
    /// run. This must satisfy two requirements:
    ///
    /// - It must be a caller-save register that is not used for arguments,
    ///   because it will be clobbered in the prologue before the clobbered
    ///   callee-save registers are saved.
    ///
    /// - It must be safe to pass as an argument and/or destination to
    ///   `gen_add_imm()`. This is relevant when an addition with a large
    ///   immediate needs its own temporary; it cannot use the same fixed
    ///   temporary as this one.
    fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg;

    /// Generate a load from the given [base+offset] address.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;

    /// Generate a store to the given [base+offset] address.
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;

    /// Adjust the stack pointer up or down.
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>;

    /// Compute a FrameLayout structure containing a sorted list of all clobbered
    /// registers that are callee-saved according to the ABI, as well as the sizes
    /// of all parts of the stack frame. The result is used to emit the prologue
    /// and epilogue routines.
    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout;

    /// Generate the usual frame-setup sequence for this architecture: e.g.,
    /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
    /// AArch64.
    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate the usual frame-restore sequence for this architecture.
    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a return instruction.
    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a probestack call.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32);

    /// Generate an inline stack probe.
    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    );

    /// Generate a clobber-save sequence. The implementation here should return
    /// a sequence of instructions that "push" or otherwise save to the stack all
    /// registers written/modified by the function body that are callee-saved.
    /// The sequence of instructions should adjust the stack pointer downward,
    /// and should align as necessary according to ABI requirements.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a clobber-restore sequence. This sequence should perform the
    /// opposite of the clobber-save sequence generated above, assuming that SP
    /// going into the sequence is at the same point that it was left when the
    /// clobber-save sequence finished.
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a memcpy invocation. Used to set up struct
    /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
    /// allocator.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]>;

    /// Get the number of spillslots required for the given register-class.
    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        target_vector_bytes: u32,
        isa_flags: &Self::F,
    ) -> u32;

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    fn get_machine_env(flags: &settings::Flags, call_conv: isa::CallConv) -> &MachineEnv;

    /// Get all caller-save registers, that is, registers that we expect
    /// not to be saved across a call to a callee with the given ABI.
    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet;

    /// Get the needed extension mode, given the mode attached to the argument
    /// in the signature and the calling convention. The input (the attribute in
    /// the signature) specifies what extension type should be done *if* the ABI
    /// requires extension to the full register; this method's return value
    /// indicates whether the extension actually *will* be done.
    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension;

    /// Get a temporary register that is available to use after a call
    /// completes and that does not interfere with register-carried
    /// return values. This is used to move stack-carried return
    /// values directly into spillslots if needed.
    fn retval_temp_reg(call_conv_of_callee: isa::CallConv) -> Writable<Reg>;

    /// Get the exception payload registers, if any, for a calling
    /// convention.
    fn exception_payload_regs(_call_conv: isa::CallConv) -> &'static [Reg] {
        &[]
    }
}
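
// Schematically, a backend's prologue and epilogue compose the hooks above
// roughly as follows (a sketch of the intended protocol; the exact driver
// lives outside this excerpt):
//
//     gen_prologue_frame_setup(..)   // push FP, set up the frame record
//     gen_clobber_save(..)           // save callee-saves, allocate the frame
//     // ... function body ...
//     gen_clobber_restore(..)
//     gen_epilogue_frame_restore(..)
//     gen_return(..)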

/// Out-of-line data for calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct CallInfo<T> {
    /// Receiver of this call.
    pub dest: T,
    /// Register uses of this call.
    pub uses: CallArgList,
    /// Register defs of this call.
    pub defs: CallRetList,
    /// Registers clobbered by this call, as per its calling convention.
    pub clobbers: PRegSet,
    /// The calling convention of the callee.
    pub callee_conv: isa::CallConv,
    /// The calling convention of the caller.
    pub caller_conv: isa::CallConv,
    /// The number of bytes that the callee will pop from the stack for the
    /// caller, if any. (Used for popping stack arguments with the `tail`
    /// calling convention.)
    pub callee_pop_size: u32,
    /// Information for a try-call, if this is one. We combine
    /// handling of calls and try-calls as much as possible to share
    /// argument/return logic; they mostly differ in the metadata that
    /// they emit, which this information feeds into.
    pub try_call_info: Option<TryCallInfo>,
}

/// Out-of-line information present on `try_call` instructions only:
/// information that is used to generate exception-handling tables and
/// link up to destination blocks properly.
#[derive(Clone, Debug)]
pub struct TryCallInfo {
    /// The target to jump to on a normal return.
    pub continuation: MachLabel,
    /// Exception tags to catch and corresponding destination labels.
    pub exception_dests: Box<[(PackedOption<ExceptionTag>, MachLabel)]>,
}

impl<T> CallInfo<T> {
    /// Creates an empty set of info with no clobbers/uses/etc with the
    /// specified ABI.
    pub fn empty(dest: T, call_conv: isa::CallConv) -> CallInfo<T> {
        CallInfo {
            dest,
            uses: smallvec![],
            defs: smallvec![],
            clobbers: PRegSet::empty(),
            caller_conv: call_conv,
            callee_conv: call_conv,
            callee_pop_size: 0,
            try_call_info: None,
        }
    }
}

/// The id of an ABI signature within the `SigSet`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Sig(u32);
cranelift_entity::entity_impl!(Sig);

impl Sig {
    fn prev(self) -> Option<Sig> {
        self.0.checked_sub(1).map(Sig)
    }
}

/// ABI information shared between body (callee) and caller.
#[derive(Clone, Debug)]
pub struct SigData {
    /// Currently both return values and arguments are stored in a contiguous
    /// vector in `SigSet::abi_args`:
    ///
    /// ```plain
    ///                  +----------------------------------------------+
    ///                  | return values                                |
    ///                  | ...                                          |
    ///   rets_end  -->  +----------------------------------------------+
    ///                  | arguments                                    |
    ///                  | ...                                          |
    ///   args_end  -->  +----------------------------------------------+
    ///
    /// ```
    ///
    /// Note that we only store two offsets, as `rets_end == args_start` and
    /// `rets_start == prev.args_end`.
    ///
    /// Argument location ending offset (regs or stack slots). Stack offsets are relative to
    /// SP on entry to function.
    ///
    /// This is an index into the `SigSet::abi_args`.
    args_end: u32,

    /// Return-value location ending offset. Stack offsets are relative to the return-area
    /// pointer.
    ///
    /// This is an index into the `SigSet::abi_args`.
    rets_end: u32,

    /// Space on stack used to store arguments. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_arg_space: u32,

    /// Space on stack used to store return values. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_ret_space: u32,

    /// Index in `args` of the stack-return-value-area argument.
    stack_ret_arg: Option<u16>,

    /// Calling convention used.
    call_conv: isa::CallConv,
}

impl SigData {
    /// Get total stack space required for arguments.
    pub fn sized_stack_arg_space(&self) -> u32 {
        self.sized_stack_arg_space
    }

    /// Get total stack space required for return values.
    pub fn sized_stack_ret_space(&self) -> u32 {
        self.sized_stack_ret_space
    }

    /// Get the calling convention used.
    pub fn call_conv(&self) -> isa::CallConv {
        self.call_conv
    }

    /// The index of the stack-return-value-area argument, if any.
    pub fn stack_ret_arg(&self) -> Option<u16> {
        self.stack_ret_arg
    }
}

/// A (mostly) deduplicated set of ABI signatures.
///
/// We say "mostly" because we do not dedupe between signatures interned via
/// `ir::SigRef` (direct and indirect calls; the vast majority of signatures in
/// this set) vs via `ir::Signature` (the callee itself and libcalls). Doing
/// this final bit of deduplication would require filling out the
/// `ir_signature_to_abi_sig`, which is a bunch of allocations (not just the
/// hash map itself but params and returns vecs in each signature) that we want
/// to avoid.
///
/// In general, prefer using the `ir::SigRef`-taking methods to the
/// `ir::Signature`-taking methods when you can get away with it, as they don't
/// require cloning non-copy types that will trigger heap allocations.
///
/// This type can be indexed by `Sig` to access its associated `SigData`.
pub struct SigSet {
    /// Interned `ir::Signature`s that we already have an ABI signature for.
    ir_signature_to_abi_sig: FxHashMap<ir::Signature, Sig>,

    /// Interned `ir::SigRef`s that we already have an ABI signature for.
    ir_sig_ref_to_abi_sig: SecondaryMap<ir::SigRef, Option<Sig>>,

    /// A single, shared allocation for all `ABIArg`s used by all
    /// `SigData`s. Each `SigData` references its args/rets via indices into
    /// this allocation.
    abi_args: Vec<ABIArg>,

    /// The actual ABI signatures, keyed by `Sig`.
    sigs: PrimaryMap<Sig, SigData>,
}

impl SigSet {
    /// Construct a new `SigSet`, interning all of the signatures used by the
    /// given function.
    pub fn new<M>(func: &ir::Function, flags: &settings::Flags) -> CodegenResult<Self>
    where
        M: ABIMachineSpec,
    {
        let arg_estimate = func.dfg.signatures.len() * 6;

        let mut sigs = SigSet {
            ir_signature_to_abi_sig: FxHashMap::default(),
            ir_sig_ref_to_abi_sig: SecondaryMap::with_capacity(func.dfg.signatures.len()),
            abi_args: Vec::with_capacity(arg_estimate),
            sigs: PrimaryMap::with_capacity(1 + func.dfg.signatures.len()),
        };

        sigs.make_abi_sig_from_ir_signature::<M>(func.signature.clone(), flags)?;
        for sig_ref in func.dfg.signatures.keys() {
            sigs.make_abi_sig_from_ir_sig_ref::<M>(sig_ref, &func.dfg, flags)?;
        }

        Ok(sigs)
    }

    /// Have we already interned an ABI signature for the given `ir::Signature`?
    pub fn have_abi_sig_for_signature(&self, signature: &ir::Signature) -> bool {
        self.ir_signature_to_abi_sig.contains_key(signature)
    }

    /// Construct and intern an ABI signature for the given `ir::Signature`.
    pub fn make_abi_sig_from_ir_signature<M>(
        &mut self,
        signature: ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        // Because the `HashMap` entry API requires taking ownership of the
        // lookup key -- and we want to avoid unnecessary clones of
        // `ir::Signature`s, even at the cost of duplicate lookups -- we can't
        // have a single, get-or-create-style method for interning
        // `ir::Signature`s into ABI signatures. So at least (debug) assert that
        // we aren't creating duplicate ABI signatures for the same
        // `ir::Signature`.
        debug_assert!(!self.have_abi_sig_for_signature(&signature));

        let sig_data = self.from_func_sig::<M>(&signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_signature_to_abi_sig.insert(signature, sig);
        Ok(sig)
    }

    fn make_abi_sig_from_ir_sig_ref<M>(
        &mut self,
        sig_ref: ir::SigRef,
        dfg: &ir::DataFlowGraph,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        if let Some(sig) = self.ir_sig_ref_to_abi_sig[sig_ref] {
            return Ok(sig);
        }
        let signature = &dfg.signatures[sig_ref];
        let sig_data = self.from_func_sig::<M>(signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_sig_ref_to_abi_sig[sig_ref] = Some(sig);
        Ok(sig)
    }

    /// Get the already-interned ABI signature id for the given `ir::SigRef`.
    pub fn abi_sig_for_sig_ref(&self, sig_ref: ir::SigRef) -> Sig {
        self.ir_sig_ref_to_abi_sig[sig_ref]
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `abi_sig_for_sig_ref`")
    }

    /// Get the already-interned ABI signature id for the given `ir::Signature`.
    pub fn abi_sig_for_signature(&self, signature: &ir::Signature) -> Sig {
        self.ir_signature_to_abi_sig
            .get(signature)
            .copied()
            .expect("must call `make_abi_sig_from_ir_signature` before `abi_sig_for_signature`")
    }

    /// Construct the ABI-level signature data for the given CLIF signature.
    pub fn from_func_sig<M: ABIMachineSpec>(
        &mut self,
        sig: &ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<SigData> {
        // Keep in sync with ensure_struct_return_ptr_is_returned
        if sig.uses_special_return(ArgumentPurpose::StructReturn) {
            panic!("Explicit StructReturn return value not allowed: {sig:?}")
        }
        let tmp;
        let returns = if let Some(struct_ret_index) =
            sig.special_param_index(ArgumentPurpose::StructReturn)
        {
            if !sig.returns.is_empty() {
                panic!("No return values are allowed when using StructReturn: {sig:?}");
            }
            tmp = [sig.params[struct_ret_index]];
            &tmp
        } else {
            sig.returns.as_slice()
        };

        // Compute args and retvals from signature. Handle retvals first,
        // because we may need to add a return-area arg to the args.

        // NOTE: We rely on the order of insertion (rets, then args) to compute
        // the offsets in `SigSet::args()` and `SigSet::rets()`. Therefore, the
        // two `compute_arg_locs` calls below cannot be reordered.
        let (sized_stack_ret_space, _) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &returns,
            ArgsOrRets::Rets,
            /* extra ret-area ptr = */ false,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        if !flags.enable_multi_ret_implicit_sret() {
            assert_eq!(sized_stack_ret_space, 0);
        }
        let rets_end = u32::try_from(self.abi_args.len()).unwrap();

        // To avoid overflow issues, limit the return size to something reasonable.
        if sized_stack_ret_space > M::STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        let need_stack_return_area = sized_stack_ret_space > 0;
        if need_stack_return_area {
            assert!(!sig.uses_special_param(ir::ArgumentPurpose::StructReturn));
        }

        let (sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &sig.params,
            ArgsOrRets::Args,
            need_stack_return_area,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let args_end = u32::try_from(self.abi_args.len()).unwrap();

        // To avoid overflow issues, limit the arg size to something reasonable.
        if sized_stack_arg_space > M::STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        trace!(
            "ABISig: sig {:?} => args end = {} rets end = {}
             arg stack = {} ret stack = {} stack_ret_arg = {:?}",
            sig,
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            need_stack_return_area,
        );

        let stack_ret_arg = stack_ret_arg.map(|s| u16::try_from(s).unwrap());
        Ok(SigData {
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
            call_conv: sig.call_conv,
        })
    }

    /// Get this signature's ABI arguments.
    pub fn args(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` on how we store the offsets.
        let start = usize::try_from(sig_data.rets_end).unwrap();
        let end = usize::try_from(sig_data.args_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass the implicit pointer
    /// to the return-value area on the stack, if required.
    pub fn get_ret_arg(&self, sig: Sig) -> Option<ABIArg> {
        let sig_data = &self.sigs[sig];
        if let Some(i) = sig_data.stack_ret_arg {
            Some(self.args(sig)[usize::from(i)].clone())
        } else {
            None
        }
    }

    /// Get information specifying how to pass one argument.
    pub fn get_arg(&self, sig: Sig, idx: usize) -> ABIArg {
        self.args(sig)[idx].clone()
    }

    /// Get this signature's ABI returns.
    pub fn rets(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` on how we store the offsets.
        let start = usize::try_from(sig.prev().map_or(0, |prev| self.sigs[prev].args_end)).unwrap();
        let end = usize::try_from(sig_data.rets_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass one return value.
    pub fn get_ret(&self, sig: Sig, idx: usize) -> ABIArg {
        self.rets(sig)[idx].clone()
    }

    /// Get the number of arguments expected.
    pub fn num_args(&self, sig: Sig) -> usize {
        let len = self.args(sig).len();
        if self.sigs[sig].stack_ret_arg.is_some() {
            len - 1
        } else {
            len
        }
    }

    /// Get the number of return values expected.
    pub fn num_rets(&self, sig: Sig) -> usize {
        self.rets(sig).len()
    }
}
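
// A sketch with hypothetical indices: if the previous signature's `args_end`
// was 4, and this signature recorded `rets_end = 6` and `args_end = 9`, then
// `rets(sig)` is `abi_args[4..6]` and `args(sig)` is `abi_args[6..9]`.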

// NB: we do _not_ implement `IndexMut` because these signatures are
// deduplicated and shared!
impl std::ops::Index<Sig> for SigSet {
    type Output = SigData;

    fn index(&self, sig: Sig) -> &Self::Output {
        &self.sigs[sig]
    }
}

/// Structure describing the layout of a function's stack frame.
#[derive(Clone, Debug, Default)]
pub struct FrameLayout {
    /// N.B. The areas whose sizes are given in this structure fully
    /// cover the current function's stack frame, from high to low
    /// stack addresses in the sequence below. Each size contains
    /// any alignment padding that may be required by the ABI.

    /// Size of incoming arguments on the stack. This is not technically
    /// part of this function's frame, but code in the function will still
    /// need to access it. Depending on the ABI, we may need to set up a
    /// frame pointer to do so; we also may need to pop this area from the
    /// stack upon return.
    pub incoming_args_size: u32,

    /// The size of the incoming argument area, taking into account any
    /// potential increase in size required for tail calls present in the
    /// function. In the case that no tail calls are present, this value
    /// will be the same as [`Self::incoming_args_size`].
    pub tail_args_size: u32,

    /// Size of the "setup area", typically holding the return address
    /// and/or the saved frame pointer. This may be written either during
    /// the call itself (e.g. a pushed return address) or by code emitted
    /// from `gen_prologue_frame_setup`. In any case, after that code has
    /// completed execution, the stack pointer is expected to point to the
    /// bottom of this area. The same holds at the start of code emitted
    /// by `gen_epilogue_frame_restore`.
    pub setup_area_size: u32,

    /// Size of the area used to save callee-saved clobbered registers.
    /// This area is accessed by code emitted from `gen_clobber_save` and
    /// `gen_clobber_restore`.
    pub clobber_size: u32,

    /// Storage allocated for the fixed part of the stack frame.
    /// This contains stack slots and spill slots.
    pub fixed_frame_storage_size: u32,

    /// The size of all stackslots.
    pub stackslots_size: u32,

    /// Stack size to be reserved for outgoing arguments, if used by
    /// the current ABI, or 0 otherwise. After `gen_clobber_save` and
    /// before `gen_clobber_restore`, the stack pointer points to the
    /// bottom of this area.
    pub outgoing_args_size: u32,

    /// Sorted list of callee-saved registers that are clobbered
    /// according to the ABI. These registers will be saved and
    /// restored by `gen_clobber_save` and `gen_clobber_restore`.
    pub clobbered_callee_saves: Vec<Writable<RealReg>>,
}

impl FrameLayout {
    /// Split the clobbered callee-save registers into integer-class and
    /// float-class groups.
    ///
    /// This method does not currently support vector-class callee-save
    /// registers because no current backend has them.
    pub fn clobbered_callee_saves_by_class(&self) -> (&[Writable<RealReg>], &[Writable<RealReg>]) {
        let (ints, floats) = self.clobbered_callee_saves.split_at(
            self.clobbered_callee_saves
                .partition_point(|r| r.to_reg().class() == RegClass::Int),
        );
        debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float));
        (ints, floats)
    }

    /// The distance from FP to SP while the frame is active (not during
    /// prologue setup or epilogue tear-down).
    pub fn active_size(&self) -> u32 {
        self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size
    }

    /// Get the offset from the SP to the sized stack slots area.
    pub fn sp_to_sized_stack_slots(&self) -> u32 {
        self.outgoing_args_size
    }
}
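
// A worked sketch with hypothetical sizes: if `outgoing_args_size` is 32,
// `fixed_frame_storage_size` is 64, and `clobber_size` is 16, then
// `active_size()` is 112 bytes, and the sized stack slots begin at SP + 32.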

/// ABI object for a function body.
pub struct Callee<M: ABIMachineSpec> {
    /// CLIF-level signature, possibly normalized.
    ir_sig: ir::Signature,
    /// Signature: arg and retval regs.
    sig: Sig,
    /// Defined dynamic types.
    dynamic_type_sizes: HashMap<Type, u32>,
    /// Offsets to each dynamic stackslot.
    dynamic_stackslots: PrimaryMap<DynamicStackSlot, u32>,
    /// Offsets to each sized stackslot.
    sized_stackslots: PrimaryMap<StackSlot, u32>,
    /// Total stack size of all stackslots.
    stackslots_size: u32,
    /// Stack size to be reserved for outgoing arguments.
    outgoing_args_size: u32,
    /// Initially the number of bytes originating in the caller's frame where
    /// stack arguments will live. After lowering this number may be larger
    /// than the size expected by the function being compiled, as tail calls
    /// potentially require more space for stack arguments.
    tail_args_size: u32,
    /// Register-argument defs, to be provided to the `args`
    /// pseudo-inst, and pregs to constrain them to.
    reg_args: Vec<ArgPair>,
    /// Finalized frame layout for this function.
    frame_layout: Option<FrameLayout>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Reg>,
    /// Calling convention this function expects.
    call_conv: isa::CallConv,
    /// The settings controlling this function's compilation.
    flags: settings::Flags,
    /// The ISA-specific flag values controlling this function's compilation.
    isa_flags: M::F,
    /// Whether or not this function is a "leaf", meaning it calls no other
    /// functions.
    is_leaf: bool,
    /// If this function has a stack limit specified, then `Reg` is where the
    /// stack limit will be located after the instructions specified have been
    /// executed.
    ///
    /// Note that this is intended for insertion into the prologue, if
    /// present. Also note that because the instructions here execute in the
    /// prologue this happens after legalization/register allocation/etc so we
    /// need to be extremely careful with each instruction. The instructions are
    /// manually register-allocated and carefully only use caller-saved
    /// registers and keep nothing live after this sequence of instructions.
    stack_limit: Option<(Reg, SmallInstVec<M::I>)>,

    _mach: PhantomData<M>,
}

fn get_special_purpose_param_register(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
    let idx = f.signature.special_param_index(purpose)?;
    match &sigs.args(sig)[idx] {
        &ABIArg::Slots { ref slots, .. } => match &slots[0] {
            &ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
            _ => None,
        },
        _ => None,
    }
}
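
/// Round `val` up to the next multiple of `mask + 1`, where `mask` must be a
/// power of two minus one; returns `None` on overflow. For example,
/// `checked_round_up(13, 7)` computes `(13 + 7) & !7`, which is `Some(16)`.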
fn checked_round_up(val: u32, mask: u32) -> Option<u32> {
    Some(val.checked_add(mask)? & !mask)
}

impl<M: ABIMachineSpec> Callee<M> {
    /// Create a new body ABI instance.
    pub fn new(
        f: &ir::Function,
        isa: &dyn TargetIsa,
        isa_flags: &M::F,
        sigs: &SigSet,
    ) -> CodegenResult<Self> {
        trace!("ABI: func signature {:?}", f.signature);

        let flags = isa.flags().clone();
        let sig = sigs.abi_sig_for_signature(&f.signature);

        let call_conv = f.signature.call_conv;
        // Only these calling conventions are supported.
        debug_assert!(
            call_conv == isa::CallConv::SystemV
                || call_conv == isa::CallConv::Tail
                || call_conv == isa::CallConv::Fast
                || call_conv == isa::CallConv::Cold
                || call_conv == isa::CallConv::WindowsFastcall
                || call_conv == isa::CallConv::AppleAarch64
                || call_conv == isa::CallConv::Winch,
            "Unsupported calling convention: {call_conv:?}"
        );

        // Compute sized stackslot locations and total stackslot size.
        let mut end_offset: u32 = 0;
        let mut sized_stackslots = PrimaryMap::new();

        for (stackslot, data) in f.sized_stack_slots.iter() {
            // We start our computation possibly unaligned where the previous
            // stackslot left off.
            let unaligned_start_offset = end_offset;

            // The start of the stackslot must be aligned.
            //
            // We always at least machine-word-align slots, but also
            // satisfy the user's requested alignment.
            debug_assert!(data.align_shift < 32);
            let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
            let mask = align - 1;
            let start_offset = checked_round_up(unaligned_start_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            // The end offset is the start offset increased by the size.
            end_offset = start_offset
                .checked_add(data.size)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
            sized_stackslots.push(start_offset);
        }

        // Compute dynamic stackslot locations and total stackslot size.
        let mut dynamic_stackslots = PrimaryMap::new();
        for (stackslot, data) in f.dynamic_stack_slots.iter() {
            debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len());

            // This computation is similar to the stackslots above.
            let unaligned_start_offset = end_offset;

            let mask = M::word_bytes() - 1;
            let start_offset = checked_round_up(unaligned_start_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            let ty = f.get_concrete_dynamic_ty(data.dyn_ty).ok_or_else(|| {
                CodegenError::Unsupported(format!("invalid dynamic vector type: {}", data.dyn_ty))
            })?;

            end_offset = start_offset
                .checked_add(isa.dynamic_vector_bytes(ty))
                .ok_or(CodegenError::ImplLimitExceeded)?;

            dynamic_stackslots.push(start_offset);
        }

        // The size of the stackslots needs to be word-aligned.
        let stackslots_size = checked_round_up(end_offset, M::word_bytes() - 1)
            .ok_or(CodegenError::ImplLimitExceeded)?;

        let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len());
        for (dyn_ty, _data) in f.dfg.dynamic_types.iter() {
            let ty = f
                .get_concrete_dynamic_ty(dyn_ty)
                .unwrap_or_else(|| panic!("invalid dynamic vector type: {dyn_ty}"));
            let size = isa.dynamic_vector_bytes(ty);
            dynamic_type_sizes.insert(ty, size);
        }

        // Figure out what instructions, if any, will be needed to check the
        // stack limit. This can either be specified as a special-purpose
        // argument or as a global value which often calculates the stack limit
        // from the arguments.
        let stack_limit = f
            .stack_limit
            .map(|gv| gen_stack_limit::<M>(f, sigs, sig, gv));

        let tail_args_size = sigs[sig].sized_stack_arg_space;

        Ok(Self {
            ir_sig: ensure_struct_return_ptr_is_returned(&f.signature),
            sig,
            dynamic_stackslots,
            dynamic_type_sizes,
            sized_stackslots,
            stackslots_size,
            outgoing_args_size: 0,
            tail_args_size,
            reg_args: vec![],
            frame_layout: None,
            ret_area_ptr: None,
            call_conv,
            flags,
            isa_flags: isa_flags.clone(),
            is_leaf: f.is_leaf(),
            stack_limit,
            _mach: PhantomData,
        })
    }

    /// Inserts instructions necessary for checking the stack limit into the
    /// prologue.
    ///
    /// This function will generate instructions necessary to perform a stack
    /// check at the header of a function. The stack check is intended to trap
    /// if the stack pointer goes below a particular threshold, preventing stack
    /// overflow in wasm or other code. The `stack_limit` argument here is the
    /// register which holds the threshold below which we're supposed to trap.
    /// This function is known to allocate `stack_size` bytes and we'll push
    /// instructions onto `insts`.
    ///
    /// Note that the instructions generated here are special because this is
    /// happening so late in the pipeline (e.g. after register allocation). This
    /// means that we need to do manual register allocation here and also be
    /// careful to not clobber any callee-saved or argument registers. For now
    /// this routine makes do with the `spilltmp_reg` as one temporary
    /// register and a second caller-saved register, `tmp2`. This should be
    /// fine for us since no spills should happen in this sequence of
    /// instructions, so our register won't get accidentally clobbered.
    ///
    /// No values can be live after the prologue, but in this case that's ok
    /// because we just need to perform a stack check before progressing with
    /// the rest of the function.
    fn insert_stack_check(
        &self,
        stack_limit: Reg,
        stack_size: u32,
        insts: &mut SmallInstVec<M::I>,
    ) {
        // With no explicit stack allocated we can just emit the simple check of
        // the stack pointer against the stack limit register, and trap if
        // it's out of bounds.
        if stack_size == 0 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
            return;
        }

        // Note that the 32k stack size here is pretty special. See the
        // documentation in x86/abi.rs for why this is here. The general idea is
        // that we're protecting against overflow in the addition that happens
        // below.
        if stack_size >= 32 * 1024 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
        }

        // Add the `stack_size` to `stack_limit`, placing the result in
        // `scratch`.
        //
        // Note though that `stack_limit`'s register may be the same as
        // `scratch`. If our stack size doesn't fit into an immediate this
        // means we need a second scratch register for loading the stack size
        // into a register.
        let scratch = Writable::from_reg(M::get_stacklimit_reg(self.call_conv));
        insts.extend(M::gen_add_imm(self.call_conv, scratch, stack_limit, stack_size).into_iter());
        insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
    }
}
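
// Schematically, for a non-zero `stack_size` the check above emits
// (pseudocode, assuming a downward-growing stack):
//
//     if stack_size >= 32 * 1024: trap_if SP < stack_limit  // overflow guard
//     scratch = stack_limit + stack_size
//     trap_if SP < scratch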

/// Generates the instructions necessary for the `gv` to be materialized into a
/// register.
///
/// This function will return a register that will contain the result of
/// evaluating `gv`. It will also return any instructions necessary to calculate
/// the value of the register.
///
/// Note that global values are typically lowered to instructions via the
/// standard legalization pass. Unfortunately though prologue generation happens
/// so late in the pipeline that we can't use these legalization passes to
/// generate the instructions for `gv`. As a result we duplicate some lowering
/// of `gv` here and support only some global values. This is similar to what
/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
/// in the future too!
///
/// Also note that this function will make use of `writable_spilltmp_reg()` as a
/// temporary register to store values in if necessary. Currently after we write
/// to this register there are guaranteed to be no spilled values live between
/// where it's written and where it's used, because we're not participating in
/// register allocation anyway!
fn gen_stack_limit<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
) -> (Reg, SmallInstVec<M::I>) {
    let mut insts = smallvec![];
    let reg = generate_gv::<M>(f, sigs, sig, gv, &mut insts);
    return (reg, insts);
}

fn generate_gv<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
    insts: &mut SmallInstVec<M::I>,
) -> Reg {
    match f.global_values[gv] {
        // Return the direct register the vmcontext is in.
        ir::GlobalValueData::VMContext => {
            get_special_purpose_param_register(f, sigs, sig, ir::ArgumentPurpose::VMContext)
                .expect("no vmcontext parameter found")
        }
        // Load our base value into a register, then load from that register
        // into a temporary register.
        ir::GlobalValueData::Load {
            base,
            offset,
            global_type: _,
            flags: _,
        } => {
            let base = generate_gv::<M>(f, sigs, sig, base, insts);
            let into_reg = Writable::from_reg(M::get_stacklimit_reg(f.stencil.signature.call_conv));
            insts.push(M::gen_load_base_offset(
                into_reg,
                base,
                offset.into(),
                M::word_type(),
            ));
            return into_reg.to_reg();
        }
        ref other => panic!("global value for stack limit not supported: {other}"),
    }
}

/// Returns true if the signature needs to be legalized.
fn missing_struct_return(sig: &ir::Signature) -> bool {
    sig.uses_special_param(ArgumentPurpose::StructReturn)
        && !sig.uses_special_return(ArgumentPurpose::StructReturn)
}

fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
    // Keep in sync with Callee::new
    let mut sig = sig.clone();
    if sig.uses_special_return(ArgumentPurpose::StructReturn) {
        panic!("Explicit StructReturn return value not allowed: {sig:?}")
    }
    if let Some(struct_ret_index) = sig.special_param_index(ArgumentPurpose::StructReturn) {
        if !sig.returns.is_empty() {
            panic!("No return values are allowed when using StructReturn: {sig:?}");
        }
        sig.returns.insert(0, sig.params[struct_ret_index]);
    }
    sig
}
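
// For example (a sketch in CLIF terms): legalizing a signature whose params
// are `(i64 sret, i32)` with no returns inserts the sret parameter as the
// sole return value, yielding returns `(i64 sret)`.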
1429
1430/// ### Pre-Regalloc Functions
1431///
1432/// These methods of `Callee` may only be called before regalloc.
1433impl<M: ABIMachineSpec> Callee<M> {
1434 /// Access the (possibly legalized) signature.
1435 pub fn signature(&self) -> &ir::Signature {
1436 debug_assert!(
1437 !missing_struct_return(&self.ir_sig),
1438 "`Callee::ir_sig` is always legalized"
1439 );
1440 &self.ir_sig
1441 }
1442
1443 /// Initialize. This is called after the Callee is constructed because it
1444 /// may allocate a temp vreg, which can only be allocated once the lowering
1445 /// context exists.
1446 pub fn init_retval_area(
1447 &mut self,
1448 sigs: &SigSet,
1449 vregs: &mut VRegAllocator<M::I>,
1450 ) -> CodegenResult<()> {
1451 if sigs[self.sig].stack_ret_arg.is_some() {
1452 let ret_area_ptr = vregs.alloc(M::word_type())?;
1453 self.ret_area_ptr = Some(ret_area_ptr.only_reg().unwrap());
1454 }
1455 Ok(())
1456 }
1457
1458 /// Get the return area pointer register, if any.
1459 pub fn ret_area_ptr(&self) -> Option<Reg> {
1460 self.ret_area_ptr
1461 }
1462
1463 /// Accumulate outgoing arguments.
1464 ///
1465 /// This ensures that at least `size` bytes are allocated in the prologue to
1466 /// be available for use in function calls to hold arguments and/or return
1467 /// values. If this function is called multiple times, the maximum of all
1468 /// `size` values will be available.
1469 pub fn accumulate_outgoing_args_size(&mut self, size: u32) {
1470 if size > self.outgoing_args_size {
1471 self.outgoing_args_size = size;
1472 }
1473 }
1474
1475 /// Accumulate the incoming argument area size requirements for a tail call,
1476 /// as it could be larger than the incoming arguments of the function
1477 /// currently being compiled.
1478 pub fn accumulate_tail_args_size(&mut self, size: u32) {
1479 if size > self.tail_args_size {
1480 self.tail_args_size = size;
1481 }
1482 }
1483
1484 pub fn is_forward_edge_cfi_enabled(&self) -> bool {
1485 self.isa_flags.is_forward_edge_cfi_enabled()
1486 }
1487
1488 /// Get the calling convention implemented by this ABI object.
1489 pub fn call_conv(&self) -> isa::CallConv {
1490 self.call_conv
1491 }
1492
1493 /// Get the ABI-dependent MachineEnv for managing register allocation.
1494 pub fn machine_env(&self) -> &MachineEnv {
1495 M::get_machine_env(&self.flags, self.call_conv)
1496 }
1497
1498 /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes.
1499 pub fn sized_stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32> {
1500 &self.sized_stackslots
1501 }
1502
1503 /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes.
1504 pub fn dynamic_stackslot_offsets(&self) -> &PrimaryMap<DynamicStackSlot, u32> {
1505 &self.dynamic_stackslots
1506 }

    /// Generate the instructions which copy an argument into its destination
    /// register(s).
    pub fn gen_copy_arg_to_regs(
        &mut self,
        sigs: &SigSet,
        idx: usize,
        into_regs: ValueRegs<Writable<Reg>>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> SmallInstVec<M::I> {
        let mut insts = smallvec![];
        let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable<Reg>| {
            match slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    // Add a preg -> def pair to the eventual `args`
                    // instruction. Extension mode doesn't matter
                    // (we're copying out, not in; we ignore high bits
                    // by convention).
                    let arg = ArgPair {
                        vreg: *into_reg,
                        preg: reg.into(),
                    };
                    self.reg_args.push(arg);
                }
                &ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension,
                    ..
                } => {
                    // However, we have to respect the extension mode for stack
                    // slots, or else we grab the wrong bytes on big-endian.
                    let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                    let ty =
                        if ext != ArgumentExtension::None && M::word_bits() > ty_bits(ty) as u32 {
                            M::word_type()
                        } else {
                            ty
                        };
                    insts.push(M::gen_load_stack(
                        StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                        *into_reg,
                        ty,
                    ));
                }
            }
        };

        match &sigs.args(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(into_regs.len(), slots.len());
                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
                    copy_arg_slot_to_reg(&slot, &into_reg);
                }
            }
            &ABIArg::StructArg { offset, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // Buffer address is implicitly defined by the ABI.
                insts.push(M::gen_get_stack_addr(
                    StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                    into_reg,
                ));
            }
            &ABIArg::ImplicitPtrArg { pointer, ty, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // We need to dereference the pointer.
                let base = match &pointer {
                    &ABIArgSlot::Reg { reg, ty, .. } => {
                        let tmp = vregs.alloc_with_deferred_error(ty).only_reg().unwrap();
                        self.reg_args.push(ArgPair {
                            vreg: Writable::from_reg(tmp),
                            preg: reg.into(),
                        });
                        tmp
                    }
                    &ABIArgSlot::Stack { offset, ty, .. } => {
                        let addr_reg = writable_value_regs(vregs.alloc_with_deferred_error(ty))
                            .only_reg()
                            .unwrap();
                        insts.push(M::gen_load_stack(
                            StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                            addr_reg,
                            ty,
                        ));
                        addr_reg.to_reg()
                    }
                };
                insts.push(M::gen_load_base_offset(into_reg, base, 0, ty));
            }
        }
        insts
    }

    /// Generate the instructions which copy source registers to a
    /// return-value slot.
    pub fn gen_copy_regs_to_retval(
        &self,
        sigs: &SigSet,
        idx: usize,
        from_regs: ValueRegs<Reg>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> (SmallVec<[RetPair; 2]>, SmallInstVec<M::I>) {
        let mut reg_pairs = smallvec![];
        let mut ret = smallvec![];
        let word_bits = M::word_bits() as u8;
        match &sigs.rets(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(from_regs.len(), slots.len());
                for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
                    match slot {
                        &ABIArgSlot::Reg {
                            reg, ty, extension, ..
                        } => {
                            let from_bits = ty_bits(ty) as u8;
                            let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                            let vreg = match (ext, from_bits) {
                                (ir::ArgumentExtension::Uext, n)
                                | (ir::ArgumentExtension::Sext, n)
                                    if n < word_bits =>
                                {
                                    let signed = ext == ir::ArgumentExtension::Sext;
                                    let dst =
                                        writable_value_regs(vregs.alloc_with_deferred_error(ty))
                                            .only_reg()
                                            .unwrap();
                                    ret.push(M::gen_extend(
                                        dst, from_reg, signed, from_bits,
                                        /* to_bits = */ word_bits,
                                    ));
                                    dst.to_reg()
                                }
                                _ => {
                                    // No move needed, regalloc2 will emit it using the constraint
                                    // added by the RetPair.
                                    from_reg
                                }
                            };
                            reg_pairs.push(RetPair {
                                vreg,
                                preg: Reg::from(reg),
                            });
                        }
                        &ABIArgSlot::Stack {
                            offset,
                            ty,
                            extension,
                            ..
                        } => {
                            let mut ty = ty;
                            let from_bits = ty_bits(ty) as u8;
                            // A machine ABI implementation should ensure that stack frames
                            // have "reasonable" size. All current ABIs for machinst
                            // backends (aarch64 and x64) enforce a 128MB limit.
                            let off = i32::try_from(offset).expect(
                                "Argument stack offset greater than 2GB; should hit impl limit first",
                            );
                            let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                            // If the ABI requires extension, extend into a fresh
                            // vreg and store that extended value rather than the
                            // original, narrower one.
                            let from_reg = match (ext, from_bits) {
                                (ir::ArgumentExtension::Uext, n)
                                | (ir::ArgumentExtension::Sext, n)
                                    if n < word_bits =>
                                {
                                    assert_eq!(M::word_reg_class(), from_reg.class());
                                    let signed = ext == ir::ArgumentExtension::Sext;
                                    let dst =
                                        writable_value_regs(vregs.alloc_with_deferred_error(ty))
                                            .only_reg()
                                            .unwrap();
                                    ret.push(M::gen_extend(
                                        dst, from_reg, signed, from_bits,
                                        /* to_bits = */ word_bits,
                                    ));
                                    // Store the extended version.
                                    ty = M::word_type();
                                    dst.to_reg()
                                }
                                _ => from_reg,
                            };
                            ret.push(M::gen_store_base_offset(
                                self.ret_area_ptr.unwrap(),
                                off,
                                from_reg,
                                ty,
                            ));
                        }
                    }
                }
            }
            ABIArg::StructArg { .. } => {
                panic!("StructArg in return position is unsupported");
            }
            ABIArg::ImplicitPtrArg { .. } => {
                panic!("ImplicitPtrArg in return position is unsupported");
            }
        }
        (reg_pairs, ret)
    }

    /// Generate any setup instruction needed to save values to the
    /// return-value area. This is usually used when there are multiple return
    /// values or an otherwise large return value that must be passed on the
    /// stack; typically the ABI specifies an extra hidden argument that is a
    /// pointer to that memory.
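    ///
    /// A hedged lowering sketch (`ctx.emit` stands in for whatever the
    /// surrounding lowering context uses to emit instructions):
    ///
    /// ```ignore
    /// if let Some(inst) = callee.gen_retval_area_setup(&sigs, &mut vregs) {
    ///     ctx.emit(inst); // copies the hidden return-area pointer argument
    /// }
    /// ```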
    pub fn gen_retval_area_setup(
        &mut self,
        sigs: &SigSet,
        vregs: &mut VRegAllocator<M::I>,
    ) -> Option<M::I> {
        if let Some(i) = sigs[self.sig].stack_ret_arg {
            let ret_area_ptr = Writable::from_reg(self.ret_area_ptr.unwrap());
            let insts =
                self.gen_copy_arg_to_regs(sigs, i.into(), ValueRegs::one(ret_area_ptr), vregs);
            insts.into_iter().next().map(|inst| {
                trace!(
                    "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
                    inst,
                    ret_area_ptr.to_reg()
                );
                inst
            })
        } else {
            trace!("gen_retval_area_setup: not needed");
            None
        }
    }

    /// Generate a return instruction.
    pub fn gen_rets(&self, rets: Vec<RetPair>) -> M::I {
        M::gen_rets(rets)
    }

    /// Set up argument values `args` for a call with signature `sig`.
    /// This will return a series of instructions to be emitted to set
    /// up all arguments, as well as a `CallArgList` representing the
    /// arguments passed in registers. The latter needs to be added as
    /// constraints to the actual call instruction.
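    ///
    /// A hedged sketch of a (non-tail) call site; `args` is assumed to hold
    /// the lowered `ValueRegs` for each IR argument:
    ///
    /// ```ignore
    /// let (uses, insts) =
    ///     callee.gen_call_args(&sigs, sig, &args, /* is_tail_call = */ false, &flags, &mut vregs);
    /// for inst in insts {
    ///     ctx.emit(inst); // stores stack args, extends narrow values, etc.
    /// }
    /// // `uses` then becomes the register-argument constraints on the call.
    /// ```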
    pub fn gen_call_args(
        &self,
        sigs: &SigSet,
        sig: Sig,
        args: &[ValueRegs<Reg>],
        is_tail_call: bool,
        flags: &settings::Flags,
        vregs: &mut VRegAllocator<M::I>,
    ) -> (CallArgList, SmallInstVec<M::I>) {
        let mut uses: CallArgList = smallvec![];
        let mut insts = smallvec![];

        assert_eq!(args.len(), sigs.num_args(sig));

        let call_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;
        let stack_arg = |offset| {
            if is_tail_call {
                StackAMode::IncomingArg(offset, stack_arg_space)
            } else {
                StackAMode::OutgoingArg(offset)
            }
        };

        let word_ty = M::word_type();
        let word_rc = M::word_reg_class();
        let word_bits = M::word_bits() as usize;

        if is_tail_call {
            debug_assert_eq!(
                self.call_conv,
                isa::CallConv::Tail,
                "Can only do `return_call`s from within a `tail` calling convention function"
            );
        }

        // Helper to process a single argument slot (register or stack slot).
        // This will either add the register to the `uses` list or write the
        // value to the stack slot in the outgoing argument area (or for tail
        // calls, the incoming argument area).
        let mut process_arg_slot = |insts: &mut SmallInstVec<M::I>, slot, vreg, ty| {
            match &slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    uses.push(CallArgPair {
                        vreg,
                        preg: reg.into(),
                    });
                }
                &ABIArgSlot::Stack { offset, .. } => {
                    insts.push(M::gen_store_stack(stack_arg(offset), vreg, ty));
                }
            };
        };

        // First pass: Handle `StructArg` arguments. These need to be copied
        // into their associated stack buffers. This should happen before any
        // of the other arguments are processed, as the `memcpy` call might
        // clobber registers used by other arguments.
        for (idx, from_regs) in args.iter().enumerate() {
            match &sigs.args(sig)[idx] {
                &ABIArg::Slots { .. } | &ABIArg::ImplicitPtrArg { .. } => {}
                &ABIArg::StructArg { offset, size, .. } => {
                    let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                    insts.push(M::gen_get_stack_addr(
                        stack_arg(offset),
                        Writable::from_reg(tmp),
                    ));
                    insts.extend(M::gen_memcpy(
                        isa::CallConv::for_libcall(flags, call_conv),
                        tmp,
                        from_regs.only_reg().unwrap(),
                        size as usize,
                        |ty| {
                            Writable::from_reg(
                                vregs.alloc_with_deferred_error(ty).only_reg().unwrap(),
                            )
                        },
                    ));
                }
            }
        }

        // Second pass: Handle everything except `StructArg` arguments.
        for (idx, from_regs) in args.iter().enumerate() {
            match sigs.args(sig)[idx] {
                ABIArg::Slots { ref slots, .. } => {
                    assert_eq!(from_regs.len(), slots.len());
                    for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
                        // Load argument slot value from `from_reg`, and perform any zero-
                        // or sign-extension that is required by the ABI.
                        let (ty, extension) = match *slot {
                            ABIArgSlot::Reg { ty, extension, .. } => (ty, extension),
                            ABIArgSlot::Stack { ty, extension, .. } => (ty, extension),
                        };
                        let ext = M::get_ext_mode(call_conv, extension);
                        let (vreg, ty) = if ext != ir::ArgumentExtension::None
                            && ty_bits(ty) < word_bits
                        {
                            assert_eq!(word_rc, from_reg.class());
                            let signed = match ext {
                                ir::ArgumentExtension::Uext => false,
                                ir::ArgumentExtension::Sext => true,
                                _ => unreachable!(),
                            };
                            let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                            insts.push(M::gen_extend(
                                Writable::from_reg(tmp),
                                *from_reg,
                                signed,
                                ty_bits(ty) as u8,
                                word_bits as u8,
                            ));
                            (tmp, word_ty)
                        } else {
                            (*from_reg, ty)
                        };
                        process_arg_slot(&mut insts, *slot, vreg, ty);
                    }
                }
                ABIArg::ImplicitPtrArg {
                    offset,
                    pointer,
                    ty,
                    ..
                } => {
                    let vreg = from_regs.only_reg().unwrap();
                    let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                    insts.push(M::gen_get_stack_addr(
                        stack_arg(offset),
                        Writable::from_reg(tmp),
                    ));
                    insts.push(M::gen_store_base_offset(tmp, 0, vreg, ty));
                    process_arg_slot(&mut insts, pointer, tmp, word_ty);
                }
                ABIArg::StructArg { .. } => {}
            }
        }

        // Finally, set the stack-return pointer to the return argument area.
        // For tail calls, this means forwarding the incoming stack-return pointer.
        if let Some(ret_arg) = sigs.get_ret_arg(sig) {
            let ret_area = if is_tail_call {
                self.ret_area_ptr.expect(
                    "if the tail callee has a return pointer, then the tail caller must as well",
                )
            } else {
                let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                let amode = StackAMode::OutgoingArg(stack_arg_space.into());
                insts.push(M::gen_get_stack_addr(amode, Writable::from_reg(tmp)));
                tmp
            };
            match ret_arg {
                // The return pointer must occupy a single slot.
                ABIArg::Slots { slots, .. } => {
                    assert_eq!(slots.len(), 1);
                    process_arg_slot(&mut insts, slots[0], ret_area, word_ty);
                }
                _ => unreachable!(),
            }
        }

        (uses, insts)
    }

    /// Set up return values `outputs` for a call with signature `sig`.
    /// This does not emit (or return) any instructions, but returns a
    /// `CallRetList` representing the return value constraints. This
    /// needs to be added to the actual call instruction.
    ///
    /// If `try_call_payloads` is `Some`, it is expected to hold the
    /// exception payload registers for try_call instructions. These
    /// will be added as needed to the `CallRetList` as well.
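    ///
    /// A hedged sketch of an ordinary (non-try) call site, continuing the
    /// `gen_call_args` example above:
    ///
    /// ```ignore
    /// let defs = callee.gen_call_rets(&sigs, sig, &outputs, None, &mut vregs);
    /// // `defs` then becomes the return-value constraints on the call.
    /// ```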
    pub fn gen_call_rets(
        &self,
        sigs: &SigSet,
        sig: Sig,
        outputs: &[ValueRegs<Reg>],
        try_call_payloads: Option<&[Writable<Reg>]>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> CallRetList {
        let callee_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;

        let word_ty = M::word_type();
        let word_bits = M::word_bits() as usize;

        let mut defs: CallRetList = smallvec![];
        let mut outputs = outputs.into_iter();
        let num_rets = sigs.num_rets(sig);
        for idx in 0..num_rets {
            let ret = sigs.rets(sig)[idx].clone();
            match ret {
                ABIArg::Slots {
                    ref slots, purpose, ..
                } => {
                    // We do not use the returned copy of the return buffer pointer,
                    // so skip any StructReturn returns that may be present.
                    if purpose == ArgumentPurpose::StructReturn {
                        continue;
                    }
                    let retval_regs = outputs.next().unwrap();
                    assert_eq!(retval_regs.len(), slots.len());
                    for (slot, retval_reg) in slots.iter().zip(retval_regs.regs().iter()) {
                        // We do not perform any extension because we're copying out, not in,
                        // and we ignore high bits in our own registers by convention. However,
                        // we still need to use the proper extended type to access stack slots
                        // (this is critical on big-endian systems).
                        let (ty, extension) = match *slot {
                            ABIArgSlot::Reg { ty, extension, .. } => (ty, extension),
                            ABIArgSlot::Stack { ty, extension, .. } => (ty, extension),
                        };
                        let ext = M::get_ext_mode(callee_conv, extension);
                        let ty = if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
                            word_ty
                        } else {
                            ty
                        };

                        match slot {
                            &ABIArgSlot::Reg { reg, .. } => {
                                defs.push(CallRetPair {
                                    vreg: Writable::from_reg(*retval_reg),
                                    location: RetLocation::Reg(reg.into(), ty),
                                });
                            }
                            &ABIArgSlot::Stack { offset, .. } => {
                                let amode =
                                    StackAMode::OutgoingArg(offset + i64::from(stack_arg_space));
                                defs.push(CallRetPair {
                                    vreg: Writable::from_reg(*retval_reg),
                                    location: RetLocation::Stack(amode, ty),
                                });
                            }
                        }
                    }
                }
                ABIArg::StructArg { .. } => {
                    panic!("StructArg not supported in return position");
                }
                ABIArg::ImplicitPtrArg { .. } => {
                    panic!("ImplicitPtrArg not supported in return position");
                }
            }
        }
        assert!(outputs.next().is_none());

        if let Some(try_call_payloads) = try_call_payloads {
            // We need to update `defs` to contain the exception
            // payload regs as well. We have two sources of info that
            // we join:
            //
            // - The machine-specific ABI implementation `M`, which
            //   tells us the particular registers that payload values
            //   must be in
            // - The passed-in lowering context, which gives us the
            //   vregs we must define.
            //
            // Note that payload values may need to end up in the same
            // physical registers as ordinary return values; this is
            // not a conflict, because we either get one or the
            // other. For regalloc's purposes, we define both starting
            // here at the callsite, but we can share one def in the
            // `defs` list and alias one vreg to another. Thus we
            // handle the two cases below for each payload register:
            // overlaps a return value (and we alias to it) or not
            // (and we add a def).
            let pregs = M::exception_payload_regs(callee_conv);
            for (i, &preg) in pregs.iter().enumerate() {
                let vreg = try_call_payloads[i];
                if let Some(existing) = defs.iter().find(|def| match def.location {
                    RetLocation::Reg(r, _) => r == preg,
                    _ => false,
                }) {
                    vregs.set_vreg_alias(vreg.to_reg(), existing.vreg.to_reg());
                } else {
                    defs.push(CallRetPair {
                        vreg,
                        location: RetLocation::Reg(preg, word_ty),
                    });
                }
            }
        }

        defs
    }

    /// Populate a `CallInfo` for a call with signature `sig`.
    ///
    /// - `dest` is the target-specific call destination value.
    /// - `uses` is the `CallArgList` describing argument constraints.
    /// - `defs` is the `CallRetList` describing return constraints.
    /// - `try_call_info` describes exception targets for try_call instructions.
    ///
    /// The clobber list is computed here from the above data.
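    ///
    /// A hedged end-to-end sketch tying the pieces together (`dest` is an
    /// assumed target-specific destination value):
    ///
    /// ```ignore
    /// let (uses, insts) = callee.gen_call_args(&sigs, sig, &args, false, &flags, &mut vregs);
    /// let defs = callee.gen_call_rets(&sigs, sig, &outputs, None, &mut vregs);
    /// let info = callee.gen_call_info(&sigs, sig, dest, uses, defs, None);
    /// // `info` (including its computed clobber set) is attached to the call.
    /// ```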
    pub fn gen_call_info<T>(
        &self,
        sigs: &SigSet,
        sig: Sig,
        dest: T,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> CallInfo<T> {
        let caller_conv = self.call_conv;
        let callee_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;

        let clobbers = {
            // Get clobbers: all caller-saves. These may include return value
            // regs, which we will remove from the clobber set below.
            let mut clobbers =
                <M>::get_regs_clobbered_by_call(callee_conv, try_call_info.is_some());

            // Remove retval regs from clobbers.
            for def in &defs {
                if let RetLocation::Reg(preg, _) = def.location {
                    clobbers.remove(PReg::from(preg.to_real_reg().unwrap()));
                }
            }

            clobbers
        };

        // Any adjustment to SP to account for required outgoing arguments/stack return values must
        // be done inside of the call pseudo-op, to ensure that SP is always in a consistent
        // state for all other instructions. For example, if a tail-call abi function is called
        // here, the reclamation of the outgoing argument area must be done inside of the call
        // pseudo-op's emission to ensure that SP is consistent at all other points in the lowered
        // function. (Except the prologue and epilogue, but those are fairly special parts of the
        // function that establish the SP invariants that are relied on elsewhere and are generated
        // after the register allocator has run and thus cannot have register allocator-inserted
        // references to SP offsets.)

        let callee_pop_size = if callee_conv == isa::CallConv::Tail {
            // The tail calling convention has callees pop stack arguments.
            stack_arg_space
        } else {
            0
        };

        CallInfo {
            dest,
            uses,
            defs,
            clobbers,
            callee_conv,
            caller_conv,
            callee_pop_size,
            try_call_info,
        }
    }

    /// Produce an instruction that computes a sized stackslot address.
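    ///
    /// For example (illustrative numbers): if the slot begins 24 bytes into
    /// the stackslot area and `offset` is 8, the generated instruction
    /// materializes the address of byte 32 of that area.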
    pub fn sized_stackslot_addr(
        &self,
        slot: StackSlot,
        offset: u32,
        into_reg: Writable<Reg>,
    ) -> M::I {
        // Offset from beginning of stackslot area.
        let stack_off = self.sized_stackslots[slot] as i64;
        let sp_off: i64 = stack_off + (offset as i64);
        M::gen_get_stack_addr(StackAMode::Slot(sp_off), into_reg)
    }

    /// Produce an instruction that computes a dynamic stackslot address.
    pub fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable<Reg>) -> M::I {
        let stack_off = self.dynamic_stackslots[slot] as i64;
        M::gen_get_stack_addr(StackAMode::Slot(stack_off), into_reg)
    }

    /// Get an `args` pseudo-inst, if any, that should appear at the
    /// very top of the function body prior to regalloc.
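    ///
    /// A hedged sketch of the intended use during lowering:
    ///
    /// ```ignore
    /// if let Some(args_inst) = callee.take_args() {
    ///     ctx.emit(args_inst); // must be the first instruction in the body
    /// }
    /// ```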
    pub fn take_args(&mut self) -> Option<M::I> {
        if !self.reg_args.is_empty() {
            // Very first instruction is an `args` pseudo-inst that
            // establishes live-ranges for in-register arguments and
            // constrains them at the start of the function to the
            // locations defined by the ABI.
            Some(M::gen_args(std::mem::take(&mut self.reg_args)))
        } else {
            None
        }
    }
}

/// ### Post-Regalloc Functions
///
/// These methods of `Callee` may only be called after
/// regalloc.
impl<M: ABIMachineSpec> Callee<M> {
    /// Compute the final frame layout, post-regalloc.
    ///
    /// This must be called before gen_prologue or gen_epilogue.
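    ///
    /// A hedged sketch of the post-regalloc sequence (`spillslots` and
    /// `clobbered` are assumed to come from the regalloc results):
    ///
    /// ```ignore
    /// callee.compute_frame_layout(&sigs, spillslots, clobbered);
    /// let prologue = callee.gen_prologue(); // panics if layout not computed
    /// let epilogue = callee.gen_epilogue();
    /// ```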
    pub fn compute_frame_layout(
        &mut self,
        sigs: &SigSet,
        spillslots: usize,
        clobbered: Vec<Writable<RealReg>>,
    ) {
        let bytes = M::word_bytes();
        let total_stacksize = self.stackslots_size + bytes * spillslots as u32;
        let mask = M::stack_align(self.call_conv) - 1;
        // Align the stack to the calling convention's required alignment
        // (e.g., 16 bytes).
        let total_stacksize = (total_stacksize + mask) & !mask;
        self.frame_layout = Some(M::compute_frame_layout(
            self.call_conv,
            &self.flags,
            self.signature(),
            &clobbered,
            self.is_leaf,
            self.stack_args_size(sigs),
            self.tail_args_size,
            self.stackslots_size,
            total_stacksize,
            self.outgoing_args_size,
        ));
    }

    /// Generate a prologue, post-regalloc.
    ///
    /// This should include any stack frame or other setup necessary to use the
    /// other methods (`load_arg`, `store_retval`, and spillslot accesses).
    pub fn gen_prologue(&self) -> SmallInstVec<M::I> {
        let frame_layout = self.frame_layout();
        let mut insts = smallvec![];

        // Set up frame.
        insts.extend(M::gen_prologue_frame_setup(
            self.call_conv,
            &self.flags,
            &self.isa_flags,
            &frame_layout,
        ));

        // The stack limit check needs to cover all the stack adjustments we
        // might make, up to the next stack limit check in any function we
        // call. Since this happens after frame setup, the current function's
        // setup area needs to be accounted for in the caller's stack limit
        // check, but we need to account for any setup area that our callees
        // might need. Note that s390x may also use the outgoing args area for
        // backtrace support even in leaf functions, so that should be accounted
        // for unconditionally.
        let total_stacksize = (frame_layout.tail_args_size - frame_layout.incoming_args_size)
            + frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size
            + if self.is_leaf {
                0
            } else {
                frame_layout.setup_area_size
            };

        // Leaf functions with zero stack don't need a stack check, even if one
        // is specified; otherwise, always insert the stack check.
        if total_stacksize > 0 || !self.is_leaf {
            if let Some((reg, stack_limit_load)) = &self.stack_limit {
                insts.extend(stack_limit_load.clone());
                self.insert_stack_check(*reg, total_stacksize, &mut insts);
            }

            if self.flags.enable_probestack() {
                let guard_size = 1 << self.flags.probestack_size_log2();
                match self.flags.probestack_strategy() {
                    ProbestackStrategy::Inline => M::gen_inline_probestack(
                        &mut insts,
                        self.call_conv,
                        total_stacksize,
                        guard_size,
                    ),
                    ProbestackStrategy::Outline => {
                        if total_stacksize >= guard_size {
                            M::gen_probestack(&mut insts, total_stacksize);
                        }
                    }
                }
            }
        }

        // Save clobbered registers.
        insts.extend(M::gen_clobber_save(
            self.call_conv,
            &self.flags,
            &frame_layout,
        ));

        insts
    }

    /// Generate an epilogue, post-regalloc.
    ///
    /// Note that this must generate the actual return instruction (rather than
    /// emitting this in the lowering logic), because the epilogue code comes
    /// before the return and the two are likely closely related.
    pub fn gen_epilogue(&self) -> SmallInstVec<M::I> {
        let frame_layout = self.frame_layout();
        let mut insts = smallvec![];

        // Restore clobbered registers.
        insts.extend(M::gen_clobber_restore(
            self.call_conv,
            &self.flags,
            &frame_layout,
        ));

        // Tear down frame.
        insts.extend(M::gen_epilogue_frame_restore(
            self.call_conv,
            &self.flags,
            &self.isa_flags,
            &frame_layout,
        ));

        // And return.
        insts.extend(M::gen_return(
            self.call_conv,
            &self.isa_flags,
            &frame_layout,
        ));

        trace!("Epilogue: {:?}", insts);
        insts
    }

    /// Return a reference to the computed frame layout information. This
    /// function will panic if it's called before [`Self::compute_frame_layout`].
    pub fn frame_layout(&self) -> &FrameLayout {
        self.frame_layout
            .as_ref()
            .expect("frame layout not computed before prologue generation")
    }

    /// Returns the full frame size for the given function, after prologue
    /// emission has run. This comprises the spill slots and stack-storage
    /// slots as well as storage for clobbered callee-save registers, but
    /// not arguments pushed at callsites within this function, or other
    /// ephemeral pushes.
    pub fn frame_size(&self) -> u32 {
        let frame_layout = self.frame_layout();
        frame_layout.clobber_size + frame_layout.fixed_frame_storage_size
    }

    /// Returns the offset from the slot base in the current frame to the caller's SP.
    pub fn slot_base_to_caller_sp_offset(&self) -> u32 {
        let frame_layout = self.frame_layout();
        frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.setup_area_size
    }

    /// Returns the size of arguments expected on the stack.
    pub fn stack_args_size(&self, sigs: &SigSet) -> u32 {
        sigs[self.sig].sized_stack_arg_space
    }

    /// Get the spill-slot size.
    pub fn get_spillslot_size(&self, rc: RegClass) -> u32 {
        let max = if self.dynamic_type_sizes.is_empty() {
            16
        } else {
            *self
                .dynamic_type_sizes
                .iter()
                .max_by(|x, y| x.1.cmp(&y.1))
                .map(|(_k, v)| v)
                .unwrap()
        };
        M::get_number_of_spillslots_for_value(rc, max, &self.isa_flags)
    }

    /// Get the spill slot offset relative to the fixed allocation area start.
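    ///
    /// Worked example (illustrative numbers): on a 64-bit machine with 32
    /// bytes of sized stackslots, spill slot 3 lives at offset
    /// `32 + 3 * 8 = 56` from the fixed allocation area start.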
    pub fn get_spillslot_offset(&self, slot: SpillSlot) -> i64 {
        // Offset from beginning of spillslot area.
        let islot = slot.index() as i64;
        let spill_off = islot * M::word_bytes() as i64;
        let sp_off = self.stackslots_size as i64 + spill_off;

        sp_off
    }

    /// Generate a spill.
    pub fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> M::I {
        let ty = M::I::canonical_type_for_rc(from_reg.class());
        debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);

        let sp_off = self.get_spillslot_offset(to_slot);
        trace!("gen_spill: {from_reg:?} into slot {to_slot:?} at offset {sp_off}");

        let from = StackAMode::Slot(sp_off);
        <M>::gen_store_stack(from, Reg::from(from_reg), ty)
    }

    /// Generate a reload (fill).
    pub fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> M::I {
        let ty = M::I::canonical_type_for_rc(to_reg.to_reg().class());
        debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);

        let sp_off = self.get_spillslot_offset(from_slot);
        trace!("gen_reload: {to_reg:?} from slot {from_slot:?} at offset {sp_off}");

        let from = StackAMode::Slot(sp_off);
        <M>::gen_load_stack(from, to_reg.map(Reg::from), ty)
    }
}

/// An input argument to a call instruction: the vreg that is used,
/// and the preg it is constrained to (per the ABI).
#[derive(Clone, Debug)]
pub struct CallArgPair {
    /// The virtual register to use for the argument.
    pub vreg: Reg,
    /// The real register into which the arg goes.
    pub preg: Reg,
}

/// An output return value from a call instruction: the vreg that is
/// defined, and the preg or stack location it is constrained to (per
/// the ABI).
#[derive(Clone, Debug)]
pub struct CallRetPair {
    /// The virtual register to define from this return value.
    pub vreg: Writable<Reg>,
    /// The real register or stack location from which the return value is read.
    pub location: RetLocation,
}

/// A location to load a return-value from after a call completes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum RetLocation {
    /// A physical register.
    Reg(Reg, Type),
    /// A stack location, identified by a `StackAMode`.
    Stack(StackAMode, Type),
}

/// The register-argument constraints for a call instruction.
pub type CallArgList = SmallVec<[CallArgPair; 8]>;
/// The return-value constraints for a call instruction.
pub type CallRetList = SmallVec<[CallRetPair; 8]>;

impl<T> CallInfo<T> {
    /// Emit loads for any stack-carried return values using the call
    /// info and allocations.
    pub fn emit_retval_loads<
        M: ABIMachineSpec,
        EmitFn: FnMut(M::I),
        IslandFn: Fn(u32) -> Option<M::I>,
    >(
        &self,
        stackslots_size: u32,
        mut emit: EmitFn,
        emit_island: IslandFn,
    ) {
        // Count stack-ret locations and emit an island to account for
        // this space usage.
        let mut space_needed = 0;
        for CallRetPair { location, .. } in &self.defs {
            if let RetLocation::Stack(..) = location {
                // Assume up to ten instructions, semi-arbitrarily:
                // load from stack, store to spillslot, codegen of
                // large offsets on RISC ISAs.
                space_needed += 10 * M::I::worst_case_size();
            }
        }
        if space_needed > 0 {
            if let Some(island_inst) = emit_island(space_needed) {
                emit(island_inst);
            }
        }

        let temp = M::retval_temp_reg(self.callee_conv);
        // The temporary must be noted as clobbered.
        debug_assert!(M::get_regs_clobbered_by_call(
            self.callee_conv,
            self.try_call_info.is_some()
        )
        .contains(PReg::from(temp.to_reg().to_real_reg().unwrap())));

        for CallRetPair { vreg, location } in &self.defs {
            match location {
                RetLocation::Reg(preg, ..) => {
                    // The temporary must not also be an actual return
                    // value register.
                    debug_assert!(*preg != temp.to_reg());
                }
                RetLocation::Stack(amode, ty) => {
                    if let Some(spillslot) = vreg.to_reg().to_spillslot() {
                        // `temp` is an integer register of machine word
                        // width, but `ty` may be floating-point/vector,
                        // which (i) may not be loadable directly into an
                        // int reg, and (ii) may be wider than a machine
                        // word. For simplicity, and because there are not
                        // always easy choices for volatile float/vec regs
                        // (see e.g. x86-64, where fastcall clobbers only
                        // xmm0-xmm5, but tail uses xmm0-xmm7 for
                        // returns), we use the integer temp register in
                        // steps.
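                        //
                        // For example (illustrative): a 16-byte vector
                        // return on a 64-bit machine is moved in
                        // ceil(16 / 8) = 2 word-sized load/store steps.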
                        let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes();
                        for part in 0..parts {
                            emit(M::gen_load_stack(
                                amode.offset_by(part * M::word_bytes()),
                                temp,
                                M::word_type(),
                            ));
                            emit(M::gen_store_stack(
                                StackAMode::Slot(
                                    i64::from(stackslots_size)
                                        + i64::from(M::word_bytes())
                                            * ((spillslot.index() as i64) + (part as i64)),
                                ),
                                temp.to_reg(),
                                M::word_type(),
                            ));
                        }
                    } else {
                        assert_ne!(*vreg, temp);
                        emit(M::gen_load_stack(*amode, *vreg, *ty));
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::SigData;

    #[test]
    fn sig_data_size() {
        // The size of `SigData` is performance sensitive, so make sure
        // we don't regress it unintentionally.
        assert_eq!(std::mem::size_of::<SigData>(), 24);
    }
}