cranelift_codegen/machinst/abi.rs
//! Implementation of a vanilla ABI, shared between several machines. The
//! implementation here assumes that arguments will be passed in registers
//! first, then additional args on the stack; that the stack grows downward,
//! contains a standard frame (return address and frame pointer), and the
//! compiler is otherwise free to allocate space below that with its choice of
//! layout; and that the machine has some notion of caller- and callee-save
//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this
//! mold and thus both of these backends use this shared implementation.
//!
//! See the documentation in specific machine backends for the "instantiation"
//! of this generic ABI, i.e., which registers are caller/callee-save, arguments
//! and return values, and any other special requirements.
//!
//! For now the implementation here assumes a 64-bit machine, but we intend to
//! make this 32/64-bit-generic shortly.
//!
//! # Vanilla ABI
//!
//! First, arguments and return values are passed in registers up to a certain
//! fixed count, after which they overflow onto the stack. Multiple return
//! values either fit in registers, or are returned in a separate return-value
//! area on the stack, given by a hidden extra parameter.
//!
//! Note that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need
//! to be able to generate instructions (or instruction sequences) to
//! access arguments, stack slots, and spill slots before we know how
//! many spill slots or clobber-saves there will be, because of our
//! pass structure. We also prefer positive offsets to negative
//! offsets because of an asymmetry in some machines' addressing modes
//! (e.g., on AArch64, positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary
//! offset). We also need clobber-save registers to be "near" the
//! frame pointer: Windows unwind information requires it to be within
//! 240 bytes of RBP. Finally, it is not allowed to access memory
//! below the current SP value.
//!
//! We assume that a prologue first pushes the frame pointer (and
//! return address above that, if the machine does not do that in
//! hardware). We set FP to point to this two-word frame record. We
//! store all other frame slots below this two-word frame record, as
//! well as enough space for arguments to the largest possible
//! function call. The stack pointer then remains at this position
//! for the duration of the function, allowing us to address all
//! frame storage at positive offsets from SP.
//!
//! Note that if we ever support dynamic stack-space allocation (for
//! `alloca`), we will need a way to reference spill slots and stack
//! slots relative to a dynamic SP, because we will no longer be able
//! to know a static offset from SP to the slots at any particular
//! program point. Probably the best solution at that point will be to
//! revert to using the frame pointer as the reference for all slots,
//! to allow generating spill/reload and stackslot accesses before we
//! know how large the clobber-saves will be.
//!
//! # Stack Layout
//!
//! The stack looks like:
//!
//! ```plain
//!   (high address)
//!                              |          ...              |
//!                              | caller frames             |
//!                              |          ...              |
//!                              +===========================+
//!                              |          ...              |
//!                              | stack args                |
//! Canonical Frame Address -->  | (accessed via FP)         |
//!                              +---------------------------+
//! SP at function entry ----->  | return address            |
//!                              +---------------------------+
//! FP after prologue -------->  | FP (pushed by prologue)   |
//!                              +---------------------------+           -----
//!                              |          ...              |             |
//!                              | clobbered callee-saves    |             |
//! unwind-frame base -------->  | (pushed by prologue)      |             |
//!                              +---------------------------+   -----     |
//!                              |          ...              |     |       |
//!                              | spill slots               |     |       |
//!                              | (accessed via SP)         |   fixed   active
//!                              |          ...              |   frame    size
//!                              | stack slots               |  storage    |
//!                              | (accessed via SP)         |    size     |
//!                              | (alloc'd by prologue)     |     |       |
//!                              +---------------------------+   -----     |
//!                              | [alignment as needed]     |             |
//!                              |          ...              |             |
//!                              | args for largest call     |             |
//! SP ----------------------->  | (alloc'd by prologue)     |             |
//!                              +===========================+           -----
//!
//!   (low address)
//! ```
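//!
//! To make the arithmetic concrete (with sizes invented purely for
//! illustration, not taken from any particular backend): with a 16-byte
//! clobber-save area, 32 bytes of spill and stack slots, and a 16-byte
//! outgoing-args area, we would have:
//!
//! ```plain
//! SP-to-FP distance = 16 (outgoing) + 32 (slots) + 16 (clobbers) = 64
//! stack-slot base   = SP + 16 (just above the outgoing-args area)
//! spill-slot base   = SP + 16 + (total size of sized stack slots)
//! ```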
//!
//! # Multi-value Returns
//!
//! We support multi-value returns by using multiple return-value
//! registers. In some cases this is an extension of the base system
//! ABI. See each platform's `abi.rs` implementation for details.

use crate::CodegenError;
use crate::entity::SecondaryMap;
use crate::ir::{ArgumentExtension, ArgumentPurpose, ExceptionTag, Signature};
use crate::ir::{StackSlotKey, types::*};
use crate::isa::TargetIsa;
use crate::settings::ProbestackStrategy;
use crate::{ir, isa};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use regalloc2::{MachineEnv, PReg, PRegSet};
use rustc_hash::FxHashMap;
use smallvec::smallvec;
use std::collections::HashMap;
use std::marker::PhantomData;

/// A small vector of instructions (with some reasonable size); appropriate for
/// a small fixed sequence implementing one operation.
pub type SmallInstVec<I> = SmallVec<[I; 4]>;

/// A type used by backends to track argument-binding info in the "args"
/// pseudoinst. The pseudoinst holds a vec of `ArgPair` structs.
#[derive(Clone, Debug)]
pub struct ArgPair {
    /// The vreg that is defined by this args pseudoinst.
    pub vreg: Writable<Reg>,
    /// The preg that the arg arrives in; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A type used by backends to track return register binding info in the "ret"
/// pseudoinst. The pseudoinst holds a vec of `RetPair` structs.
#[derive(Clone, Debug)]
pub struct RetPair {
    /// The vreg that is returned by this pseudoinst.
    pub vreg: Reg,
    /// The preg that the value is returned through; this constrains the
    /// vreg's placement at the pseudoinst.
    pub preg: Reg,
}

/// A location for (part of) an argument or return value. These "storage slots"
/// are specified for each register-sized part of an argument.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ABIArgSlot {
    /// In a real register.
    Reg {
        /// Register that holds this arg.
        reg: RealReg,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
    /// Arguments only: on stack, at given offset from SP at entry.
    Stack {
        /// Offset of this arg relative to the base of stack args.
        offset: i64,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
}

impl ABIArgSlot {
    /// The type of the value that will be stored in this slot.
    pub fn get_type(&self) -> ir::Type {
        match self {
            ABIArgSlot::Reg { ty, .. } => *ty,
            ABIArgSlot::Stack { ty, .. } => *ty,
        }
    }
}

/// A vector of `ABIArgSlot`s. Inline capacity for one element because basically
/// 100% of values use one slot. Only `i128`s need multiple slots, and they are
/// super rare (and never happen with Wasm).
pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 1]>;

/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
/// Value to be passed with its parts in more than one location at the ABI
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
/// number of "parts" should correspond to the number of registers used to store
/// this type according to the machine backend.
///
/// As an invariant, the `purpose` for every part must match. As a further
/// invariant, a `StructArg` part cannot appear with any other part.
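///
/// For illustration only (the register names are hypothetical, not those of
/// any particular backend), an `i128` passed in two 64-bit registers would be
/// described roughly as:
///
/// ```plain
/// ABIArg::Slots {
///     slots: [
///         ABIArgSlot::Reg { reg: r0, ty: I64, extension: None },
///         ABIArgSlot::Reg { reg: r1, ty: I64, extension: None },
///     ],
///     purpose: ArgumentPurpose::Normal,
/// }
/// ```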
#[derive(Clone, Debug)]
pub enum ABIArg {
    /// Storage slots (registers or stack locations) for each part of the
    /// argument value. The number of slots must equal the number of register
    /// parts used to store a value of this type.
    Slots {
        /// Slots, one per register part.
        slots: ABIArgSlotVec,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Structure argument. We reserve stack space for it, but the CLIF-level
    /// semantics are a little weird: the value passed to the call instruction,
    /// and received in the corresponding block param, is a *pointer*. On the
    /// caller side, we memcpy the data from the passed-in pointer to the stack
    /// area; on the callee side, we compute a pointer to this stack area and
    /// provide that as the argument's value.
    StructArg {
        /// Offset of this arg relative to base of stack args.
        offset: i64,
        /// Size of this arg on the stack.
        size: u64,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Implicit argument. Similar to a StructArg, except that we have the
    /// target type, not a pointer type, at the CLIF level. This argument is
    /// still passed by reference implicitly.
    ImplicitPtrArg {
        /// Register or stack slot holding a pointer to the buffer.
        pointer: ABIArgSlot,
        /// Offset of the argument buffer.
        offset: i64,
        /// Type of the implicit argument.
        ty: Type,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
}

impl ABIArg {
    /// Create an ABIArg from one register.
    pub fn reg(
        reg: RealReg,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Reg { reg, ty, extension }],
            purpose,
        }
    }

    /// Create an ABIArg from one stack slot.
    pub fn stack(
        offset: i64,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Stack {
                offset,
                ty,
                extension,
            }],
            purpose,
        }
    }
}

/// Are we computing information about arguments or return values? Much of the
/// handling is factored out into common routines; this enum allows us to
/// distinguish which case we're handling.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgsOrRets {
    /// Arguments.
    Args,
    /// Return values.
    Rets,
}

/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackAMode {
    /// Offset into the current frame's argument area.
    IncomingArg(i64, u32),
    /// Offset within the stack slots in the current frame.
    Slot(i64),
    /// Offset into the callee frame's argument area.
    OutgoingArg(i64),
}

impl StackAMode {
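    /// Shift the offset in this amode by `offset` bytes, checking for
    /// overflow. For example, `StackAMode::Slot(8).offset_by(8)` yields
    /// `StackAMode::Slot(16)`.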
    fn offset_by(&self, offset: u32) -> Self {
        match self {
            StackAMode::IncomingArg(off, size) => {
                StackAMode::IncomingArg(off.checked_add(i64::from(offset)).unwrap(), *size)
            }
            StackAMode::Slot(off) => StackAMode::Slot(off.checked_add(i64::from(offset)).unwrap()),
            StackAMode::OutgoingArg(off) => {
                StackAMode::OutgoingArg(off.checked_add(i64::from(offset)).unwrap())
            }
        }
    }
}

/// Trait implemented by machine-specific backend to represent ISA flags.
pub trait IsaFlags: Clone {
    /// Get a flag indicating whether forward-edge CFI is enabled.
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        false
    }
}

/// Used as an out-parameter to accumulate a sequence of `ABIArg`s in
/// `ABIMachineSpec::compute_arg_locs`. Wraps the shared allocation for all
/// `ABIArg`s in `SigSet` and exposes just the args for the current
/// `compute_arg_locs` call.
pub struct ArgsAccumulator<'a> {
    sig_set_abi_args: &'a mut Vec<ABIArg>,
    start: usize,
    non_formal_flag: bool,
}

impl<'a> ArgsAccumulator<'a> {
    fn new(sig_set_abi_args: &'a mut Vec<ABIArg>) -> Self {
        let start = sig_set_abi_args.len();
        ArgsAccumulator {
            sig_set_abi_args,
            start,
            non_formal_flag: false,
        }
    }

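    /// Push a formal argument's computed location. Must not be called after
    /// any non-formal argument has been pushed.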
    #[inline]
    pub fn push(&mut self, arg: ABIArg) {
        debug_assert!(!self.non_formal_flag);
        self.sig_set_abi_args.push(arg)
    }

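    /// Push a non-formal (synthetic) argument's location, such as a
    /// return-area pointer; these must come after all formal arguments.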
    #[inline]
    pub fn push_non_formal(&mut self, arg: ABIArg) {
        self.non_formal_flag = true;
        self.sig_set_abi_args.push(arg)
    }

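    /// Get the arguments pushed so far by this accumulator.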
    #[inline]
    pub fn args(&self) -> &[ABIArg] {
        &self.sig_set_abi_args[self.start..]
    }

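    /// Get a mutable view of the arguments pushed so far by this accumulator.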
345 pub fn args_mut(&mut self) -> &mut [ABIArg] {
346 &mut self.sig_set_abi_args[self.start..]
347 }
348}
349
350/// Trait implemented by machine-specific backend to provide information about
351/// register assignments and to allow generating the specific instructions for
352/// stack loads/saves, prologues/epilogues, etc.
353pub trait ABIMachineSpec {
354 /// The instruction type.
355 type I: VCodeInst;
356
357 /// The ISA flags type.
358 type F: IsaFlags;
359
360 /// This is the limit for the size of argument and return-value areas on the
361 /// stack. We place a reasonable limit here to avoid integer overflow issues
362 /// with 32-bit arithmetic.
363 const STACK_ARG_RET_SIZE_LIMIT: u32;
364
365 /// Returns the number of bits in a word, that is 32/64 for 32/64-bit architecture.
366 fn word_bits() -> u32;
367
368 /// Returns the number of bytes in a word.
369 fn word_bytes() -> u32 {
370 return Self::word_bits() / 8;
371 }
372
373 /// Returns word-size integer type.
374 fn word_type() -> Type {
375 match Self::word_bits() {
376 32 => I32,
377 64 => I64,
378 _ => unreachable!(),
379 }
380 }
381
382 /// Returns word register class.
383 fn word_reg_class() -> RegClass {
384 RegClass::Int
385 }
386
387 /// Returns required stack alignment in bytes.
388 fn stack_align(call_conv: isa::CallConv) -> u32;
389
390 /// Process a list of parameters or return values and allocate them to registers
391 /// and stack slots.
392 ///
393 /// The argument locations should be pushed onto the given `ArgsAccumulator`
394 /// in order. Any extra arguments added (such as return area pointers)
395 /// should come at the end of the list so that the first N lowered
396 /// parameters align with the N clif parameters.
397 ///
398 /// Returns the stack-space used (rounded up to as alignment requires), and
399 /// if `add_ret_area_ptr` was passed, the index of the extra synthetic arg
400 /// that was added.
401 fn compute_arg_locs(
402 call_conv: isa::CallConv,
403 flags: &settings::Flags,
404 params: &[ir::AbiParam],
405 args_or_rets: ArgsOrRets,
406 add_ret_area_ptr: bool,
407 args: ArgsAccumulator,
408 ) -> CodegenResult<(u32, Option<usize>)>;
409
    /// Generate a load from the stack.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;

    /// Generate a store to the stack.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I;

    /// Generate an "args" pseudo-instruction to capture input args in
    /// registers.
    fn gen_args(args: Vec<ArgPair>) -> Self::I;

    /// Generate a "rets" pseudo-instruction that moves vregs to return
    /// registers.
    fn gen_rets(rets: Vec<RetPair>) -> Self::I;

    /// Generate an add-with-immediate. Note that even if this uses a scratch
    /// register, it must satisfy two requirements:
    ///
    /// - The add-imm sequence must only clobber caller-save registers that are
    ///   not used for arguments, because it will be placed in the prologue
    ///   before the clobbered callee-save registers are saved.
    ///
    /// - The add-imm sequence must work correctly when `from_reg` and/or
    ///   `into_reg` are the register returned by `get_stacklimit_reg()`.
    fn gen_add_imm(
        call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I>;

    /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
    /// the stack pointer is less than the given limit register (assuming the
    /// stack grows downward).
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I>;

    /// Generate an instruction to compute an address of a stack slot (FP- or
    /// SP-based offset).
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I;

    /// Get a fixed register to use to compute a stack limit. This is needed for
    /// certain sequences generated after the register allocator has already
    /// run. This must satisfy two requirements:
    ///
    /// - It must be a caller-save register that is not used for arguments,
    ///   because it will be clobbered in the prologue before the clobbered
    ///   callee-save registers are saved.
    ///
    /// - It must be safe to pass as an argument and/or destination to
    ///   `gen_add_imm()`. This is relevant when an addition with a large
    ///   immediate needs its own temporary; it cannot use the same fixed
    ///   temporary as this one.
    fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg;

    /// Generate a load from the given [base+offset] address.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;

    /// Generate a store of `from_reg` to the given [base+offset] address.
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;

    /// Adjust the stack pointer up or down.
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>;

    /// Compute a FrameLayout structure containing a sorted list of all clobbered
    /// registers that are callee-saved according to the ABI, as well as the sizes
    /// of all parts of the stack frame. The result is used to emit the prologue
    /// and epilogue routines.
    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout;

    /// Generate the usual frame-setup sequence for this architecture: e.g.,
    /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
    /// AArch64.
    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate the usual frame-restore sequence for this architecture.
    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a return instruction.
    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a probestack call.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32);

    /// Generate an inline stack probe.
    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    );

    /// Generate a clobber-save sequence. The implementation here should return
    /// a sequence of instructions that "push" or otherwise save to the stack all
    /// registers written/modified by the function body that are callee-saved.
    /// The sequence of instructions should adjust the stack pointer downward,
    /// and should align as necessary according to ABI requirements.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a clobber-restore sequence. This sequence should perform the
    /// opposite of the clobber-save sequence generated above, assuming that SP
    /// going into the sequence is at the same point that it was left when the
    /// clobber-save sequence finished.
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a memcpy invocation. Used to set up struct
    /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
    /// allocator.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]>;

    /// Get the number of spillslots required for the given register-class.
    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        target_vector_bytes: u32,
        isa_flags: &Self::F,
    ) -> u32;

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    fn get_machine_env(flags: &settings::Flags, call_conv: isa::CallConv) -> &MachineEnv;

    /// Get all caller-save registers, that is, registers that we expect
    /// not to be saved across a call to a callee with the given ABI.
    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet;

    /// Get the needed extension mode, given the mode attached to the argument
    /// in the signature and the calling convention. The input (the attribute in
    /// the signature) specifies what extension type should be done *if* the ABI
    /// requires extension to the full register; this method's return value
    /// indicates whether the extension actually *will* be done.
    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension;

    /// Get a temporary register that is available to use after a call
    /// completes and that does not interfere with register-carried
    /// return values. This is used to move stack-carried return
    /// values directly into spillslots if needed.
    fn retval_temp_reg(call_conv_of_callee: isa::CallConv) -> Writable<Reg>;

    /// Get the exception payload registers, if any, for a calling
    /// convention.
    ///
    /// Note that the argument here is the calling convention of the *callee*.
    /// This might differ from the caller but the exceptional payloads that are
    /// available are defined by the callee, not the caller.
    fn exception_payload_regs(callee_conv: isa::CallConv) -> &'static [Reg] {
        let _ = callee_conv;
        &[]
    }
}

/// Out-of-line data for calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct CallInfo<T> {
    /// Receiver of this call.
    pub dest: T,
    /// Register uses of this call.
    pub uses: CallArgList,
    /// Register defs of this call.
    pub defs: CallRetList,
    /// Registers clobbered by this call, as per its calling convention.
    pub clobbers: PRegSet,
    /// The calling convention of the callee.
    pub callee_conv: isa::CallConv,
    /// The calling convention of the caller.
    pub caller_conv: isa::CallConv,
    /// The number of bytes that the callee will pop from the stack for the
    /// caller, if any. (Used for popping stack arguments with the `tail`
    /// calling convention.)
    pub callee_pop_size: u32,
    /// Information for a try-call, if this is one. We combine
    /// handling of calls and try-calls as much as possible to share
    /// argument/return logic; they mostly differ in the metadata that
    /// they emit, which this information feeds into.
    pub try_call_info: Option<TryCallInfo>,
}

/// Out-of-line information present on `try_call` instructions only:
/// information that is used to generate exception-handling tables and
/// link up to destination blocks properly.
#[derive(Clone, Debug)]
pub struct TryCallInfo {
    /// The target to jump to on a normal return.
    pub continuation: MachLabel,
    /// Exception tags to catch and corresponding destination labels.
    pub exception_handlers: Box<[TryCallHandler]>,
}

/// Information about an individual handler at a try-call site.
#[derive(Clone, Debug)]
pub enum TryCallHandler {
    /// If the tag matches (given the current context), recover at the
    /// label.
    Tag(ExceptionTag, MachLabel),
    /// Recover at the label unconditionally.
    Default(MachLabel),
    /// Set the dynamic context for interpreting tags at this point in
    /// the handler list.
    Context(Reg),
}

impl<T> CallInfo<T> {
    /// Creates an empty set of info with no clobbers/uses/etc with the
    /// specified ABI.
    pub fn empty(dest: T, call_conv: isa::CallConv) -> CallInfo<T> {
        CallInfo {
            dest,
            uses: smallvec![],
            defs: smallvec![],
            clobbers: PRegSet::empty(),
            caller_conv: call_conv,
            callee_conv: call_conv,
            callee_pop_size: 0,
            try_call_info: None,
        }
    }
}

/// The id of an ABI signature within the `SigSet`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Sig(u32);
cranelift_entity::entity_impl!(Sig);

impl Sig {
    fn prev(self) -> Option<Sig> {
        self.0.checked_sub(1).map(Sig)
    }
}

/// ABI information shared between body (callee) and caller.
#[derive(Clone, Debug)]
pub struct SigData {
    /// Currently both return values and arguments are stored in a contiguous
    /// region of the shared `SigSet::abi_args` vector.
    ///
    /// ```plain
    ///              +----------------------------------------------+
    ///              | return values                                |
    ///              | ...                                          |
    /// rets_end --> +----------------------------------------------+
    ///              | arguments                                    |
    ///              | ...                                          |
    /// args_end --> +----------------------------------------------+
    /// ```
    ///
    /// Note that we only store the two ending offsets, since
    /// rets_end == args_start and rets_start == prev.args_end.
    ///
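    /// As a worked (made-up) example: if a signature has two return values
    /// and three arguments, and the previous `SigData`'s `args_end` is 10,
    /// then its returns occupy `abi_args[10..12]` (`rets_end` = 12) and its
    /// arguments occupy `abi_args[12..15]` (`args_end` = 15), which is
    /// exactly how `SigSet::args()` and `SigSet::rets()` slice the vector.
    ///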
    /// Argument location ending offset (regs or stack slots). Stack offsets are relative to
    /// SP on entry to function.
    ///
    /// This is an index into the `SigSet::abi_args`.
    args_end: u32,

    /// Return-value location ending offset. Stack offsets are relative to the return-area
    /// pointer.
    ///
    /// This is an index into the `SigSet::abi_args`.
    rets_end: u32,

    /// Space on stack used to store arguments. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_arg_space: u32,

    /// Space on stack used to store return values. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_ret_space: u32,

    /// Index in `args` of the stack-return-value-area argument.
    stack_ret_arg: Option<u16>,

    /// Calling convention used.
    call_conv: isa::CallConv,
}

impl SigData {
    /// Get total stack space required for arguments.
    pub fn sized_stack_arg_space(&self) -> u32 {
        self.sized_stack_arg_space
    }

    /// Get total stack space required for return values.
    pub fn sized_stack_ret_space(&self) -> u32 {
        self.sized_stack_ret_space
    }

    /// Get calling convention used.
    pub fn call_conv(&self) -> isa::CallConv {
        self.call_conv
    }

    /// The index of the stack-return-value-area argument, if any.
    pub fn stack_ret_arg(&self) -> Option<u16> {
        self.stack_ret_arg
    }
}

/// A (mostly) deduplicated set of ABI signatures.
///
/// We say "mostly" because we do not dedupe between signatures interned via
/// `ir::SigRef` (direct and indirect calls; the vast majority of signatures in
/// this set) vs via `ir::Signature` (the callee itself and libcalls). Doing
/// this final bit of deduplication would require filling out the
/// `ir_signature_to_abi_sig`, which is a bunch of allocations (not just the
/// hash map itself but params and returns vecs in each signature) that we want
/// to avoid.
///
/// In general, prefer using the `ir::SigRef`-taking methods to the
/// `ir::Signature`-taking methods when you can get away with it, as they don't
/// require cloning non-copy types that will trigger heap allocations.
///
/// This type can be indexed by `Sig` to access its associated `SigData`.
pub struct SigSet {
    /// Interned `ir::Signature`s that we already have an ABI signature for.
    ir_signature_to_abi_sig: FxHashMap<ir::Signature, Sig>,

    /// Interned `ir::SigRef`s that we already have an ABI signature for.
    ir_sig_ref_to_abi_sig: SecondaryMap<ir::SigRef, Option<Sig>>,

    /// A single, shared allocation for all `ABIArg`s used by all
    /// `SigData`s. Each `SigData` references its args/rets via indices into
    /// this allocation.
    abi_args: Vec<ABIArg>,

    /// The actual ABI signatures, keyed by `Sig`.
    sigs: PrimaryMap<Sig, SigData>,
}

impl SigSet {
    /// Construct a new `SigSet`, interning all of the signatures used by the
    /// given function.
    pub fn new<M>(func: &ir::Function, flags: &settings::Flags) -> CodegenResult<Self>
    where
        M: ABIMachineSpec,
    {
        let arg_estimate = func.dfg.signatures.len() * 6;

        let mut sigs = SigSet {
            ir_signature_to_abi_sig: FxHashMap::default(),
            ir_sig_ref_to_abi_sig: SecondaryMap::with_capacity(func.dfg.signatures.len()),
            abi_args: Vec::with_capacity(arg_estimate),
            sigs: PrimaryMap::with_capacity(1 + func.dfg.signatures.len()),
        };

        sigs.make_abi_sig_from_ir_signature::<M>(func.signature.clone(), flags)?;
        for sig_ref in func.dfg.signatures.keys() {
            sigs.make_abi_sig_from_ir_sig_ref::<M>(sig_ref, &func.dfg, flags)?;
        }

        Ok(sigs)
    }

    /// Have we already interned an ABI signature for the given `ir::Signature`?
    pub fn have_abi_sig_for_signature(&self, signature: &ir::Signature) -> bool {
        self.ir_signature_to_abi_sig.contains_key(signature)
    }

    /// Construct and intern an ABI signature for the given `ir::Signature`.
    pub fn make_abi_sig_from_ir_signature<M>(
        &mut self,
        signature: ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        // Because the `HashMap` entry API requires taking ownership of the
        // lookup key -- and we want to avoid unnecessary clones of
        // `ir::Signature`s, even at the cost of duplicate lookups -- we can't
        // have a single, get-or-create-style method for interning
        // `ir::Signature`s into ABI signatures. So at least (debug) assert that
        // we aren't creating duplicate ABI signatures for the same
        // `ir::Signature`.
        debug_assert!(!self.have_abi_sig_for_signature(&signature));

        let sig_data = self.from_func_sig::<M>(&signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_signature_to_abi_sig.insert(signature, sig);
        Ok(sig)
    }

    fn make_abi_sig_from_ir_sig_ref<M>(
        &mut self,
        sig_ref: ir::SigRef,
        dfg: &ir::DataFlowGraph,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        if let Some(sig) = self.ir_sig_ref_to_abi_sig[sig_ref] {
            return Ok(sig);
        }
        let signature = &dfg.signatures[sig_ref];
        let sig_data = self.from_func_sig::<M>(signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_sig_ref_to_abi_sig[sig_ref] = Some(sig);
        Ok(sig)
    }

    /// Get the already-interned ABI signature id for the given `ir::SigRef`.
    pub fn abi_sig_for_sig_ref(&self, sig_ref: ir::SigRef) -> Sig {
        self.ir_sig_ref_to_abi_sig[sig_ref]
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `get_abi_sig_for_sig_ref`")
    }

    /// Get the already-interned ABI signature id for the given `ir::Signature`.
    pub fn abi_sig_for_signature(&self, signature: &ir::Signature) -> Sig {
        self.ir_signature_to_abi_sig
            .get(signature)
            .copied()
            .expect("must call `make_abi_sig_from_ir_signature` before `get_abi_sig_for_signature`")
    }

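    /// Compute a `SigData` for the given `ir::Signature`, interning its
    /// return-value and argument locations into this `SigSet`'s shared
    /// `abi_args` allocation.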
    pub fn from_func_sig<M: ABIMachineSpec>(
        &mut self,
        sig: &ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<SigData> {
        // Keep in sync with ensure_struct_return_ptr_is_returned
        if sig.uses_special_return(ArgumentPurpose::StructReturn) {
            panic!("Explicit StructReturn return value not allowed: {sig:?}")
        }
        let tmp;
        let returns = if let Some(struct_ret_index) =
            sig.special_param_index(ArgumentPurpose::StructReturn)
        {
            if !sig.returns.is_empty() {
                panic!("No return values are allowed when using StructReturn: {sig:?}");
            }
            tmp = [sig.params[struct_ret_index]];
            &tmp
        } else {
            sig.returns.as_slice()
        };

        // Compute args and retvals from signature. Handle retvals first,
        // because we may need to add a return-area arg to the args.

        // NOTE: We rely on the order of the args (rets -> args) inserted to compute the offsets in
        // `SigSet::args()` and `SigSet::rets()`. Therefore, we cannot change the order of the two
        // compute_arg_locs calls.
        let (sized_stack_ret_space, _) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &returns,
            ArgsOrRets::Rets,
            /* extra ret-area ptr = */ false,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        if !flags.enable_multi_ret_implicit_sret() {
            assert_eq!(sized_stack_ret_space, 0);
        }
        let rets_end = u32::try_from(self.abi_args.len()).unwrap();

        // To avoid overflow issues, limit the return size to something reasonable.
        if sized_stack_ret_space > M::STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        let need_stack_return_area = sized_stack_ret_space > 0;
        if need_stack_return_area {
            assert!(!sig.uses_special_param(ir::ArgumentPurpose::StructReturn));
        }

        let (sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &sig.params,
            ArgsOrRets::Args,
            need_stack_return_area,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let args_end = u32::try_from(self.abi_args.len()).unwrap();

        // To avoid overflow issues, limit the arg size to something reasonable.
        if sized_stack_arg_space > M::STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        trace!(
            "ABISig: sig {:?} => args end = {} rets end = {}
             arg stack = {} ret stack = {} need_stack_return_area = {:?}",
            sig,
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            need_stack_return_area,
        );

        let stack_ret_arg = stack_ret_arg.map(|s| u16::try_from(s).unwrap());
        Ok(SigData {
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
            call_conv: sig.call_conv,
        })
    }

    /// Get this signature's ABI arguments.
    pub fn args(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` for how we store the offsets.
        let start = usize::try_from(sig_data.rets_end).unwrap();
        let end = usize::try_from(sig_data.args_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass the implicit pointer
    /// to the return-value area on the stack, if required.
    pub fn get_ret_arg(&self, sig: Sig) -> Option<ABIArg> {
        let sig_data = &self.sigs[sig];
        if let Some(i) = sig_data.stack_ret_arg {
            Some(self.args(sig)[usize::from(i)].clone())
        } else {
            None
        }
    }

    /// Get information specifying how to pass one argument.
    pub fn get_arg(&self, sig: Sig, idx: usize) -> ABIArg {
        self.args(sig)[idx].clone()
    }

    /// Get this signature's ABI returns.
    pub fn rets(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see comments in `SigSet::from_func_sig` for how we store the offsets.
        let start = usize::try_from(sig.prev().map_or(0, |prev| self.sigs[prev].args_end)).unwrap();
        let end = usize::try_from(sig_data.rets_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass one return value.
    pub fn get_ret(&self, sig: Sig, idx: usize) -> ABIArg {
        self.rets(sig)[idx].clone()
    }

    /// Get the number of arguments expected.
    pub fn num_args(&self, sig: Sig) -> usize {
        let len = self.args(sig).len();
        if self.sigs[sig].stack_ret_arg.is_some() {
            len - 1
        } else {
            len
        }
    }

    /// Get the number of return values expected.
    pub fn num_rets(&self, sig: Sig) -> usize {
        self.rets(sig).len()
    }
}

// NB: we do _not_ implement `IndexMut` because these signatures are
// deduplicated and shared!
impl std::ops::Index<Sig> for SigSet {
    type Output = SigData;

    fn index(&self, sig: Sig) -> &Self::Output {
        &self.sigs[sig]
    }
}

/// Structure describing the layout of a function's stack frame.
#[derive(Clone, Debug, Default)]
pub struct FrameLayout {
    /// Word size in bytes, so this struct can be
    /// monomorphic/independent of `ABIMachineSpec`.
    pub word_bytes: u32,

    /// N.B. The areas whose sizes are given in this structure fully
    /// cover the current function's stack frame, from high to low
    /// stack addresses in the sequence below. Each size contains
    /// any alignment padding that may be required by the ABI.

    /// Size of incoming arguments on the stack. This is not technically
    /// part of this function's frame, but code in the function will still
    /// need to access it. Depending on the ABI, we may need to set up a
    /// frame pointer to do so; we also may need to pop this area from the
    /// stack upon return.
    pub incoming_args_size: u32,

    /// The size of the incoming argument area, taking into account any
    /// potential increase in size required for tail calls present in the
    /// function. In the case that no tail calls are present, this value
    /// will be the same as [`Self::incoming_args_size`].
    pub tail_args_size: u32,

    /// Size of the "setup area", typically holding the return address
    /// and/or the saved frame pointer. This may be written either during
    /// the call itself (e.g. a pushed return address) or by code emitted
    /// from gen_prologue_frame_setup. In any case, after that code has
    /// completed execution, the stack pointer is expected to point to the
    /// bottom of this area. The same holds at the start of code emitted
    /// by gen_epilogue_frame_restore.
    pub setup_area_size: u32,

    /// Size of the area used to save callee-saved clobbered registers.
    /// This area is accessed by code emitted from gen_clobber_save and
    /// gen_clobber_restore.
    pub clobber_size: u32,

    /// Storage allocated for the fixed part of the stack frame.
    /// This contains stack slots and spill slots.
    pub fixed_frame_storage_size: u32,

    /// The size of all stackslots.
    pub stackslots_size: u32,

    /// Stack size to be reserved for outgoing arguments, if used by
    /// the current ABI, or 0 otherwise. After gen_clobber_save and
    /// before gen_clobber_restore, the stack pointer points to the
    /// bottom of this area.
    pub outgoing_args_size: u32,

    /// Sorted list of callee-saved registers that are clobbered
    /// according to the ABI. These registers will be saved and
    /// restored by gen_clobber_save and gen_clobber_restore.
    pub clobbered_callee_saves: Vec<Writable<RealReg>>,

    /// The function's call pattern classification.
    pub function_calls: FunctionCalls,
}

impl FrameLayout {
    /// Split the clobbered callee-save registers into integer-class and
    /// float-class groups.
    ///
    /// This method does not currently support vector-class callee-save
    /// registers because no current backend has them.
    pub fn clobbered_callee_saves_by_class(&self) -> (&[Writable<RealReg>], &[Writable<RealReg>]) {
        let (ints, floats) = self.clobbered_callee_saves.split_at(
            self.clobbered_callee_saves
                .partition_point(|r| r.to_reg().class() == RegClass::Int),
        );
        debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float));
        (ints, floats)
    }

    /// The size of FP to SP while the frame is active (not during prologue
    /// setup or epilogue tear down).
    pub fn active_size(&self) -> u32 {
        self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size
    }

    /// Get the offset from the SP to the sized stack slots area.
    pub fn sp_to_sized_stack_slots(&self) -> u32 {
        self.outgoing_args_size
    }

    /// Get the offset of a spill slot from SP.
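    ///
    /// Spill slots sit immediately above the sized stack slots, so the
    /// offset is `stackslots_size` plus the slot index times the word size.
    /// For example (with made-up numbers): 8-byte words and 32 bytes of
    /// stack slots put spill slot 2 at offset 32 + 2 * 8 = 48.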
    pub fn spillslot_offset(&self, spillslot: SpillSlot) -> i64 {
        // Offset from beginning of spillslot area.
        let islot = spillslot.index() as i64;
        let spill_off = islot * self.word_bytes as i64;
        let sp_off = self.stackslots_size as i64 + spill_off;

        sp_off
    }

    /// Get the offset from SP up to FP.
    pub fn sp_to_fp(&self) -> u32 {
        self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size
    }
}

/// ABI object for a function body.
pub struct Callee<M: ABIMachineSpec> {
    /// CLIF-level signature, possibly normalized.
    ir_sig: ir::Signature,
    /// Signature: arg and retval regs.
    sig: Sig,
    /// Defined dynamic types.
    dynamic_type_sizes: HashMap<Type, u32>,
    /// Offsets to each dynamic stackslot.
    dynamic_stackslots: PrimaryMap<DynamicStackSlot, u32>,
    /// Offsets to each sized stackslot.
    sized_stackslots: PrimaryMap<StackSlot, u32>,
    /// Descriptors for sized stackslots.
    sized_stackslot_keys: SecondaryMap<StackSlot, Option<StackSlotKey>>,
    /// Total stack size of all stackslots.
    stackslots_size: u32,
    /// Stack size to be reserved for outgoing arguments.
    outgoing_args_size: u32,
    /// Initially the number of bytes originating in the caller's frame where stack arguments will
    /// live. After lowering, this number may be larger than the size expected by the function being
    /// compiled, as tail calls potentially require more space for stack arguments.
    tail_args_size: u32,
    /// Register-argument defs, to be provided to the `args`
    /// pseudo-inst, and pregs to constrain them to.
    reg_args: Vec<ArgPair>,
    /// Finalized frame layout for this function.
    frame_layout: Option<FrameLayout>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Reg>,
    /// Calling convention this function expects.
    call_conv: isa::CallConv,
    /// The settings controlling this function's compilation.
    flags: settings::Flags,
    /// The ISA-specific flag values controlling this function's compilation.
    isa_flags: M::F,
    /// If this function has a stack limit specified, then `Reg` is where the
    /// stack limit will be located after the instructions specified have been
    /// executed.
    ///
    /// Note that this is intended for insertion into the prologue, if
    /// present. Also note that because the instructions here execute in the
    /// prologue this happens after legalization/register allocation/etc so we
    /// need to be extremely careful with each instruction. The instructions are
    /// manually register-allocated and carefully only use caller-saved
    /// registers and keep nothing live after this sequence of instructions.
    stack_limit: Option<(Reg, SmallInstVec<M::I>)>,

    _mach: PhantomData<M>,
}

fn get_special_purpose_param_register(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
    let idx = f.signature.special_param_index(purpose)?;
    match &sigs.args(sig)[idx] {
        &ABIArg::Slots { ref slots, .. } => match &slots[0] {
            &ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
            _ => None,
        },
        _ => None,
    }
}

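/// Round `val` up to the alignment implied by `mask` (which must be
/// `alignment - 1` for a power-of-two alignment), returning `None` on
/// overflow. For example, `checked_round_up(10, 7)` yields `Some(16)`.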
fn checked_round_up(val: u32, mask: u32) -> Option<u32> {
    Some(val.checked_add(mask)? & !mask)
}

impl<M: ABIMachineSpec> Callee<M> {
    /// Create a new body ABI instance.
    pub fn new(
        f: &ir::Function,
        isa: &dyn TargetIsa,
        isa_flags: &M::F,
        sigs: &SigSet,
    ) -> CodegenResult<Self> {
        trace!("ABI: func signature {:?}", f.signature);

        let flags = isa.flags().clone();
        let sig = sigs.abi_sig_for_signature(&f.signature);

        let call_conv = f.signature.call_conv;
        // Only these calling conventions are supported.
        debug_assert!(
            call_conv == isa::CallConv::SystemV
                || call_conv == isa::CallConv::Tail
                || call_conv == isa::CallConv::Fast
                || call_conv == isa::CallConv::Cold
                || call_conv == isa::CallConv::WindowsFastcall
                || call_conv == isa::CallConv::AppleAarch64
                || call_conv == isa::CallConv::Winch
                || call_conv == isa::CallConv::Patchable,
            "Unsupported calling convention: {call_conv:?}"
        );

        // Compute sized stackslot locations and total stackslot size.
        let mut end_offset: u32 = 0;
        let mut sized_stackslots = PrimaryMap::new();
        let mut sized_stackslot_keys = SecondaryMap::new();

        for (stackslot, data) in f.sized_stack_slots.iter() {
            // We start our computation possibly unaligned where the previous
            // stackslot left off.
            let unaligned_start_offset = end_offset;

            // The start of the stackslot must be aligned.
            //
            // We always at least machine-word-align slots, but also
            // satisfy the user's requested alignment.
            debug_assert!(data.align_shift < 32);
            let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
            let mask = align - 1;
            let start_offset = checked_round_up(unaligned_start_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            // The end offset is the start offset increased by the size.
            end_offset = start_offset
                .checked_add(data.size)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
            sized_stackslots.push(start_offset);
            sized_stackslot_keys[stackslot] = data.key;
        }

        // Compute dynamic stackslot locations and total stackslot size.
        let mut dynamic_stackslots = PrimaryMap::new();
        for (stackslot, data) in f.dynamic_stack_slots.iter() {
            debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len());

            // This computation is similar to the stackslots above.
            let unaligned_start_offset = end_offset;

            let mask = M::word_bytes() - 1;
            let start_offset = checked_round_up(unaligned_start_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;

            let ty = f.get_concrete_dynamic_ty(data.dyn_ty).ok_or_else(|| {
                CodegenError::Unsupported(format!("invalid dynamic vector type: {}", data.dyn_ty))
            })?;

            end_offset = start_offset
                .checked_add(isa.dynamic_vector_bytes(ty))
                .ok_or(CodegenError::ImplLimitExceeded)?;

            dynamic_stackslots.push(start_offset);
        }

        // The size of the stackslots needs to be word aligned.
        let stackslots_size = checked_round_up(end_offset, M::word_bytes() - 1)
            .ok_or(CodegenError::ImplLimitExceeded)?;

        let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len());
        for (dyn_ty, _data) in f.dfg.dynamic_types.iter() {
            let ty = f
                .get_concrete_dynamic_ty(dyn_ty)
                .unwrap_or_else(|| panic!("invalid dynamic vector type: {dyn_ty}"));
            let size = isa.dynamic_vector_bytes(ty);
            dynamic_type_sizes.insert(ty, size);
        }

        // Figure out what instructions, if any, will be needed to check the
        // stack limit. This can either be specified as a special-purpose
        // argument or as a global value which often calculates the stack limit
        // from the arguments.
        let stack_limit = f
            .stack_limit
            .map(|gv| gen_stack_limit::<M>(f, sigs, sig, gv));

        let tail_args_size = sigs[sig].sized_stack_arg_space;

        Ok(Self {
            ir_sig: ensure_struct_return_ptr_is_returned(&f.signature),
            sig,
            dynamic_stackslots,
            dynamic_type_sizes,
            sized_stackslots,
            sized_stackslot_keys,
            stackslots_size,
            outgoing_args_size: 0,
            tail_args_size,
            reg_args: vec![],
            frame_layout: None,
            ret_area_ptr: None,
            call_conv,
            flags,
            isa_flags: isa_flags.clone(),
            stack_limit,
            _mach: PhantomData,
        })
    }

    /// Inserts instructions necessary for checking the stack limit into the
    /// prologue.
    ///
    /// This function will generate instructions necessary to perform a stack
    /// check at the header of a function. The stack check is intended to trap
    /// if the stack pointer goes below a particular threshold, preventing stack
    /// overflow in wasm or other code. The `stack_limit` argument here is the
    /// register which holds the threshold below which we're supposed to trap.
    /// This function is known to allocate `stack_size` bytes and we'll push
    /// instructions onto `insts`.
    ///
    /// Note that the instructions generated here are special because this is
    /// happening so late in the pipeline (e.g. after register allocation). This
    /// means that we need to do manual register allocation here and also be
    /// careful to not clobber any callee-saved or argument registers. For now
    /// this routine makes do with the `spilltmp_reg` as one temporary
    /// register, and a second caller-saved temporary, `tmp2`. This
    /// should be fine for us since no spills should happen in this sequence of
    /// instructions, so our registers won't get accidentally clobbered.
    ///
    /// No values can be live after the prologue, but in this case that's ok
    /// because we just need to perform a stack check before progressing with
    /// the rest of the function.
    fn insert_stack_check(
        &self,
        stack_limit: Reg,
        stack_size: u32,
        insts: &mut SmallInstVec<M::I>,
    ) {
        // With no explicit stack allocated we can just emit the simple check of
        // the stack registers against the stack limit register, and trap if
        // it's out of bounds.
        if stack_size == 0 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
            return;
        }

        // Note that the 32k stack size here is pretty special. See the
        // documentation in x86/abi.rs for why this is here. The general idea is
        // that we're protecting against overflow in the addition that happens
        // below.
        if stack_size >= 32 * 1024 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
        }

        // Add the `stack_size` to `stack_limit`, placing the result in
        // `scratch`.
        //
        // Note though that `stack_limit`'s register may be the same as
        // `scratch`. If our stack size doesn't fit into an immediate this
        // means we need a second scratch register for loading the stack size
        // into a register.
        let scratch = Writable::from_reg(M::get_stacklimit_reg(self.call_conv));
        insts.extend(M::gen_add_imm(
            self.call_conv,
            scratch,
            stack_limit,
            stack_size,
        ));
        insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
    }
}

/// Generates the instructions necessary for the `gv` to be materialized into a
/// register.
///
/// This function will return a register that will contain the result of
/// evaluating `gv`. It will also return any instructions necessary to calculate
/// the value of the register.
///
/// Note that global values are typically lowered to instructions via the
/// standard legalization pass. Unfortunately though prologue generation happens
/// so late in the pipeline that we can't use these legalization passes to
/// generate the instructions for `gv`. As a result we duplicate some lowering
/// of `gv` here and support only some global values. This is similar to what
/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
/// in the future too!
///
/// Also note that this function will make use of `writable_spilltmp_reg()` as a
/// temporary register to store values in if necessary. Currently after we write
/// to this register there are guaranteed to be no spilled values between where
/// it's written and used, because we're not participating in register
/// allocation anyway!
fn gen_stack_limit<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
) -> (Reg, SmallInstVec<M::I>) {
    let mut insts = smallvec![];
    let reg = generate_gv::<M>(f, sigs, sig, gv, &mut insts);
    (reg, insts)
}

fn generate_gv<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
    insts: &mut SmallInstVec<M::I>,
) -> Reg {
    match f.global_values[gv] {
        // Return the direct register the vmcontext is in.
        ir::GlobalValueData::VMContext => {
            get_special_purpose_param_register(f, sigs, sig, ir::ArgumentPurpose::VMContext)
                .expect("no vmcontext parameter found")
        }
        // Load our base value into a register, then load from that register
        // into a temporary register.
        ir::GlobalValueData::Load {
            base,
            offset,
            global_type: _,
            flags: _,
        } => {
            let base = generate_gv::<M>(f, sigs, sig, base, insts);
            let into_reg = Writable::from_reg(M::get_stacklimit_reg(f.stencil.signature.call_conv));
            insts.push(M::gen_load_base_offset(
                into_reg,
                base,
                offset.into(),
                M::word_type(),
            ));
            into_reg.to_reg()
        }
        ref other => panic!("global value for stack limit not supported: {other}"),
    }
}

/// Returns true if the signature needs to be legalized.
fn missing_struct_return(sig: &ir::Signature) -> bool {
    sig.uses_special_param(ArgumentPurpose::StructReturn)
        && !sig.uses_special_return(ArgumentPurpose::StructReturn)
}

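/// Legalize a signature so that a struct-return pointer parameter, if
/// present, also appears as the (sole) return value. For illustration, a
/// signature like `(i64 sret, i32) -> ()` becomes `(i64 sret, i32) -> (i64 sret)`.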
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
    // Keep in sync with Callee::new
    let mut sig = sig.clone();
    if sig.uses_special_return(ArgumentPurpose::StructReturn) {
        panic!("Explicit StructReturn return value not allowed: {sig:?}")
    }
    if let Some(struct_ret_index) = sig.special_param_index(ArgumentPurpose::StructReturn) {
        if !sig.returns.is_empty() {
            panic!("No return values are allowed when using StructReturn: {sig:?}");
        }
        sig.returns.insert(0, sig.params[struct_ret_index]);
    }
    sig
}
1475
1476/// ### Pre-Regalloc Functions
1477///
1478/// These methods of `Callee` may only be called before regalloc.
1479impl<M: ABIMachineSpec> Callee<M> {
1480 /// Access the (possibly legalized) signature.
1481 pub fn signature(&self) -> &ir::Signature {
1482 debug_assert!(
1483 !missing_struct_return(&self.ir_sig),
1484 "`Callee::ir_sig` is always legalized"
1485 );
1486 &self.ir_sig
1487 }
1488
1489 /// Initialize. This is called after the Callee is constructed because it
1490 /// may allocate a temp vreg, which can only be allocated once the lowering
1491 /// context exists.
1492 pub fn init_retval_area(
1493 &mut self,
1494 sigs: &SigSet,
1495 vregs: &mut VRegAllocator<M::I>,
1496 ) -> CodegenResult<()> {
1497 if sigs[self.sig].stack_ret_arg.is_some() {
1498 let ret_area_ptr = vregs.alloc(M::word_type())?;
1499 self.ret_area_ptr = Some(ret_area_ptr.only_reg().unwrap());
1500 }
1501 Ok(())
1502 }
1503
1504 /// Get the return area pointer register, if any.
1505 pub fn ret_area_ptr(&self) -> Option<Reg> {
1506 self.ret_area_ptr
1507 }
1508
1509 /// Accumulate outgoing arguments.
1510 ///
1511 /// This ensures that at least `size` bytes are allocated in the prologue to
1512 /// be available for use in function calls to hold arguments and/or return
1513 /// values. If this function is called multiple times, the maximum of all
1514 /// `size` values will be available.
    pub fn accumulate_outgoing_args_size(&mut self, size: u32) {
        if size > self.outgoing_args_size {
            self.outgoing_args_size = size;
        }
    }

    /// Accumulate the incoming argument area size requirement for a tail
    /// call, as the tail callee's argument area may be larger than that of
    /// the function currently being compiled.
    pub fn accumulate_tail_args_size(&mut self, size: u32) {
        if size > self.tail_args_size {
            self.tail_args_size = size;
        }
    }

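    /// Whether forward-edge control-flow integrity is enabled for this
    /// function, per the ISA flags.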
    pub fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.isa_flags.is_forward_edge_cfi_enabled()
    }

    /// Get the calling convention implemented by this ABI object.
    pub fn call_conv(&self) -> isa::CallConv {
        self.call_conv
    }

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    pub fn machine_env(&self) -> &MachineEnv {
        M::get_machine_env(&self.flags, self.call_conv)
    }

    /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes.
    pub fn sized_stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32> {
        &self.sized_stackslots
    }

    /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes.
    pub fn dynamic_stackslot_offsets(&self) -> &PrimaryMap<DynamicStackSlot, u32> {
        &self.dynamic_stackslots
    }

    /// Generate an instruction which copies an argument to a destination
    /// register.
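    ///
    /// The three `ABIArg` cases are handled as follows (sketch): `Slots`
    /// turns each register slot into an `ArgPair` constraint and each stack
    /// slot into a load from the incoming-argument area; `StructArg` computes
    /// the address of the caller-provided buffer; `ImplicitPtrArg` fetches
    /// the pointer and then dereferences it.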
    pub fn gen_copy_arg_to_regs(
        &mut self,
        sigs: &SigSet,
        idx: usize,
        into_regs: ValueRegs<Writable<Reg>>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> SmallInstVec<M::I> {
        let mut insts = smallvec![];
        let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable<Reg>| {
            match slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    // Add a preg -> def pair to the eventual `args`
                    // instruction. Extension mode doesn't matter
                    // (we're copying out, not in; we ignore high bits
                    // by convention).
                    let arg = ArgPair {
                        vreg: *into_reg,
                        preg: reg.into(),
                    };
                    self.reg_args.push(arg);
                }
                &ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension,
                    ..
                } => {
                    // However, we have to respect the extension mode for stack
                    // slots, or else we grab the wrong bytes on big-endian.
                    let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                    let ty =
                        if ext != ArgumentExtension::None && M::word_bits() > ty_bits(ty) as u32 {
                            M::word_type()
                        } else {
                            ty
                        };
                    insts.push(M::gen_load_stack(
                        StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                        *into_reg,
                        ty,
                    ));
                }
            }
        };

        match &sigs.args(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(into_regs.len(), slots.len());
                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
                    copy_arg_slot_to_reg(&slot, &into_reg);
                }
            }
            &ABIArg::StructArg { offset, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // Buffer address is implicitly defined by the ABI.
                insts.push(M::gen_get_stack_addr(
                    StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                    into_reg,
                ));
            }
            &ABIArg::ImplicitPtrArg { pointer, ty, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // We need to dereference the pointer.
                let base = match &pointer {
                    &ABIArgSlot::Reg { reg, ty, .. } => {
                        let tmp = vregs.alloc_with_deferred_error(ty).only_reg().unwrap();
                        self.reg_args.push(ArgPair {
                            vreg: Writable::from_reg(tmp),
                            preg: reg.into(),
                        });
                        tmp
                    }
                    &ABIArgSlot::Stack { offset, ty, .. } => {
                        let addr_reg = writable_value_regs(vregs.alloc_with_deferred_error(ty))
                            .only_reg()
                            .unwrap();
                        insts.push(M::gen_load_stack(
                            StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                            addr_reg,
                            ty,
                        ));
                        addr_reg.to_reg()
                    }
                };
                insts.push(M::gen_load_base_offset(into_reg, base, 0, ty));
            }
        }
        insts
    }

    /// Generate instructions which copy a source value's registers to a
    /// return-value slot, returning the register constraints and any
    /// extension/store instructions required.
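    ///
    /// For example (extension behavior assumed per the signature): an `i8`
    /// returned in a register slot marked `Sext` on a 64-bit target is
    /// sign-extended into a fresh vreg via `gen_extend(dst, src, true, 8, 64)`
    /// before that vreg is constrained to the physical return register.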
    pub fn gen_copy_regs_to_retval(
        &self,
        sigs: &SigSet,
        idx: usize,
        from_regs: ValueRegs<Reg>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> (SmallVec<[RetPair; 2]>, SmallInstVec<M::I>) {
        let mut reg_pairs = smallvec![];
        let mut ret = smallvec![];
        let word_bits = M::word_bits() as u8;
        match &sigs.rets(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(from_regs.len(), slots.len());
                for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
                    match slot {
                        &ABIArgSlot::Reg {
                            reg, ty, extension, ..
                        } => {
                            let from_bits = ty_bits(ty) as u8;
                            let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                            let vreg = match (ext, from_bits) {
                                (ir::ArgumentExtension::Uext, n)
                                | (ir::ArgumentExtension::Sext, n)
                                    if n < word_bits =>
                                {
                                    let signed = ext == ir::ArgumentExtension::Sext;
                                    let dst =
                                        writable_value_regs(vregs.alloc_with_deferred_error(ty))
                                            .only_reg()
                                            .unwrap();
                                    ret.push(M::gen_extend(
                                        dst, from_reg, signed, from_bits,
                                        /* to_bits = */ word_bits,
                                    ));
                                    dst.to_reg()
                                }
                                _ => {
                                    // No move needed, regalloc2 will emit it using the constraint
                                    // added by the RetPair.
                                    from_reg
                                }
                            };
                            reg_pairs.push(RetPair {
                                vreg,
                                preg: Reg::from(reg),
                            });
                        }
                        &ABIArgSlot::Stack {
                            offset,
                            ty,
                            extension,
                            ..
                        } => {
                            let mut ty = ty;
                            let from_bits = ty_bits(ty) as u8;
                            // A machine ABI implementation should ensure that stack frames
                            // have "reasonable" size. All current ABIs for machinst
                            // backends (aarch64 and x64) enforce a 128MB limit.
                            let off = i32::try_from(offset).expect(
                                "Argument stack offset greater than 2GB; should hit impl limit first",
                            );
                            let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                            // If the ABI requires extension, extend into a fresh
                            // temporary and store that extended word instead.
                            let mut data = from_reg;
                            match (ext, from_bits) {
                                (ir::ArgumentExtension::Uext, n)
                                | (ir::ArgumentExtension::Sext, n)
                                    if n < word_bits =>
                                {
                                    assert_eq!(M::word_reg_class(), from_reg.class());
                                    let signed = ext == ir::ArgumentExtension::Sext;
                                    let dst =
                                        writable_value_regs(vregs.alloc_with_deferred_error(ty))
                                            .only_reg()
                                            .unwrap();
                                    ret.push(M::gen_extend(
                                        dst, from_reg, signed, from_bits,
                                        /* to_bits = */ word_bits,
                                    ));
                                    // Store the extended version.
                                    data = dst.to_reg();
                                    ty = M::word_type();
                                }
                                _ => {}
                            };
                            ret.push(M::gen_store_base_offset(
                                self.ret_area_ptr.unwrap(),
                                off,
                                data,
                                ty,
                            ));
                        }
                    }
                }
            }
            ABIArg::StructArg { .. } => {
                panic!("StructArg in return position is unsupported");
            }
            ABIArg::ImplicitPtrArg { .. } => {
                panic!("ImplicitPtrArg in return position is unsupported");
            }
        }
        (reg_pairs, ret)
    }

    /// Generate any setup instruction needed to save values to the
    /// return-value area. This is usually needed when there are multiple
    /// return values or an otherwise large return value that must be passed
    /// on the stack; typically the ABI specifies an extra hidden argument
    /// that is a pointer to that memory.
    pub fn gen_retval_area_setup(
        &mut self,
        sigs: &SigSet,
        vregs: &mut VRegAllocator<M::I>,
    ) -> Option<M::I> {
        if let Some(i) = sigs[self.sig].stack_ret_arg {
            let ret_area_ptr = Writable::from_reg(self.ret_area_ptr.unwrap());
            let insts =
                self.gen_copy_arg_to_regs(sigs, i.into(), ValueRegs::one(ret_area_ptr), vregs);
            insts.into_iter().next().map(|inst| {
                trace!(
                    "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
                    inst,
                    ret_area_ptr.to_reg()
                );
                inst
            })
        } else {
            trace!("gen_retval_area_setup: not needed");
            None
        }
    }

    /// Generate a return instruction.
    pub fn gen_rets(&self, rets: Vec<RetPair>) -> M::I {
        M::gen_rets(rets)
    }

    /// Set up argument values `args` for a call with signature `sig`.
    /// This will return a series of instructions to be emitted to set
    /// up all arguments, as well as a `CallArgList` list representing
    /// the arguments passed in registers. The latter need to be added
    /// as constraints to the actual call instruction.
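    ///
    /// Arguments are handled in two passes (sketch): first, `StructArg`
    /// buffers are copied into the outgoing area, since the `memcpy` libcall
    /// could clobber argument registers; then all remaining arguments are
    /// extended as the ABI requires and either constrained to their registers
    /// or stored to their stack slots.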
    pub fn gen_call_args(
        &self,
        sigs: &SigSet,
        sig: Sig,
        args: &[ValueRegs<Reg>],
        is_tail_call: bool,
        flags: &settings::Flags,
        vregs: &mut VRegAllocator<M::I>,
    ) -> (CallArgList, SmallInstVec<M::I>) {
        let mut uses: CallArgList = smallvec![];
        let mut insts = smallvec![];

        assert_eq!(args.len(), sigs.num_args(sig));

        let call_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;
        let stack_arg = |offset| {
            if is_tail_call {
                StackAMode::IncomingArg(offset, stack_arg_space)
            } else {
                StackAMode::OutgoingArg(offset)
            }
        };

        let word_ty = M::word_type();
        let word_rc = M::word_reg_class();
        let word_bits = M::word_bits() as usize;

        if is_tail_call {
            debug_assert_eq!(
                self.call_conv,
                isa::CallConv::Tail,
                "Can only do `return_call`s from within a `tail` calling convention function"
            );
        }

        // Helper to process a single argument slot (register or stack slot).
        // This will either add the register to the `uses` list or write the
        // value to the stack slot in the outgoing argument area (or for tail
        // calls, the incoming argument area).
        let mut process_arg_slot = |insts: &mut SmallInstVec<M::I>, slot, vreg, ty| {
            match &slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    uses.push(CallArgPair {
                        vreg,
                        preg: reg.into(),
                    });
                }
                &ABIArgSlot::Stack { offset, .. } => {
                    insts.push(M::gen_store_stack(stack_arg(offset), vreg, ty));
                }
            };
        };

        // First pass: Handle `StructArg` arguments. These need to be copied
        // into their associated stack buffers. This should happen before any
        // of the other arguments are processed, as the `memcpy` call might
        // clobber registers used by other arguments.
        for (idx, from_regs) in args.iter().enumerate() {
            match &sigs.args(sig)[idx] {
                &ABIArg::Slots { .. } | &ABIArg::ImplicitPtrArg { .. } => {}
                &ABIArg::StructArg { offset, size, .. } => {
                    let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                    insts.push(M::gen_get_stack_addr(
                        stack_arg(offset),
                        Writable::from_reg(tmp),
                    ));
                    insts.extend(M::gen_memcpy(
                        isa::CallConv::for_libcall(flags, call_conv),
                        tmp,
                        from_regs.only_reg().unwrap(),
                        size as usize,
                        |ty| {
                            Writable::from_reg(
                                vregs.alloc_with_deferred_error(ty).only_reg().unwrap(),
                            )
                        },
                    ));
                }
            }
        }

        // Second pass: Handle everything except `StructArg` arguments.
        for (idx, from_regs) in args.iter().enumerate() {
            match sigs.args(sig)[idx] {
                ABIArg::Slots { ref slots, .. } => {
                    assert_eq!(from_regs.len(), slots.len());
                    for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
                        // Load argument slot value from `from_reg`, and perform any zero-
                        // or sign-extension that is required by the ABI.
                        let (ty, extension) = match *slot {
                            ABIArgSlot::Reg { ty, extension, .. } => (ty, extension),
                            ABIArgSlot::Stack { ty, extension, .. } => (ty, extension),
                        };
                        let ext = M::get_ext_mode(call_conv, extension);
                        let (vreg, ty) = if ext != ir::ArgumentExtension::None
                            && ty_bits(ty) < word_bits
                        {
                            assert_eq!(word_rc, from_reg.class());
                            let signed = match ext {
                                ir::ArgumentExtension::Uext => false,
                                ir::ArgumentExtension::Sext => true,
                                _ => unreachable!(),
                            };
                            let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                            insts.push(M::gen_extend(
                                Writable::from_reg(tmp),
                                *from_reg,
                                signed,
                                ty_bits(ty) as u8,
                                word_bits as u8,
                            ));
                            (tmp, word_ty)
                        } else {
                            (*from_reg, ty)
                        };
                        process_arg_slot(&mut insts, *slot, vreg, ty);
                    }
                }
                ABIArg::ImplicitPtrArg {
                    offset,
                    pointer,
                    ty,
                    ..
                } => {
                    let vreg = from_regs.only_reg().unwrap();
                    let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                    insts.push(M::gen_get_stack_addr(
                        stack_arg(offset),
                        Writable::from_reg(tmp),
                    ));
                    insts.push(M::gen_store_base_offset(tmp, 0, vreg, ty));
                    process_arg_slot(&mut insts, pointer, tmp, word_ty);
                }
                ABIArg::StructArg { .. } => {}
            }
        }

        // Finally, set the stack-return pointer to the return argument area.
        // For tail calls, this means forwarding the incoming stack-return pointer.
        if let Some(ret_arg) = sigs.get_ret_arg(sig) {
            let ret_area = if is_tail_call {
                self.ret_area_ptr.expect(
                    "if the tail callee has a return pointer, then the tail caller must as well",
                )
            } else {
                let tmp = vregs.alloc_with_deferred_error(word_ty).only_reg().unwrap();
                let amode = StackAMode::OutgoingArg(stack_arg_space.into());
                insts.push(M::gen_get_stack_addr(amode, Writable::from_reg(tmp)));
                tmp
            };
            match ret_arg {
                // The return pointer must occupy a single slot.
                ABIArg::Slots { slots, .. } => {
                    assert_eq!(slots.len(), 1);
                    process_arg_slot(&mut insts, slots[0], ret_area, word_ty);
                }
                _ => unreachable!(),
            }
        }

        (uses, insts)
    }

    /// Set up return values `outputs` for a call with signature `sig`.
    /// This does not emit (or return) any instructions, but returns a
    /// `CallRetList` representing the return value constraints. These
    /// constraints need to be added to the actual call instruction.
    ///
    /// If `try_call_payloads` is not `None`, it is expected to hold the
    /// exception payload registers for try_call instructions. These
    /// will be added as needed to the `CallRetList` as well.
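    ///
    /// Note that any `StructReturn` return slot is skipped here: the caller
    /// already owns the return buffer, so the callee's returned copy of the
    /// buffer pointer carries no new information.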
    pub fn gen_call_rets(
        &self,
        sigs: &SigSet,
        sig: Sig,
        outputs: &[ValueRegs<Reg>],
        try_call_payloads: Option<&[Writable<Reg>]>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> CallRetList {
        let callee_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;

        let word_ty = M::word_type();
        let word_bits = M::word_bits() as usize;

        let mut defs: CallRetList = smallvec![];
        let mut outputs = outputs.into_iter();
        let num_rets = sigs.num_rets(sig);
        for idx in 0..num_rets {
            let ret = sigs.rets(sig)[idx].clone();
            match ret {
                ABIArg::Slots {
                    ref slots, purpose, ..
                } => {
                    // We do not use the returned copy of the return buffer pointer,
                    // so skip any StructReturn returns that may be present.
                    if purpose == ArgumentPurpose::StructReturn {
                        continue;
                    }
                    let retval_regs = outputs.next().unwrap();
                    assert_eq!(retval_regs.len(), slots.len());
                    for (slot, retval_reg) in slots.iter().zip(retval_regs.regs().iter()) {
                        // We do not perform any extension because we're copying out, not in,
                        // and we ignore high bits in our own registers by convention. However,
                        // we still need to use the proper extended type to access stack slots
                        // (this is critical on big-endian systems).
                        let (ty, extension) = match *slot {
                            ABIArgSlot::Reg { ty, extension, .. } => (ty, extension),
                            ABIArgSlot::Stack { ty, extension, .. } => (ty, extension),
                        };
                        let ext = M::get_ext_mode(callee_conv, extension);
                        let ty = if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
                            word_ty
                        } else {
                            ty
                        };

                        match slot {
                            &ABIArgSlot::Reg { reg, .. } => {
                                defs.push(CallRetPair {
                                    vreg: Writable::from_reg(*retval_reg),
                                    location: RetLocation::Reg(reg.into(), ty),
                                });
                            }
                            &ABIArgSlot::Stack { offset, .. } => {
                                let amode =
                                    StackAMode::OutgoingArg(offset + i64::from(stack_arg_space));
                                defs.push(CallRetPair {
                                    vreg: Writable::from_reg(*retval_reg),
                                    location: RetLocation::Stack(amode, ty),
                                });
                            }
                        }
                    }
                }
                ABIArg::StructArg { .. } => {
                    panic!("StructArg not supported in return position");
                }
                ABIArg::ImplicitPtrArg { .. } => {
                    panic!("ImplicitPtrArg not supported in return position");
                }
            }
        }
        assert!(outputs.next().is_none());

        if let Some(try_call_payloads) = try_call_payloads {
            // Let `M` say where the payload values are going to end up and then
            // double-check it's the same size as the calling convention's
            // reported number of exception types.
            let pregs = M::exception_payload_regs(callee_conv);
            assert_eq!(
                callee_conv.exception_payload_types(M::word_type()).len(),
                pregs.len()
            );

            // We need to update `defs` to contain the exception
            // payload regs as well. We have two sources of info that
            // we join:
            //
            // - The machine-specific ABI implementation `M`, which
            //   tells us the particular registers that payload values
            //   must be in
            // - The passed-in lowering context, which gives us the
            //   vregs we must define.
            //
            // Note that payload values may need to end up in the same
            // physical registers as ordinary return values; this is
            // not a conflict, because we either get one or the
            // other. For regalloc's purposes, we define both starting
            // here at the callsite, but we can share one def in the
            // `defs` list and alias one vreg to another. Thus we
            // handle the two cases below for each payload register:
            // overlaps a return value (and we alias to it) or not
            // (and we add a def).
            for (i, &preg) in pregs.iter().enumerate() {
                let vreg = try_call_payloads[i];
                if let Some(existing) = defs.iter().find(|def| match def.location {
                    RetLocation::Reg(r, _) => r == preg,
                    _ => false,
                }) {
                    vregs.set_vreg_alias(vreg.to_reg(), existing.vreg.to_reg());
                } else {
                    defs.push(CallRetPair {
                        vreg,
                        location: RetLocation::Reg(preg, word_ty),
                    });
                }
            }
        }

        defs
    }

    /// Populate a `CallInfo` for a call with signature `sig`.
    ///
    /// - `dest` is the target-specific call destination value.
    /// - `uses` is the `CallArgList` describing argument constraints.
    /// - `defs` is the `CallRetList` describing return constraints.
    /// - `try_call_info` describes exception targets for try_call instructions.
    ///
    /// The clobber list is computed here from the above data.
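    ///
    /// A note on `callee_pop_size` (sketch, sizes assumed): calling a
    /// `tail`-convention function that takes 32 bytes of stack arguments
    /// yields `callee_pop_size == 32`, since that convention has the callee
    /// pop its stack arguments; for all other conventions it is 0.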
    pub fn gen_call_info<T>(
        &self,
        sigs: &SigSet,
        sig: Sig,
        dest: T,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> CallInfo<T> {
        let caller_conv = self.call_conv;
        let callee_conv = sigs[sig].call_conv;
        let stack_arg_space = sigs[sig].sized_stack_arg_space;

        let clobbers = {
            // Get clobbers: all caller-saves. These may include return value
            // regs, which we will remove from the clobber set below.
            let mut clobbers =
                <M>::get_regs_clobbered_by_call(callee_conv, try_call_info.is_some());

            // Remove retval regs from clobbers.
            for def in &defs {
                if let RetLocation::Reg(preg, _) = def.location {
                    clobbers.remove(PReg::from(preg.to_real_reg().unwrap()));
                }
            }

            clobbers
        };

        // Any adjustment to SP to account for required outgoing arguments/stack return values must
        // be done inside of the call pseudo-op, to ensure that SP is always in a consistent
        // state for all other instructions. For example, if a tail-call abi function is called
        // here, the reclamation of the outgoing argument area must be done inside of the call
        // pseudo-op's emission to ensure that SP is consistent at all other points in the lowered
        // function. (Except the prologue and epilogue, but those are fairly special parts of the
        // function that establish the SP invariants that are relied on elsewhere and are generated
        // after the register allocator has run and thus cannot have register allocator-inserted
        // references to SP offsets.)

        let callee_pop_size = if callee_conv == isa::CallConv::Tail {
            // The tail calling convention has callees pop stack arguments.
            stack_arg_space
        } else {
            0
        };

        CallInfo {
            dest,
            uses,
            defs,
            clobbers,
            callee_conv,
            caller_conv,
            callee_pop_size,
            try_call_info,
        }
    }

    /// Get the raw offset of a sized stackslot in the slot region.
    pub fn sized_stackslot_offset(&self, slot: StackSlot) -> u32 {
        self.sized_stackslots[slot]
    }

    /// Produce an instruction that computes a sized stackslot address.
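    ///
    /// For example (offsets assumed): if `slot` lives at offset 16 in the
    /// slot region and `offset == 8`, the emitted instruction computes the
    /// address denoted by `StackAMode::Slot(24)`, i.e., 24 bytes above the
    /// base of the slot storage region.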
    pub fn sized_stackslot_addr(
        &self,
        slot: StackSlot,
        offset: u32,
        into_reg: Writable<Reg>,
    ) -> M::I {
        // Offset from beginning of stackslot area.
        let stack_off = self.sized_stackslots[slot] as i64;
        let sp_off: i64 = stack_off + (offset as i64);
        M::gen_get_stack_addr(StackAMode::Slot(sp_off), into_reg)
    }

    /// Produce an instruction that computes a dynamic stackslot address.
    pub fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable<Reg>) -> M::I {
        let stack_off = self.dynamic_stackslots[slot] as i64;
        M::gen_get_stack_addr(StackAMode::Slot(stack_off), into_reg)
    }

    /// Get an `args` pseudo-inst, if any, that should appear at the
    /// very top of the function body prior to regalloc.
    pub fn take_args(&mut self) -> Option<M::I> {
        if !self.reg_args.is_empty() {
            // Very first instruction is an `args` pseudo-inst that
            // establishes live-ranges for in-register arguments and
            // constrains them at the start of the function to the
            // locations defined by the ABI.
            Some(M::gen_args(std::mem::take(&mut self.reg_args)))
        } else {
            None
        }
    }
}

/// ### Post-Regalloc Functions
///
/// These methods of `Callee` may only be called after
/// regalloc.
impl<M: ABIMachineSpec> Callee<M> {
    /// Compute the final frame layout, post-regalloc.
    ///
    /// This must be called before gen_prologue or gen_epilogue.
    pub fn compute_frame_layout(
        &mut self,
        sigs: &SigSet,
        spillslots: usize,
        clobbered: Vec<Writable<RealReg>>,
        function_calls: FunctionCalls,
    ) {
        let bytes = M::word_bytes();
        let total_stacksize = self.stackslots_size + bytes * spillslots as u32;
        let mask = M::stack_align(self.call_conv) - 1;
        // Align the stack to the ABI-required alignment; e.g., with 16-byte
        // alignment, a raw size of 40 rounds up to 48.
        let total_stacksize = (total_stacksize + mask) & !mask;
        self.frame_layout = Some(M::compute_frame_layout(
            self.call_conv,
            &self.flags,
            self.signature(),
            &clobbered,
            function_calls,
            self.stack_args_size(sigs),
            self.tail_args_size,
            self.stackslots_size,
            total_stacksize,
            self.outgoing_args_size,
        ));
    }

    /// Generate a prologue, post-regalloc.
    ///
    /// This should include any stack frame or other setup necessary to use the
    /// other methods (`load_arg`, `store_retval`, and spillslot accesses).
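    ///
    /// The emitted sequence is, in order (sketch): frame setup (establishing
    /// the FP/return-address frame record), an optional stack-limit check
    /// and/or stack probes, and finally clobbered-register saves together
    /// with any remaining fixed-frame allocation.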
    pub fn gen_prologue(&self) -> SmallInstVec<M::I> {
        let frame_layout = self.frame_layout();
        let mut insts = smallvec![];

        // Set up frame.
        insts.extend(M::gen_prologue_frame_setup(
            self.call_conv,
            &self.flags,
            &self.isa_flags,
            &frame_layout,
        ));

        // The stack limit check needs to cover all the stack adjustments we
        // might make, up to the next stack limit check in any function we
        // call. Since this happens after frame setup, the current function's
        // setup area needs to be accounted for in the caller's stack limit
        // check, but we need to account for any setup area that our callees
        // might need. Note that s390x may also use the outgoing args area for
        // backtrace support even in leaf functions, so that should be accounted
        // for unconditionally.
        let total_stacksize = (frame_layout.tail_args_size - frame_layout.incoming_args_size)
            + frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size
            + if frame_layout.function_calls == FunctionCalls::None {
                0
            } else {
                frame_layout.setup_area_size
            };

        // Leaf functions that use no stack don't need a stack check; otherwise,
        // insert the stack-limit check and/or stack probes as configured.
        if total_stacksize > 0 || frame_layout.function_calls != FunctionCalls::None {
            if let Some((reg, stack_limit_load)) = &self.stack_limit {
                insts.extend(stack_limit_load.clone());
                self.insert_stack_check(*reg, total_stacksize, &mut insts);
            }

            if self.flags.enable_probestack() {
                let guard_size = 1 << self.flags.probestack_size_log2();
                match self.flags.probestack_strategy() {
                    ProbestackStrategy::Inline => M::gen_inline_probestack(
                        &mut insts,
                        self.call_conv,
                        total_stacksize,
                        guard_size,
                    ),
                    ProbestackStrategy::Outline => {
                        if total_stacksize >= guard_size {
                            M::gen_probestack(&mut insts, total_stacksize);
                        }
                    }
                }
            }
        }

        // Save clobbered registers.
        insts.extend(M::gen_clobber_save(
            self.call_conv,
            &self.flags,
            &frame_layout,
        ));

        insts
    }

    /// Generate an epilogue, post-regalloc.
    ///
    /// Note that this must generate the actual return instruction (rather than
    /// emitting this in the lowering logic), because the epilogue code comes
    /// before the return and the two are likely closely related.
    pub fn gen_epilogue(&self) -> SmallInstVec<M::I> {
        let frame_layout = self.frame_layout();
        let mut insts = smallvec![];

        // Restore clobbered registers.
        insts.extend(M::gen_clobber_restore(
            self.call_conv,
            &self.flags,
            &frame_layout,
        ));

        // Tear down frame.
        insts.extend(M::gen_epilogue_frame_restore(
            self.call_conv,
            &self.flags,
            &self.isa_flags,
            &frame_layout,
        ));

        // And return.
        insts.extend(M::gen_return(
            self.call_conv,
            &self.isa_flags,
            &frame_layout,
        ));

        trace!("Epilogue: {:?}", insts);
        insts
    }

    /// Return a reference to the computed frame layout information. This
    /// function will panic if it's called before [`Self::compute_frame_layout`].
    pub fn frame_layout(&self) -> &FrameLayout {
        self.frame_layout
            .as_ref()
            .expect("frame layout not computed before prologue generation")
    }

    /// Returns the offset from SP to FP for the given function, after
    /// the prologue has set up the frame. This comprises the spill
    /// slots and stack-storage slots as well as storage for clobbered
    /// callee-save registers and outgoing arguments at callsites
    /// (space for which is reserved during frame setup).
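    ///
    /// A sketch in terms of the computed `FrameLayout` fields:
    ///
    /// ```plain
    /// SP-to-FP offset = clobber_size
    ///                 + fixed_frame_storage_size
    ///                 + outgoing_args_size
    /// ```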
    pub fn sp_to_fp_offset(&self) -> u32 {
        let frame_layout = self.frame_layout();
        frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size
    }

    /// Returns offset from the slot base in the current frame to the caller's SP.
    pub fn slot_base_to_caller_sp_offset(&self) -> u32 {
        // Note: this looks very similar to `sp_to_fp_offset()` above, but
        // it differs in both endpoints: it measures from the bottom
        // of stackslots, excluding outgoing args; and it includes the
        // setup area (FP/LR) size and any extra tail-args space.
        let frame_layout = self.frame_layout();
        frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.setup_area_size
            + (frame_layout.tail_args_size - frame_layout.incoming_args_size)
    }

    /// Returns the size of arguments expected on the stack.
    pub fn stack_args_size(&self, sigs: &SigSet) -> u32 {
        sigs[self.sig].sized_stack_arg_space
    }

    /// Get the spill-slot size.
    pub fn get_spillslot_size(&self, rc: RegClass) -> u32 {
        let max = if self.dynamic_type_sizes.is_empty() {
            16
        } else {
            *self
                .dynamic_type_sizes
                .iter()
                .max_by(|x, y| x.1.cmp(&y.1))
                .map(|(_k, v)| v)
                .unwrap()
        };
        M::get_number_of_spillslots_for_value(rc, max, &self.isa_flags)
    }

    /// Get the spill slot offset relative to the fixed allocation area start.
    pub fn get_spillslot_offset(&self, slot: SpillSlot) -> i64 {
        self.frame_layout().spillslot_offset(slot)
    }

    /// Generate a spill.
    pub fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> M::I {
        let ty = M::I::canonical_type_for_rc(from_reg.class());
        debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);

        let sp_off = self.get_spillslot_offset(to_slot);
        trace!("gen_spill: {from_reg:?} into slot {to_slot:?} at offset {sp_off}");

        let to = StackAMode::Slot(sp_off);
        <M>::gen_store_stack(to, Reg::from(from_reg), ty)
    }

    /// Generate a reload (fill).
    pub fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> M::I {
        let ty = M::I::canonical_type_for_rc(to_reg.to_reg().class());
        debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);

        let sp_off = self.get_spillslot_offset(from_slot);
        trace!("gen_reload: {to_reg:?} from slot {from_slot:?} at offset {sp_off}");

        let from = StackAMode::Slot(sp_off);
        <M>::gen_load_stack(from, to_reg.map(Reg::from), ty)
    }

    /// Provide metadata to be emitted alongside machine code.
    ///
    /// This metadata describes the frame layout sufficiently to find
    /// stack slots, so that runtimes and unwinders can observe state
    /// set up by compiled code in stackslots allocated for that
    /// purpose.
    pub fn frame_slot_metadata(&self) -> MachBufferFrameLayout {
        let frame_to_fp_offset = self.sp_to_fp_offset();
        let mut stackslots = SecondaryMap::with_capacity(self.sized_stackslots.len());
        let storage_area_base = self.frame_layout().outgoing_args_size;
        for (slot, storage_area_offset) in &self.sized_stackslots {
            stackslots[slot] = MachBufferStackSlot {
                offset: storage_area_base.checked_add(*storage_area_offset).unwrap(),
                key: self.sized_stackslot_keys[slot],
            };
        }
        MachBufferFrameLayout {
            frame_to_fp_offset,
            stackslots,
        }
    }
}

/// An input argument to a call instruction: the vreg that is used,
/// and the preg it is constrained to (per the ABI).
#[derive(Clone, Debug)]
pub struct CallArgPair {
    /// The virtual register to use for the argument.
    pub vreg: Reg,
    /// The real register into which the arg goes.
    pub preg: Reg,
}

/// An output return value from a call instruction: the vreg that is
/// defined, and the preg or stack location it is constrained to (per
/// the ABI).
#[derive(Clone, Debug)]
pub struct CallRetPair {
    /// The virtual register to define from this return value.
    pub vreg: Writable<Reg>,
    /// The location (register or stack) from which the return value is read.
    pub location: RetLocation,
}

/// A location to load a return-value from after a call completes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum RetLocation {
    /// A physical register.
    Reg(Reg, Type),
    /// A stack location, identified by a `StackAMode`.
    Stack(StackAMode, Type),
}

pub type CallArgList = SmallVec<[CallArgPair; 8]>;
pub type CallRetList = SmallVec<[CallRetPair; 8]>;

impl<T> CallInfo<T> {
    /// Emit loads for any stack-carried return values using the call
    /// info and allocations.
    pub fn emit_retval_loads<
        M: ABIMachineSpec,
        EmitFn: FnMut(M::I),
        IslandFn: Fn(u32) -> Option<M::I>,
    >(
        &self,
        stackslots_size: u32,
        mut emit: EmitFn,
        emit_island: IslandFn,
    ) {
        // Count stack-ret locations and emit an island to account for
        // this space usage.
        let mut space_needed = 0;
        for CallRetPair { location, .. } in &self.defs {
            if let RetLocation::Stack(..) = location {
                // Assume up to ten instructions, semi-arbitrarily:
                // load from stack, store to spillslot, codegen of
                // large offsets on RISC ISAs.
                space_needed += 10 * M::I::worst_case_size();
            }
        }
        if space_needed > 0 {
            if let Some(island_inst) = emit_island(space_needed) {
                emit(island_inst);
            }
        }

        let temp = M::retval_temp_reg(self.callee_conv);
        // The temporary must be noted as clobbered by the call unless there
        // are no return values at all (in which case it is never needed).
        // Statically, that is only possible for an ABI that permits no
        // returns (e.g., a patchable-call ABI).
        debug_assert!(
            self.defs.is_empty()
                || M::get_regs_clobbered_by_call(self.callee_conv, self.try_call_info.is_some())
                    .contains(PReg::from(temp.to_reg().to_real_reg().unwrap()))
        );

        for CallRetPair { vreg, location } in &self.defs {
            match location {
                RetLocation::Reg(preg, ..) => {
                    // The temporary must not also be an actual return
                    // value register.
                    debug_assert!(*preg != temp.to_reg());
                }
                RetLocation::Stack(amode, ty) => {
                    if let Some(spillslot) = vreg.to_reg().to_spillslot() {
                        // `temp` is an integer register of machine word
                        // width, but `ty` may be floating-point/vector,
                        // which (i) may not be loadable directly into an
                        // int reg, and (ii) may be wider than a machine
                        // word. For simplicity, and because there are not
                        // always easy choices for volatile float/vec regs
                        // (see e.g. x86-64, where fastcall clobbers only
                        // xmm0-xmm5, but tail uses xmm0-xmm7 for
                        // returns), we use the integer temp register in
                        // steps.
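                        // E.g. (assumed shapes): a 16-byte vector return on
                        // a 64-bit machine gives `parts == 2`, so the value
                        // moves through `temp` in two word-sized load/store
                        // steps.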
                        let parts = (ty.bytes() + M::word_bytes() - 1) / M::word_bytes();
                        let one_part_load_ty =
                            Type::int_with_byte_size(M::word_bytes().min(ty.bytes()) as u16)
                                .unwrap();
                        for part in 0..parts {
                            emit(M::gen_load_stack(
                                amode.offset_by(part * M::word_bytes()),
                                temp,
                                one_part_load_ty,
                            ));
                            emit(M::gen_store_stack(
                                StackAMode::Slot(
                                    i64::from(stackslots_size)
                                        + i64::from(M::word_bytes())
                                            * ((spillslot.index() as i64) + (part as i64)),
                                ),
                                temp.to_reg(),
                                M::word_type(),
                            ));
                        }
                    } else {
                        assert_ne!(*vreg, temp);
                        emit(M::gen_load_stack(*amode, *vreg, *ty));
                    }
                }
            }
        }
    }
}

impl TryCallInfo {
    pub(crate) fn exception_handlers(
        &self,
        layout: &FrameLayout,
    ) -> impl Iterator<Item = MachExceptionHandler> {
        self.exception_handlers.iter().map(|handler| match handler {
            TryCallHandler::Tag(tag, label) => MachExceptionHandler::Tag(*tag, *label),
            TryCallHandler::Default(label) => MachExceptionHandler::Default(*label),
            TryCallHandler::Context(reg) => {
                let loc = if let Some(spillslot) = reg.to_spillslot() {
                    // The spillslot offset is relative to the "fixed
                    // storage area", which comes after outgoing args.
                    let offset = layout.spillslot_offset(spillslot)
                        + i64::from(layout.outgoing_args_size);
                    ExceptionContextLoc::SPOffset(
                        u32::try_from(offset)
                            .expect("SP offset cannot be negative or larger than 4GiB"),
                    )
                } else if let Some(realreg) = reg.to_real_reg() {
                    ExceptionContextLoc::GPR(realreg.hw_enc())
                } else {
                    panic!("Virtual register present in try-call handler clause after register allocation");
                };
                MachExceptionHandler::Context(loc)
            }
        })
    }

    pub(crate) fn pretty_print_dests(&self) -> String {
        self.exception_handlers
            .iter()
            .map(|handler| match handler {
                TryCallHandler::Tag(tag, label) => format!("{tag:?}: {label:?}"),
                TryCallHandler::Default(label) => format!("default: {label:?}"),
                TryCallHandler::Context(loc) => format!("context {loc:?}"),
            })
            .collect::<Vec<_>>()
            .join(", ")
    }

    pub(crate) fn collect_operands(&mut self, collector: &mut impl OperandVisitor) {
        for handler in &mut self.exception_handlers {
            match handler {
                TryCallHandler::Context(ctx) => {
                    collector.any_late_use(ctx);
                }
                TryCallHandler::Tag(_, _) | TryCallHandler::Default(_) => {}
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::SigData;

    #[test]
    fn sig_data_size() {
        // The size of `SigData` is performance sensitive, so make sure
        // we don't regress it unintentionally.
        assert_eq!(std::mem::size_of::<SigData>(), 24);
    }
}