cranelift_codegen/isa/aarch64/abi.rs

//! Implementation of a standard AArch64 ABI.

use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::MemFlags;
use crate::ir::{dynamic_to_fixed, ExternalName, LibCall, Signature};
use crate::isa;
use crate::isa::aarch64::{inst::*, settings as aarch64_settings, AArch64Backend};
use crate::isa::unwind::UnwindInst;
use crate::isa::winch;
use crate::machinst::*;
use crate::settings;
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{smallvec, SmallVec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.

/// Support for the AArch64 ABI from the callee side (within a function body).
pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;

/// Support for the AArch64 ABI from the caller side (at a callsite).
pub(crate) type AArch64CallSite = CallSite<AArch64MachineDeps>;

impl Into<AMode> for StackAMode {
    fn into(self) -> AMode {
        match self {
            StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
                off: i64::from(stack_args_size) - off,
            },
            StackAMode::Slot(off) => AMode::SlotOffset { off },
            StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
        }
    }
}
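
// Illustrative sketch (not part of the original source) of the conversion
// above, assuming a hypothetical 32-byte incoming-argument area:
//
//   StackAMode::IncomingArg(off: 8, stack_args_size: 32)
//       -> AMode::IncomingArg { off: 24 }          // 32 - 8
//   StackAMode::Slot(off: 16)       -> AMode::SlotOffset { off: 16 }
//   StackAMode::OutgoingArg(off: 0) -> AMode::SPOffset { off: 0 }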

// Returns the size of stack space needed to store the
// `clobbered_callee_saves` registers.
fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
    let mut int_regs = 0;
    let mut vec_regs = 0;
    for &reg in clobbered_callee_saves {
        match reg.to_reg().class() {
            RegClass::Int => {
                int_regs += 1;
            }
            RegClass::Float => {
                vec_regs += 1;
            }
            RegClass::Vector => unreachable!(),
        }
    }

    // Round up to a multiple of 2, to keep 16-byte stack alignment.
    let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
    // The Procedure Call Standard for the Arm 64-bit Architecture
    // (AAPCS64, including several related ABIs such as the one used by
    // Windows) mandates saving only the bottom 8 bytes of the vector
    // registers, so we round up the number of registers to ensure
    // proper stack alignment (similarly to the situation with
    // `int_regs`).
    let vec_reg_size = 8;
    let vec_save_padding = vec_regs & 1;
    // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
    let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;

    int_save_bytes + vec_save_bytes
}
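
// Worked example (illustrative only, not from the original source): suppose
// x19, x20 and x21 plus v8 and v9 are the clobbered callee-saves. Then:
//   int_save_bytes = (3 + 1) * 8 = 32  // rounded up to an even register count
//   vec_save_bytes = (2 + 0) * 8 = 16  // only the low 8 bytes of each v-reg are saved
//   compute_clobber_size(..) == 48     // a multiple of 16, preserving stack alignment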

/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct AArch64MachineDeps;

impl IsaFlags for aarch64_settings::Flags {
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.use_bti()
    }
}

impl ABIMachineSpec for AArch64MachineDeps {
    type I = Inst;

    type F = aarch64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Returns the required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
        let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;

        // See the AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
        //
        // macOS aarch64 is slightly different; see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We are diverging from the macOS aarch64 implementation in the
        // following ways:
        // - sign- and zero-extensions of data types less than 32 bits are not
        //   implemented yet;
        // - we align the arguments' stack space to a 16-byte boundary, while
        //   macOS allows alignment on only 8 bytes. In practice this means we
        //   slightly overallocate when calling, which is fine and doesn't break
        //   our other invariant that the stack is always allocated in 16-byte
        //   chunks.

        let mut next_xreg = if call_conv == isa::CallConv::Tail {
            // We reserve `x0` for the return area pointer. For simplicity, we
            // reserve it even when there is no return area pointer needed. This
            // also means that identity functions don't have to shuffle arguments to
            // different return registers because we shifted all argument register
            // numbers down by one to make space for the return area pointer.
            //
            // Also, we cannot use all allocatable GPRs as arguments because we need
            // at least one allocatable register for holding the callee address in
            // indirect calls. So skip `x1` also, reserving it for that role.
            2
        } else {
            0
        };
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;

        // Note on return values: in the regular ABI, the integer and vector
        // register budgets for return values are independent of each other.
        // That is, we can return values in up to 8 integer and up to 8 vector
        // registers at once.
        let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
        let mut remaining_reg_vals = 16;

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            if call_conv != isa::CallConv::Winch {
                // In the AAPCS64 calling convention the return area pointer is
                // stored in x8.
                Some(ABIArg::reg(
                    xreg(8).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            } else {
                // Use x0 for the return area pointer in the Winch calling convention
                // to simplify the ABI handling code in Winch by avoiding an AArch64
                // special case to assign it to x8.
                next_xreg += 1;
                Some(ABIArg::reg(
                    xreg(0).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            }
        } else {
            None
        };

        for (i, param) in params.into_iter().enumerate() {
            if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
                );
            }

            let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                assert!(
                    call_conv != isa::CallConv::Tail,
                    "support for StructReturn parameters is not implemented for the `tail` \
                    calling convention yet",
                );
            }

            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on arm64. \
                    Use regular pointer arguments instead."
                );
            }

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
                // ensure_struct_return_ptr_is_returned is gone.
                assert!(
                    param.value_type == types::I64,
                    "StructReturn must be a pointer-sized integer"
                );
                args.push(ABIArg::Slots {
                    slots: smallvec![ABIArgSlot::Reg {
                        reg: xreg(8).to_real_reg().unwrap(),
                        ty: types::I64,
                        extension: param.extension,
                    },],
                    purpose: ir::ArgumentPurpose::StructReturn,
                });
                continue;
            }

            // Handle multi-register params.
            //
            // See the AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), Section 6.4.2, Stage C.
            //
            // For arguments with an alignment of 16 we round up the register number
            // to the next even value, so we can never allocate, for example, an i128
            // to X1 and X2; we have to skip one register and use X2, X3
            // (Stage C.8).
            // Note: the Apple ABI deviates a bit here. It doesn't respect Stage C.8
            // and will happily allocate an i128 to X1 and X2.
            //
            // For integer types with an alignment of 16 we also have the additional
            // restriction of passing the lower half in Xn and the upper half in Xn+1
            // (Stage C.9).
            //
            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
            //
            // On the Apple ABI it is unspecified whether we can spill half the value onto
            // the stack, i.e. load the lower half into x7 and the upper half onto the stack.
            // LLVM does not seem to do this, so we replicate that behaviour.
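            //
            // Illustrative example (not from the original source): for a signature
            // like `fn(i32, i128)` under AAPCS64, the i32 is assigned to x0; the
            // i128 requires an even-numbered starting register, so x1 is skipped
            // and the pair (x2, x3) is used, with the low half in x2 (Stage C.9).
            // Under the Apple ABI the same i128 would be placed in (x1, x2).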
            let is_multi_reg = rcs.len() >= 2;
            if is_multi_reg {
                assert!(
                    rcs.len() == 2,
                    "Unable to handle multi reg params with more than 2 regs"
                );
                assert!(
                    rcs == &[RegClass::Int, RegClass::Int],
                    "Unable to handle non i64 regs"
                );

                let reg_class_space = max_per_class_reg_vals - next_xreg;
                let reg_space = remaining_reg_vals;

                if reg_space >= 2 && reg_class_space >= 2 {
                    // The aarch64 ABI does not allow us to start a split argument
                    // at an odd-numbered register, so we need to skip one register.
                    //
                    // TODO: The Fast ABI should probably not skip the register.
                    if !is_apple_cc && next_xreg % 2 != 0 {
                        next_xreg += 1;
                    }

                    let lower_reg = xreg(next_xreg);
                    let upper_reg = xreg(next_xreg + 1);

                    args.push(ABIArg::Slots {
                        slots: smallvec![
                            ABIArgSlot::Reg {
                                reg: lower_reg.to_real_reg().unwrap(),
                                ty: reg_types[0],
                                extension: param.extension,
                            },
                            ABIArgSlot::Reg {
                                reg: upper_reg.to_real_reg().unwrap(),
                                ty: reg_types[1],
                                extension: param.extension,
                            },
                        ],
                        purpose: param.purpose,
                    });

                    next_xreg += 2;
                    remaining_reg_vals -= 2;
                    continue;
                }
            } else {
                // Single-register parameters.
                let rc = rcs[0];
                let next_reg = match rc {
                    RegClass::Int => &mut next_xreg,
                    RegClass::Float => &mut next_vreg,
                    RegClass::Vector => unreachable!(),
                };

                let push_to_reg = if is_winch_return {
                    // Winch uses the first register to return the last result.
                    i == params.len() - 1
                } else {
                    // Use max_per_class_reg_vals & remaining_reg_vals otherwise.
                    *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
                };

                if push_to_reg {
                    let reg = match rc {
                        RegClass::Int => xreg(*next_reg),
                        RegClass::Float => vreg(*next_reg),
                        RegClass::Vector => unreachable!(),
                    };
                    // Overlay Z-regs on V-regs for parameter passing.
                    let ty = if param.value_type.is_dynamic_vector() {
                        dynamic_to_fixed(param.value_type)
                    } else {
                        param.value_type
                    };
                    args.push(ABIArg::reg(
                        reg.to_real_reg().unwrap(),
                        ty,
                        param.extension,
                        param.purpose,
                    ));
                    *next_reg += 1;
                    remaining_reg_vals -= 1;
                    continue;
                }
            }

            // Spill to the stack.

            if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                return Err(crate::CodegenError::Unsupported(
                    "Too many return values to fit in registers. \
                    Use a StructReturn argument instead. (#9510)"
                        .to_owned(),
                ));
            }

            // Compute the stack slot's size.
            let size = (ty_bits(param.value_type) / 8) as u32;

            let size = if is_apple_cc || is_winch_return {
                // macOS and Winch aarch64 allow stack slots with
                // sizes less than 8 bytes. They still need to be
                // properly aligned on their natural data alignment,
                // though.
                size
            } else {
                // Every arg takes a minimum slot of 8 bytes. (16-byte stack
                // alignment happens separately after all args.)
                std::cmp::max(size, 8)
            };
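
            // Illustrative example (not from the original source): an I8 argument
            // spilled to the stack occupies a full 8-byte slot under the default
            // rule above, whereas under the Apple or Winch conventions it occupies
            // a single byte aligned to its natural 1-byte alignment.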

            if !is_winch_return {
                // Align the stack slot.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);
            }

            let slots = reg_types
                .iter()
                .copied()
                // Build the stack locations from each slot
                .scan(next_stack, |next_stack, ty| {
                    let slot_offset = *next_stack as i64;
                    *next_stack += (ty_bits(ty) / 8) as u32;

                    Some((ty, slot_offset))
                })
                .map(|(ty, offset)| ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension: param.extension,
                })
                .collect();

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });

            next_stack += size;
        }

        let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        if is_winch_return {
            winch::reverse_stack(args, next_stack, false);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg))
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend {
            rd: to_reg,
            rn: from_reg,
            signed,
            from_bits,
            to_bits,
        }
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Inst> {
        let imm = imm as u64;
        let mut insts = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(imm) {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                imm12,
            });
        } else {
            let scratch2 = writable_tmp2_reg();
            assert_ne!(scratch2.to_reg(), from_reg);
            // `gen_add_imm` is only ever called after register allocation has taken place, and as a
            // result it's ok to reuse the scratch2 register here. If that changes, we'll need to
            // plumb through a way to allocate temporary virtual registers
            insts.extend(Inst::load_constant(scratch2, imm.into(), &mut |_| scratch2));
            insts.push(Inst::AluRRRExtend {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                rm: scratch2.to_reg(),
                extendop: ExtendOp::UXTX,
            });
        }
        insts
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        insts.push(Inst::AluRRRExtend {
            alu_op: ALUOp::SubS,
            size: OperandSize::Size64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: limit_reg,
            extendop: ExtendOp::UXTX,
        });
        insts.push(Inst::TrapIf {
            trap_code: ir::TrapCode::STACK_OVERFLOW,
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
        insts
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
        // FIXME: Do something different for dynamic types?
        let mem = mem.into();
        Inst::LoadAddr { rd: into_reg, mem }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        if amount == 0 {
            return SmallVec::new();
        }

        let (amount, is_sub) = if amount > 0 {
            (amount as u64, false)
        } else {
            (-amount as u64, true)
        };

        let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };

        let mut ret = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(amount) {
            let adj_inst = Inst::AluRRImm12 {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12,
            };
            ret.push(adj_inst);
        } else {
            let tmp = writable_spilltmp_reg();
            // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
            // intermediates in `load_constant`.
            let const_inst = Inst::load_constant(tmp, amount, &mut |_| tmp);
            let adj_inst = Inst::AluRRRExtend {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                rm: tmp.to_reg(),
                extendop: ExtendOp::UXTX,
            };
            ret.extend(const_inst);
            ret.push(adj_inst);
        }
        ret
    }
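
    // Illustrative sketch (not part of the original source): `gen_sp_reg_adjust(-32)`
    // fits in an Imm12 and lowers to a single `sub sp, sp, #32`, whereas an
    // adjustment such as -0x100008 does not fit any Imm12 encoding, so the
    // constant is first materialized into the spill-temp register (x16) and the
    // SP update becomes an extended-register subtract of that register from SP.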

    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        match select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                insts.push(Inst::Paci { key });
                if flags.unwind_info() {
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: true,
                        },
                    });
                }
            }
            None => {
                if isa_flags.use_bti() {
                    insts.push(Inst::Bti {
                        targets: BranchTargetType::C,
                    });
                }

                if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
                    // The macOS unwinder seems to require this.
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: false,
                        },
                    });
                }
            }
        }

        if setup_frame {
            // stp fp (x29), lr (x30), [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt: fp_reg(),
                rt2: link_reg(),
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::PushFrameRegs {
                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
                    },
                });
            }

            // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
            // the usual encoding (`ORR`) does not work with SP.
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: writable_fp_reg(),
                rn: stack_reg(),
                imm12: Imm12 {
                    bits: 0,
                    shift12: false,
                },
            });
        }

        insts
    }

    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        if setup_frame {
            // N.B.: sp is already adjusted to the appropriate place by the
            // clobber-restore code (which also frees the fixed frame). Hence, there
            // is no need for the usual `mov sp, fp` here.

            // `ldp fp, lr, [sp], #16`
            insts.push(Inst::LoadP64 {
                rt: writable_fp_reg(),
                rt2: writable_link_reg(),
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(
                frame_layout.tail_args_size.try_into().unwrap(),
            ));
        }

        insts
    }

    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;

        match select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                smallvec![Inst::AuthenticatedRet {
                    key,
                    is_hint: !isa_flags.has_pauth(),
                }]
            }
            None => {
                smallvec![Inst::Ret {}]
            }
        }
    }

    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
        // TODO: implement if we ever require stack probes on an AArch64 host
        // (unlikely unless Lucet is ported)
        unimplemented!("Stack probing is unimplemented on AArch64");
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // The stack probe loop currently takes 6 instructions and each inline
        // probe takes 2 (ish, these numbers sort of depend on the constants).
        // Set this to 3 to keep the max size of the probe to 6 instructions.
        const PROBE_MAX_UNROLL: u32 = 3;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary.
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, frame_size, guard_size)
        }
    }
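
    // Worked example (illustrative only, not from the original source): with a
    // 4096-byte guard size, a 10_000-byte frame yields `probe_count = 2`, which
    // is within PROBE_MAX_UNROLL, so two inline probes are emitted; a
    // 20_000-byte frame yields 4 probes and falls back to the loop form instead.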

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        let mut insts = SmallVec::new();
        let setup_frame = frame_layout.setup_area_size > 0;

        // When a return_call within this function requires more stack arguments than are
        // currently present, resize the incoming argument area of the frame to accommodate
        // those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP to account for the additional space required by a tail call.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc {
                        size: incoming_args_diff,
                    },
                });
            }

            // Move fp and lr down.
            if setup_frame {
                // Reload the frame pointer from the stack.
                insts.push(Inst::ULoad64 {
                    rd: regs::writable_fp_reg(),
                    mem: AMode::SPOffset {
                        off: i64::from(incoming_args_diff),
                    },
                    flags: MemFlags::trusted(),
                });

                // Store the frame pointer and link register again at the new SP.
                insts.push(Inst::StoreP64 {
                    rt: fp_reg(),
                    rt2: link_reg(),
                    mem: PairAMode::SignedOffset {
                        reg: regs::stack_reg(),
                        simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                });

                // Keep the frame pointer in sync.
                insts.push(Self::gen_move(
                    regs::writable_fp_reg(),
                    regs::stack_reg(),
                    types::I64,
                ));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within the SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment (clobbers
        // then fixed frame) otherwise, and (iii) we generally just want to
        // maintain simplicity here for maintainability. Because clobbers are at
        // the top of the frame, just below FP, all that is necessary is to use
        // the pre-indexed "push" `[sp, #-16]!` addressing mode.
        //
        // `clobber_offset` tracks the offset above start-of-clobbers for
        // unwind-info purposes.
        let mut clobber_offset = frame_layout.clobber_size;
        let clobber_offset_change = 16;
        let iter = clobbered_int.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Int);
            // str rd, [sp, #-16]!
            insts.push(Inst::Store64 {
                rd,
                mem: AMode::SPPreIndexed {
                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert!(rt.class() == RegClass::Int);
            debug_assert!(rt2.class() == RegClass::Int);

            // stp rt, rt2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt,
                rt2,
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg = |rd| Inst::FpuStore64 {
            rd,
            mem: AMode::SPPreIndexed {
                simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let iter = clobbered_vec.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Float);
            insts.push(store_vec_reg(rd));

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg_pair = |rt, rt2| {
            let clobber_offset_change = 16;

            (
                Inst::FpuStoreP64 {
                    rt,
                    rt2,
                    mem: PairAMode::SPPreIndexed {
                        simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                },
                clobber_offset_change as u32,
            )
        };
        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert_eq!(rt.class(), RegClass::Float);
            debug_assert_eq!(rt2.class(), RegClass::Float);

            let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);

            insts.push(inst);

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + clobber_offset_change / 2,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

        // Allocate the fixed frame below the clobbers if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc { size: stack_size },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        // Free the fixed frame if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
        }

        let load_vec_reg = |rd| Inst::FpuLoad64 {
            rd,
            mem: AMode::SPPostIndexed {
                simm9: SImm9::maybe_from_i64(16).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
            rt,
            rt2,
            mem: PairAMode::SPPostIndexed {
                simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
            },
            flags: MemFlags::trusted(),
        };

        let mut iter = clobbered_vec.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg_pair(rt, rt2));
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg(rd));
        }

        let mut iter = clobbered_int.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
            // ldp rt, rt2, [sp], #16
            insts.push(Inst::LoadP64 {
                rt,
                rt2,
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
            // ldr rd, [sp], #16
            insts.push(Inst::ULoad64 {
                rd,
                mem: AMode::SPPostIndexed {
                    simm9: SImm9::maybe_from_i64(16).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        insts
    }

    fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Inst; 2]> {
        let mut insts = SmallVec::new();
        match dest {
            CallDest::ExtName(name, RelocDistance::Near) => {
                let info = Box::new(info.map(|()| name.clone()));
                insts.push(Inst::Call { info });
            }
            CallDest::ExtName(name, RelocDistance::Far) => {
                insts.push(Inst::LoadExtName {
                    rd: tmp,
                    name: Box::new(name.clone()),
                    offset: 0,
                });
                let info = Box::new(info.map(|()| tmp.to_reg()));
                insts.push(Inst::CallInd { info });
            }
            CallDest::Reg(reg) => {
                let info = Box::new(info.map(|()| *reg));
                insts.push(Inst::CallInd { info });
            }
        }

        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = writable_xreg(0);
        let arg1 = writable_xreg(1);
        let arg2 = writable_xreg(2);
        let tmp = alloc_tmp(Self::word_type());
        insts.extend(Inst::load_constant(tmp, size as u64, &mut alloc_tmp));
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: smallvec![
                    CallArgPair {
                        vreg: dst,
                        preg: arg0.to_reg()
                    },
                    CallArgPair {
                        vreg: src,
                        preg: arg1.to_reg()
                    },
                    CallArgPair {
                        vreg: tmp.to_reg(),
                        preg: arg2.to_reg()
                    }
                ],
                defs: smallvec![],
                clobbers: Self::get_regs_clobbered_by_call(call_conv),
                caller_conv: call_conv,
                callee_conv: call_conv,
                callee_pop_size: 0,
            }),
        });
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_size: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        assert_eq!(vector_size % 8, 0);
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_size / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(false))
        }
    }

    fn get_regs_clobbered_by_call(call_conv: isa::CallConv) -> PRegSet {
        match call_conv {
            isa::CallConv::Winch => WINCH_CLOBBERS,
            _ => DEFAULT_AAPCS_CLOBBERS,
        }
    }

    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        if call_conv == isa::CallConv::AppleAarch64 {
            specified
        } else {
            ir::ArgumentExtension::None
        }
    }

    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| {
                is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
            })
            .collect();

        // Sort registers for deterministic code output. We can do an unstable
        // sort because the registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || !is_leaf
            // The function arguments that are passed on the stack are addressed
            // relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            16 // FP, LR
        } else {
            0
        };

        // Return FrameLayout structure.
        FrameLayout {
            incoming_args_size,
            tail_args_size,
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }
}
impl AArch64MachineDeps {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        // When manually unrolling, adjust the stack pointer and then write a zero
        // to the stack at that offset. This generates something like
        // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
        //
        // We do this because valgrind expects us to never write beyond the stack
        // pointer and its associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454
        for _ in 0..probe_count {
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            insts.push(Inst::gen_store(
                AMode::SPOffset { off: 0 },
                zero_reg(),
                I32,
                MemFlags::trusted(),
            ));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
        // The non-unrolled version uses two temporary registers. The
        // `start` contains the current offset from sp and counts downwards
        // during the loop by increments of `guard_size`. The `end` is
        // the size of the frame and where we stop.
        //
        // Note that this emission is all post-regalloc so it should be ok
        // to use the temporary registers here as input/output as the loop
        // itself is not allowed to use the registers.
        let start = writable_spilltmp_reg();
        let end = writable_tmp2_reg();
        // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
        // `start` and `end` as temporaries in load_constant.
        insts.extend(Inst::load_constant(start, 0, &mut |_| start));
        insts.extend(Inst::load_constant(end, frame_size.into(), &mut |_| end));
        insts.push(Inst::StackProbeLoop {
            start,
            end: end.to_reg(),
            step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
        });
    }
}

fn select_api_key(
    isa_flags: &aarch64_settings::Flags,
    call_conv: isa::CallConv,
    setup_frame: bool,
) -> Option<APIKey> {
    if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
        // The `tail` calling convention uses a zero modifier rather than SP
        // because tail calls may happen with a different stack pointer than
        // when the function was entered, meaning that SP won't be the same when
        // the return address is authenticated.
        Some(if isa_flags.sign_return_address_with_bkey() {
            match call_conv {
                isa::CallConv::Tail => APIKey::BZ,
                _ => APIKey::BSP,
            }
        } else {
            match call_conv {
                isa::CallConv::Tail => APIKey::AZ,
                _ => APIKey::ASP,
            }
        })
    } else {
        None
    }
}
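
// Illustrative sketch (not part of the original source): with
// `sign_return_address` enabled and the B key selected, a frame-setting
// function under the `tail` convention gets `APIKey::BZ` (zero modifier),
// while the same function under any other convention gets `APIKey::BSP`
// (SP modifier).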

impl AArch64CallSite {
    pub fn emit_return_call(
        mut self,
        ctx: &mut Lower<Inst>,
        args: isle::ValueSlice,
        backend: &AArch64Backend,
    ) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);

        let dest = self.dest().clone();
        let uses = self.take_uses();
        let key = select_api_key(&backend.isa_flags, isa::CallConv::Tail, true);

        match dest {
            CallDest::ExtName(callee, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo {
                    dest: callee,
                    uses,
                    key,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCall { info });
            }
            CallDest::ExtName(name, RelocDistance::Far) => {
                let callee = ctx.alloc_tmp(types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName {
                    rd: callee,
                    name: Box::new(name),
                    offset: 0,
                });
                let info = Box::new(ReturnCallInfo {
                    dest: callee.to_reg(),
                    uses,
                    key,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallInd { info });
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo {
                    dest: callee,
                    uses,
                    key,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallInd { info });
            }
        }
    }
}

/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(
    _call_conv: isa::CallConv,
    enable_pinned_reg: bool,
    sig: &Signature,
    r: RealReg,
) -> bool {
    // FIXME: We need to inspect whether a function is returning Z or P regs too.
    let save_z_regs = sig
        .params
        .iter()
        .filter(|p| p.value_type.is_dynamic_vector())
        .count()
        != 0;

    match r.class() {
        RegClass::Int => {
            // x19 - x28 inclusive are callee-saves.
            // However, x21 is the pinned reg if `enable_pinned_reg`
            // is set, and is implicitly globally-allocated, hence not
            // callee-saved in prologues.
            if enable_pinned_reg && r.hw_enc() == PINNED_REG {
                false
            } else {
                r.hw_enc() >= 19 && r.hw_enc() <= 28
            }
        }
        RegClass::Float => {
            // If a subroutine takes at least one argument in scalable vector registers
            // or scalable predicate registers, or if it is a function that returns
            // results in such registers, it must ensure that the entire contents of
            // z8-z23 are preserved across the call. In other cases it need only
            // preserve the low 64 bits of z8-z15.
            if save_z_regs {
                r.hw_enc() >= 8 && r.hw_enc() <= 23
            } else {
                // v8 - v15 inclusive are callee-saves.
                r.hw_enc() >= 8 && r.hw_enc() <= 15
            }
        }
        RegClass::Vector => unreachable!(),
    }
}
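
// Illustrative examples (not part of the original source) of the rule above:
// x19 is a callee-save and is saved in the prologue if clobbered; x21 is not
// saved when `enable_pinned_reg` is set (it is the pinned register); v9 is
// saved (only its low 64 bits, per compute_clobber_size) unless the signature
// involves scalable vectors, in which case v8-v23 are treated as callee-saves.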

const fn default_aapcs_clobbers() -> PRegSet {
    PRegSet::empty()
        // x0 - x17 inclusive are caller-saves.
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // v0 - v7 inclusive and v16 - v31 inclusive are
        // caller-saves. The upper 64 bits of v8 - v15 inclusive are
        // also caller-saves. However, because we cannot currently
        // represent partial registers to regalloc2, we indicate here
        // that every vector register is caller-save. Because this
        // function is used at *callsites*, approximating in this
        // direction (save more than necessary) is conservative and
        // thus safe.
        //
        // Note that we exclude clobbers from a call instruction when
        // a call instruction's callee has the same ABI as the caller
        // (the current function body); this is safe (anything
        // clobbered by callee can be clobbered by caller as well) and
        // avoids unnecessary saves of v8-v15 in the prologue even
        // though we include them as defs here.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const fn winch_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // x18 is used to carry platform state and is not allocatable by Winch.
        //
        // x19 - x27 are considered caller-saved in Winch's calling convention.
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        // x28 is used as the shadow stack pointer and is considered
        // callee-saved.
        //
        // All vregs are considered caller-saved.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
const WINCH_CLOBBERS: PRegSet = winch_clobbers();

fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            vec![
                preg(xreg(0)),
                preg(xreg(1)),
                preg(xreg(2)),
                preg(xreg(3)),
                preg(xreg(4)),
                preg(xreg(5)),
                preg(xreg(6)),
                preg(xreg(7)),
                preg(xreg(8)),
                preg(xreg(9)),
                preg(xreg(10)),
                preg(xreg(11)),
                preg(xreg(12)),
                preg(xreg(13)),
                preg(xreg(14)),
                preg(xreg(15)),
                // x16 and x17 are spilltmp and tmp2 (see above).
                // x18 could be used by the platform to carry inter-procedural state;
                // conservatively assume so and make it not allocatable.
                // x19-28 are callee-saved and so not preferred.
                // x21 is the pinned register (if enabled) and not allocatable if so.
                // x29 is FP, x30 is LR, x31 is SP/ZR.
            ],
            vec![
                preg(vreg(0)),
                preg(vreg(1)),
                preg(vreg(2)),
                preg(vreg(3)),
                preg(vreg(4)),
                preg(vreg(5)),
                preg(vreg(6)),
                preg(vreg(7)),
                // v8-15 are callee-saved and so not preferred.
                preg(vreg(16)),
                preg(vreg(17)),
                preg(vreg(18)),
                preg(vreg(19)),
                preg(vreg(20)),
                preg(vreg(21)),
                preg(vreg(22)),
                preg(vreg(23)),
                preg(vreg(24)),
                preg(vreg(25)),
                preg(vreg(26)),
                preg(vreg(27)),
                preg(vreg(28)),
                preg(vreg(29)),
                preg(vreg(30)),
                preg(vreg(31)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            vec![
                preg(xreg(19)),
                preg(xreg(20)),
                // x21 is pinned reg if enabled; we add to this list below if not.
                preg(xreg(22)),
                preg(xreg(23)),
                preg(xreg(24)),
                preg(xreg(25)),
                preg(xreg(26)),
                preg(xreg(27)),
                preg(xreg(28)),
            ],
            vec![
                preg(vreg(8)),
                preg(vreg(9)),
                preg(vreg(10)),
                preg(vreg(11)),
                preg(vreg(12)),
                preg(vreg(13)),
                preg(vreg(14)),
                preg(vreg(15)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    if !enable_pinned_reg {
        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
    }

    env
}