cranelift_codegen/isa/aarch64/abi.rs

1//! Implementation of a standard AArch64 ABI.
2
3use crate::CodegenResult;
4use crate::ir;
5use crate::ir::MemFlags;
6use crate::ir::types;
7use crate::ir::types::*;
8use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
9use crate::isa;
10use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
11use crate::isa::unwind::UnwindInst;
12use crate::isa::winch;
13use crate::machinst::*;
14use crate::settings;
15use alloc::boxed::Box;
16use alloc::vec::Vec;
17use regalloc2::{MachineEnv, PReg, PRegSet};
18use smallvec::{SmallVec, smallvec};
19use std::borrow::ToOwned;
20use std::sync::OnceLock;
21
22// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
23// these ABIs are very similar.
24
25/// Support for the AArch64 ABI from the callee side (within a function body).
26pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;
27
28impl From<StackAMode> for AMode {
29    fn from(stack: StackAMode) -> AMode {
30        match stack {
31            StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
32                off: i64::from(stack_args_size) - off,
33            },
34            StackAMode::Slot(off) => AMode::SlotOffset { off },
35            StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
36        }
37    }
38}
39
40// Returns the size of stack space needed to store the
41// `clobbered_callee_saves` registers.
42fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
43    let mut int_regs = 0;
44    let mut vec_regs = 0;
45    for &reg in clobbered_callee_saves {
46        match reg.to_reg().class() {
47            RegClass::Int => {
48                int_regs += 1;
49            }
50            RegClass::Float => {
51                vec_regs += 1;
52            }
53            RegClass::Vector => unreachable!(),
54        }
55    }
56
57    // Round up to multiple of 2, to keep 16-byte stack alignment.
58    let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
59    // The Procedure Call Standard for the Arm 64-bit Architecture
60    // (AAPCS64, including several related ABIs such as the one used by
61    // Windows) mandates saving only the bottom 8 bytes of the vector
62    // registers, so we round up the number of registers to ensure
63    // proper stack alignment (similarly to the situation with
64    // `int_regs`).
65    let vec_reg_size = 8;
66    let vec_save_padding = vec_regs & 1;
67    // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
68    let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;
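    // For example, with 3 clobbered integer registers and 1 clobbered vector
    // register this computes (3 + 1) * 8 + (1 + 1) * 8 = 48 bytes, keeping the
    // clobber area a multiple of 16 bytes.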
69
70    int_save_bytes + vec_save_bytes
71}
72
73/// AArch64-specific ABI behavior. This struct just serves as an implementation
74/// point for the trait; it is never actually instantiated.
75pub struct AArch64MachineDeps;
76
77impl IsaFlags for aarch64_settings::Flags {
78    fn is_forward_edge_cfi_enabled(&self) -> bool {
79        self.use_bti()
80    }
81}
82
83impl ABIMachineSpec for AArch64MachineDeps {
84    type I = Inst;
85
86    type F = aarch64_settings::Flags;
87
88    /// This is the limit for the size of argument and return-value areas on the
89    /// stack. We place a reasonable limit here to avoid integer overflow issues
90    /// with 32-bit arithmetic: for now, 128 MB.
91    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
92
93    fn word_bits() -> u32 {
94        64
95    }
96
97    /// Return required stack alignment in bytes.
98    fn stack_align(_call_conv: isa::CallConv) -> u32 {
99        16
100    }
101
102    fn compute_arg_locs(
103        call_conv: isa::CallConv,
104        flags: &settings::Flags,
105        params: &[ir::AbiParam],
106        args_or_rets: ArgsOrRets,
107        add_ret_area_ptr: bool,
108        mut args: ArgsAccumulator,
109    ) -> CodegenResult<(u32, Option<usize>)> {
110        let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
111        let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;
112
113        // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
114        //
115        // macOS on aarch64 is slightly different; see also
116        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
117        // We diverge from the macOS aarch64 implementation in the
118        // following ways:
119        // - sign- and zero-extensions of data types smaller than 32 bits are
120        // not implemented yet;
121        // - we align the argument stack space to a 16-byte boundary, while
122        // macOS allows aligning it to only 8 bytes. In practice this means we
123        // slightly overallocate when calling, which is fine and doesn't break
124        // our invariant that the stack is always allocated in 16-byte
125        // chunks.
126
127        let mut next_xreg = if call_conv == isa::CallConv::Tail {
128            // We reserve `x0` for the return area pointer. For simplicity, we
129            // reserve it even when there is no return area pointer needed. This
130            // also means that identity functions don't have to shuffle arguments to
131            // different return registers because we shifted all argument register
132            // numbers down by one to make space for the return area pointer.
133            //
134            // Also, we cannot use all allocatable GPRs as arguments because we need
135            // at least one allocatable register for holding the callee address in
136            // indirect calls. So skip `x1` also, reserving it for that role.
137            2
138        } else {
139            0
140        };
141        let mut next_vreg = 0;
142        let mut next_stack: u32 = 0;
143
144        // Note on return values: under the regular ABI, the number of
145        // registers used to return values in one class is independent of
146        // the number used in the other class. That is, we can return
147        // values in up to 8 integer registers and up to 8 vector
148        // registers at once.
149        let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
150        let mut remaining_reg_vals = 16;
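        // For example, under the default (non-tail) ABI a signature
        // `(i64, f32, i64)` is assigned x0, v0, and x1: the integer and vector
        // counters advance independently, while `remaining_reg_vals` caps the
        // combined total.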
151
152        let ret_area_ptr = if add_ret_area_ptr {
153            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
154            if call_conv != isa::CallConv::Winch {
155                // In the AAPCS64 calling convention the return area pointer is
156                // stored in x8.
157                Some(ABIArg::reg(
158                    xreg(8).to_real_reg().unwrap(),
159                    I64,
160                    ir::ArgumentExtension::None,
161                    ir::ArgumentPurpose::Normal,
162                ))
163            } else {
164                // Use x0 for the return area pointer in the Winch calling convention
165                // to simplify the ABI handling code in Winch by avoiding an AArch64
166                // special case to assign it to x8.
167                next_xreg += 1;
168                Some(ABIArg::reg(
169                    xreg(0).to_real_reg().unwrap(),
170                    I64,
171                    ir::ArgumentExtension::None,
172                    ir::ArgumentPurpose::Normal,
173                ))
174            }
175        } else {
176            None
177        };
178
179        for (i, param) in params.into_iter().enumerate() {
180            if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
181            {
182                panic!(
183                    "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
184                );
185            }
186
187            let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
188
189            if let ir::ArgumentPurpose::StructReturn = param.purpose {
190                assert!(
191                    call_conv != isa::CallConv::Tail,
192                    "support for StructReturn parameters is not implemented for the `tail` \
193                    calling convention yet",
194                );
195            }
196
197            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
198                panic!(
199                    "StructArgument parameters are not supported on arm64. \
200                    Use regular pointer arguments instead."
201                );
202            }
203
204            if let ir::ArgumentPurpose::StructReturn = param.purpose {
205                // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
206                // ensure_struct_return_ptr_is_returned is gone.
207                assert!(
208                    param.value_type == types::I64,
209                    "StructReturn must be a pointer sized integer"
210                );
211                args.push(ABIArg::Slots {
212                    slots: smallvec![ABIArgSlot::Reg {
213                        reg: xreg(8).to_real_reg().unwrap(),
214                        ty: types::I64,
215                        extension: param.extension,
216                    },],
217                    purpose: ir::ArgumentPurpose::StructReturn,
218                });
219                continue;
220            }
221
222            // Handle multi-register params
223            //
224            // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2, Stage C).
225            //
226            // For arguments with an alignment of 16 we round the register number
227            // up to the next even value, so we can never allocate, for example, an
228            // i128 to X1 and X2; we have to skip one register and use X2, X3
229            // (Stage C.8).
230            // Note: the Apple ABI deviates a bit here. It doesn't respect Stage C.8
231            // and will happily allocate an i128 to X1 and X2.
232            //
233            // For integer types with an alignment of 16 we also have the additional
234            // restriction of passing the lower half in Xn and the upper half in Xn+1
235            // (Stage C.9).
236            //
237            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
238            //
239            // On the Apple ABI it is unspecified whether we may split a value between
240            // registers and the stack, i.e. load the lower half into x7 and the upper
241            // half onto the stack. LLVM does not seem to do this, so we replicate that behaviour.
242            let is_multi_reg = rcs.len() >= 2;
243            if is_multi_reg {
244                assert!(
245                    rcs.len() == 2,
246                    "Unable to handle multi reg params with more than 2 regs"
247                );
248                assert!(
249                    rcs == &[RegClass::Int, RegClass::Int],
250                    "Unable to handle non i64 regs"
251                );
252
253                let reg_class_space = max_per_class_reg_vals - next_xreg;
254                let reg_space = remaining_reg_vals;
255
256                if reg_space >= 2 && reg_class_space >= 2 {
257                    // The aarch64 ABI does not allow us to start a split argument
258                    // at an odd-numbered register, so we need to skip one register.
259                    //
260                    // TODO: The Fast ABI should probably not skip the register.
261                    if !is_apple_cc && next_xreg % 2 != 0 {
262                        next_xreg += 1;
263                    }
264
265                    let lower_reg = xreg(next_xreg);
266                    let upper_reg = xreg(next_xreg + 1);
267
268                    args.push(ABIArg::Slots {
269                        slots: smallvec![
270                            ABIArgSlot::Reg {
271                                reg: lower_reg.to_real_reg().unwrap(),
272                                ty: reg_types[0],
273                                extension: param.extension,
274                            },
275                            ABIArgSlot::Reg {
276                                reg: upper_reg.to_real_reg().unwrap(),
277                                ty: reg_types[1],
278                                extension: param.extension,
279                            },
280                        ],
281                        purpose: param.purpose,
282                    });
283
284                    next_xreg += 2;
285                    remaining_reg_vals -= 2;
286                    continue;
287                }
288            } else {
289                // Single Register parameters
290                let rc = rcs[0];
291                let next_reg = match rc {
292                    RegClass::Int => &mut next_xreg,
293                    RegClass::Float => &mut next_vreg,
294                    RegClass::Vector => unreachable!(),
295                };
296
297                let push_to_reg = if is_winch_return {
298                    // Winch uses the first register to return the last result
299                    i == params.len() - 1
300                } else {
301                    // Use max_per_class_reg_vals & remaining_reg_vals otherwise
302                    *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
303                };
304
305                if push_to_reg {
306                    let reg = match rc {
307                        RegClass::Int => xreg(*next_reg),
308                        RegClass::Float => vreg(*next_reg),
309                        RegClass::Vector => unreachable!(),
310                    };
311                    // Overlay Z-regs on V-regs for parameter passing.
312                    let ty = if param.value_type.is_dynamic_vector() {
313                        dynamic_to_fixed(param.value_type)
314                    } else {
315                        param.value_type
316                    };
317                    args.push(ABIArg::reg(
318                        reg.to_real_reg().unwrap(),
319                        ty,
320                        param.extension,
321                        param.purpose,
322                    ));
323                    *next_reg += 1;
324                    remaining_reg_vals -= 1;
325                    continue;
326                }
327            }
328
329            // Spill to the stack
330
331            if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
332                return Err(crate::CodegenError::Unsupported(
333                    "Too many return values to fit in registers. \
334                    Use a StructReturn argument instead. (#9510)"
335                        .to_owned(),
336                ));
337            }
338
339            // Compute the stack slot's size.
340            let size = (ty_bits(param.value_type) / 8) as u32;
341
342            let size = if is_apple_cc || is_winch_return {
343                // The macOS and Winch aarch64 ABIs allow stack slots
344                // with sizes of less than 8 bytes. They still need to
345                // be properly aligned to their natural data alignment,
346                // though.
347                size
348            } else {
349                // Every arg takes a minimum slot of 8 bytes. (16-byte stack
350                // alignment happens separately after all args.)
351                std::cmp::max(size, 8)
352            };
353
354            if !is_winch_return {
355                // Align the stack slot.
356                debug_assert!(size.is_power_of_two());
357                next_stack = align_to(next_stack, size);
358            }
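            // For example, two i32 arguments that both end up on the stack are
            // placed at offsets 0 and 8 under the default ABI (each takes a
            // minimum 8-byte slot), but at offsets 0 and 4 under apple_aarch64,
            // which packs them at their natural alignment.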
359
360            let slots = reg_types
361                .iter()
362                .copied()
363                // Build the stack locations from each slot
364                .scan(next_stack, |next_stack, ty| {
365                    let slot_offset = *next_stack as i64;
366                    *next_stack += (ty_bits(ty) / 8) as u32;
367
368                    Some((ty, slot_offset))
369                })
370                .map(|(ty, offset)| ABIArgSlot::Stack {
371                    offset,
372                    ty,
373                    extension: param.extension,
374                })
375                .collect();
376
377            args.push(ABIArg::Slots {
378                slots,
379                purpose: param.purpose,
380            });
381
382            next_stack += size;
383        }
384
385        let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
386            args.push_non_formal(ret_area_ptr);
387            Some(args.args().len() - 1)
388        } else {
389            None
390        };
391
392        if is_winch_return {
393            winch::reverse_stack(args, next_stack, false);
394        }
395
396        next_stack = align_to(next_stack, 16);
397
398        Ok((next_stack, extra_arg))
399    }
400
401    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
402        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
403    }
404
405    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
406        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
407    }
408
409    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
410        Inst::gen_move(to_reg, from_reg, ty)
411    }
412
413    fn gen_extend(
414        to_reg: Writable<Reg>,
415        from_reg: Reg,
416        signed: bool,
417        from_bits: u8,
418        to_bits: u8,
419    ) -> Inst {
420        assert!(from_bits < to_bits);
421        Inst::Extend {
422            rd: to_reg,
423            rn: from_reg,
424            signed,
425            from_bits,
426            to_bits,
427        }
428    }
429
430    fn gen_args(args: Vec<ArgPair>) -> Inst {
431        Inst::Args { args }
432    }
433
434    fn gen_rets(rets: Vec<RetPair>) -> Inst {
435        Inst::Rets { rets }
436    }
437
438    fn gen_add_imm(
439        _call_conv: isa::CallConv,
440        into_reg: Writable<Reg>,
441        from_reg: Reg,
442        imm: u32,
443    ) -> SmallInstVec<Inst> {
444        let imm = imm as u64;
445        let mut insts = SmallVec::new();
446        if let Some(imm12) = Imm12::maybe_from_u64(imm) {
447            insts.push(Inst::AluRRImm12 {
448                alu_op: ALUOp::Add,
449                size: OperandSize::Size64,
450                rd: into_reg,
451                rn: from_reg,
452                imm12,
453            });
454        } else {
455            let scratch2 = writable_tmp2_reg();
456            assert_ne!(scratch2.to_reg(), from_reg);
457            // `gen_add_imm` is only ever called after register allocation has taken place, and as a
458            // result it's ok to reuse the `scratch2` register here. If that changes, we'll need to
459            // plumb through a way to allocate temporary virtual registers.
460            insts.extend(Inst::load_constant(scratch2, imm));
461            insts.push(Inst::AluRRRExtend {
462                alu_op: ALUOp::Add,
463                size: OperandSize::Size64,
464                rd: into_reg,
465                rn: from_reg,
466                rm: scratch2.to_reg(),
467                extendop: ExtendOp::UXTX,
468            });
469        }
470        insts
471    }
472
473    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
474        let mut insts = SmallVec::new();
475        insts.push(Inst::AluRRRExtend {
476            alu_op: ALUOp::SubS,
477            size: OperandSize::Size64,
478            rd: writable_zero_reg(),
479            rn: stack_reg(),
480            rm: limit_reg,
481            extendop: ExtendOp::UXTX,
482        });
483        insts.push(Inst::TrapIf {
484            trap_code: ir::TrapCode::STACK_OVERFLOW,
485            // Here `Lo` == "less than" when interpreting the two
486            // operands as unsigned integers.
487            kind: CondBrKind::Cond(Cond::Lo),
488        });
489        insts
490    }
491
492    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
493        // FIXME: Do something different for dynamic types?
494        let mem = mem.into();
495        Inst::LoadAddr { rd: into_reg, mem }
496    }
497
498    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
499        spilltmp_reg()
500    }
501
502    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
503        let mem = AMode::RegOffset {
504            rn: base,
505            off: offset as i64,
506        };
507        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
508    }
509
510    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
511        let mem = AMode::RegOffset {
512            rn: base,
513            off: offset as i64,
514        };
515        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
516    }
517
518    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
519        if amount == 0 {
520            return SmallVec::new();
521        }
522
523        let (amount, is_sub) = if amount > 0 {
524            (amount as u64, false)
525        } else {
526            (-amount as u64, true)
527        };
528
529        let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };
530
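        // For example, an adjustment of -32 is emitted as `sub sp, sp, #32`
        // below, while an amount that does not fit in a (possibly shifted)
        // 12-bit immediate is first materialized in the spill temporary.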
531        let mut ret = SmallVec::new();
532        if let Some(imm12) = Imm12::maybe_from_u64(amount) {
533            let adj_inst = Inst::AluRRImm12 {
534                alu_op,
535                size: OperandSize::Size64,
536                rd: writable_stack_reg(),
537                rn: stack_reg(),
538                imm12,
539            };
540            ret.push(adj_inst);
541        } else {
542            let tmp = writable_spilltmp_reg();
543            // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
544            // intermediates in `load_constant`.
545            let const_inst = Inst::load_constant(tmp, amount);
546            let adj_inst = Inst::AluRRRExtend {
547                alu_op,
548                size: OperandSize::Size64,
549                rd: writable_stack_reg(),
550                rn: stack_reg(),
551                rm: tmp.to_reg(),
552                extendop: ExtendOp::UXTX,
553            };
554            ret.extend(const_inst);
555            ret.push(adj_inst);
556        }
557        ret
558    }
559
560    fn gen_prologue_frame_setup(
561        call_conv: isa::CallConv,
562        flags: &settings::Flags,
563        isa_flags: &aarch64_settings::Flags,
564        frame_layout: &FrameLayout,
565    ) -> SmallInstVec<Inst> {
566        let setup_frame = frame_layout.setup_area_size > 0;
567        let mut insts = SmallVec::new();
568
569        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
570            Some(key) => {
571                insts.push(Inst::Paci { key });
572                if flags.unwind_info() {
573                    insts.push(Inst::Unwind {
574                        inst: UnwindInst::Aarch64SetPointerAuth {
575                            return_addresses: true,
576                        },
577                    });
578                }
579            }
580            None => {
581                if isa_flags.use_bti() {
582                    insts.push(Inst::Bti {
583                        targets: BranchTargetType::C,
584                    });
585                }
586
587                if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
588                    // The macOS unwinder seems to require this.
589                    insts.push(Inst::Unwind {
590                        inst: UnwindInst::Aarch64SetPointerAuth {
591                            return_addresses: false,
592                        },
593                    });
594                }
595            }
596        }
597
598        if setup_frame {
599            // stp fp (x29), lr (x30), [sp, #-16]!
600            insts.push(Inst::StoreP64 {
601                rt: fp_reg(),
602                rt2: link_reg(),
603                mem: PairAMode::SPPreIndexed {
604                    simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
605                },
606                flags: MemFlags::trusted(),
607            });
608
609            if flags.unwind_info() {
610                insts.push(Inst::Unwind {
611                    inst: UnwindInst::PushFrameRegs {
612                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
613                    },
614                });
615            }
616
617            // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
618            // the usual encoding (`ORR`) does not work with SP.
619            insts.push(Inst::AluRRImm12 {
620                alu_op: ALUOp::Add,
621                size: OperandSize::Size64,
622                rd: writable_fp_reg(),
623                rn: stack_reg(),
624                imm12: Imm12 {
625                    bits: 0,
626                    shift12: false,
627                },
628            });
629        }
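        // At this point a typical frame-setting prologue has emitted, in order:
        // either a pointer-authentication instruction (e.g. `paciasp`) or, when
        // BTI is enabled without return-address signing, a `bti c` landing pad;
        // then `stp x29, x30, [sp, #-16]!` and `mov x29, sp` (encoded as
        // `add x29, sp, #0`), plus the corresponding unwind pseudo-instructions.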
630
631        insts
632    }
633
634    fn gen_epilogue_frame_restore(
635        call_conv: isa::CallConv,
636        _flags: &settings::Flags,
637        _isa_flags: &aarch64_settings::Flags,
638        frame_layout: &FrameLayout,
639    ) -> SmallInstVec<Inst> {
640        let setup_frame = frame_layout.setup_area_size > 0;
641        let mut insts = SmallVec::new();
642
643        if setup_frame {
644            // N.B.: sp is already adjusted to the appropriate place by the
645            // clobber-restore code (which also frees the fixed frame). Hence, there
646            // is no need for the usual `mov sp, fp` here.
647
648            // `ldp fp, lr, [sp], #16`
649            insts.push(Inst::LoadP64 {
650                rt: writable_fp_reg(),
651                rt2: writable_link_reg(),
652                mem: PairAMode::SPPostIndexed {
653                    simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
654                },
655                flags: MemFlags::trusted(),
656            });
657        }
658
659        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
660            insts.extend(Self::gen_sp_reg_adjust(
661                frame_layout.tail_args_size.try_into().unwrap(),
662            ));
663        }
664
665        insts
666    }
667
668    fn gen_return(
669        call_conv: isa::CallConv,
670        isa_flags: &aarch64_settings::Flags,
671        frame_layout: &FrameLayout,
672    ) -> SmallInstVec<Inst> {
673        let setup_frame = frame_layout.setup_area_size > 0;
674
675        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
676            Some(key) => {
677                smallvec![Inst::AuthenticatedRet {
678                    key,
679                    is_hint: !isa_flags.has_pauth(),
680                }]
681            }
682            None => {
683                smallvec![Inst::Ret {}]
684            }
685        }
686    }
687
688    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
689        // TODO: implement if we ever require stack probes on an AArch64 host
690        // (unlikely unless Lucet is ported)
691        unimplemented!("Stack probing is unimplemented on AArch64");
692    }
693
694    fn gen_inline_probestack(
695        insts: &mut SmallInstVec<Self::I>,
696        _call_conv: isa::CallConv,
697        frame_size: u32,
698        guard_size: u32,
699    ) {
700        // The stack probe loop currently takes 6 instructions and each inline
701        // probe takes roughly 2 (the exact counts depend on the constants involved).
702        // Set this to 3 to keep the maximum size of the unrolled probes to 6 instructions.
703        const PROBE_MAX_UNROLL: u32 = 3;
704
705        // Calculate how many probes we need to perform. Round down, as we only
706        // need to probe whole guard_size regions we'd otherwise skip over.
707        let probe_count = frame_size / guard_size;
708        if probe_count == 0 {
709            // No probe necessary
710        } else if probe_count <= PROBE_MAX_UNROLL {
711            Self::gen_probestack_unroll(insts, guard_size, probe_count)
712        } else {
713            Self::gen_probestack_loop(insts, frame_size, guard_size)
714        }
715    }
716
717    fn gen_clobber_save(
718        _call_conv: isa::CallConv,
719        flags: &settings::Flags,
720        frame_layout: &FrameLayout,
721    ) -> SmallVec<[Inst; 16]> {
722        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
723
724        let mut insts = SmallVec::new();
725        let setup_frame = frame_layout.setup_area_size > 0;
726
727        // When a return_call within this function requires more stack-argument space than the
728        // incoming argument area provides, resize that area of the frame to accommodate the arguments.
729        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
730        if incoming_args_diff > 0 {
731            // Decrement SP to account for the additional space required by a tail call.
732            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
733            if flags.unwind_info() {
734                insts.push(Inst::Unwind {
735                    inst: UnwindInst::StackAlloc {
736                        size: incoming_args_diff,
737                    },
738                });
739            }
740
741            // Move fp and lr down.
742            if setup_frame {
743                // Reload the frame pointer from the stack.
744                insts.push(Inst::ULoad64 {
745                    rd: regs::writable_fp_reg(),
746                    mem: AMode::SPOffset {
747                        off: i64::from(incoming_args_diff),
748                    },
749                    flags: MemFlags::trusted(),
750                });
751
752                // Store the frame pointer and link register again at the new SP
753                insts.push(Inst::StoreP64 {
754                    rt: fp_reg(),
755                    rt2: link_reg(),
756                    mem: PairAMode::SignedOffset {
757                        reg: regs::stack_reg(),
758                        simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
759                    },
760                    flags: MemFlags::trusted(),
761                });
762
763                // Keep the frame pointer in sync
764                insts.push(Self::gen_move(
765                    regs::writable_fp_reg(),
766                    regs::stack_reg(),
767                    types::I64,
768                ));
769            }
770        }
771
772        if flags.unwind_info() && setup_frame {
773            // The *unwind* frame (but not the actual frame) starts at the
774            // clobbers, just below the saved FP/LR pair.
775            insts.push(Inst::Unwind {
776                inst: UnwindInst::DefineNewFrame {
777                    offset_downward_to_clobbers: frame_layout.clobber_size,
778                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
779                },
780            });
781        }
782
783        // We use pre-indexed addressing modes here, rather than the possibly
784        // more efficient "subtract sp once, then use fixed offsets" scheme,
785        // because (i) we cannot necessarily guarantee that the offset of a
786        // clobber-save slot will be within the SImm7Scaled (+504-byte) offset
787        // range of the whole frame including other slots, (ii) it is more
788        // complex to conditionally generate a two-stage SP adjustment (clobbers
789        // then fixed frame) otherwise, and (iii) generally we just want to keep
790        // this simple for maintainability. Because clobbers are at the top of
791        // the frame, just below FP, all that is necessary is to use the
792        // pre-indexed "push" `[sp, #-16]!` addressing mode.
793        //
794        // `clobber_offset` tracks the offset above the start of the clobber area
795        // for unwind-info purposes.
796        let mut clobber_offset = frame_layout.clobber_size;
797        let clobber_offset_change = 16;
798        let iter = clobbered_int.chunks_exact(2);
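        // For example, with x19, x20, and x21 clobbered (a 32-byte clobber
        // area), the code below emits `str x21, [sp, #-16]!` (unwind offset 16)
        // followed by `stp x19, x20, [sp, #-16]!` (unwind offsets 0 and 8).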
799
800        if let [rd] = iter.remainder() {
801            let rd: Reg = rd.to_reg().into();
802
803            debug_assert_eq!(rd.class(), RegClass::Int);
804            // str rd, [sp, #-16]!
805            insts.push(Inst::Store64 {
806                rd,
807                mem: AMode::SPPreIndexed {
808                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
809                },
810                flags: MemFlags::trusted(),
811            });
812
813            if flags.unwind_info() {
814                clobber_offset -= clobber_offset_change as u32;
815                insts.push(Inst::Unwind {
816                    inst: UnwindInst::SaveReg {
817                        clobber_offset,
818                        reg: rd.to_real_reg().unwrap(),
819                    },
820                });
821            }
822        }
823
824        let mut iter = iter.rev();
825
826        while let Some([rt, rt2]) = iter.next() {
827            // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
828            let rt: Reg = rt.to_reg().into();
829            let rt2: Reg = rt2.to_reg().into();
830
831            debug_assert!(rt.class() == RegClass::Int);
832            debug_assert!(rt2.class() == RegClass::Int);
833
834            // stp rt, rt2, [sp, #-16]!
835            insts.push(Inst::StoreP64 {
836                rt,
837                rt2,
838                mem: PairAMode::SPPreIndexed {
839                    simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
840                },
841                flags: MemFlags::trusted(),
842            });
843
844            if flags.unwind_info() {
845                clobber_offset -= clobber_offset_change as u32;
846                insts.push(Inst::Unwind {
847                    inst: UnwindInst::SaveReg {
848                        clobber_offset,
849                        reg: rt.to_real_reg().unwrap(),
850                    },
851                });
852                insts.push(Inst::Unwind {
853                    inst: UnwindInst::SaveReg {
854                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
855                        reg: rt2.to_real_reg().unwrap(),
856                    },
857                });
858            }
859        }
860
861        let store_vec_reg = |rd| Inst::FpuStore64 {
862            rd,
863            mem: AMode::SPPreIndexed {
864                simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
865            },
866            flags: MemFlags::trusted(),
867        };
868        let iter = clobbered_vec.chunks_exact(2);
869
870        if let [rd] = iter.remainder() {
871            let rd: Reg = rd.to_reg().into();
872
873            debug_assert_eq!(rd.class(), RegClass::Float);
874            insts.push(store_vec_reg(rd));
875
876            if flags.unwind_info() {
877                clobber_offset -= clobber_offset_change as u32;
878                insts.push(Inst::Unwind {
879                    inst: UnwindInst::SaveReg {
880                        clobber_offset,
881                        reg: rd.to_real_reg().unwrap(),
882                    },
883                });
884            }
885        }
886
887        let store_vec_reg_pair = |rt, rt2| {
888            let clobber_offset_change = 16;
889
890            (
891                Inst::FpuStoreP64 {
892                    rt,
893                    rt2,
894                    mem: PairAMode::SPPreIndexed {
895                        simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
896                    },
897                    flags: MemFlags::trusted(),
898                },
899                clobber_offset_change as u32,
900            )
901        };
902        let mut iter = iter.rev();
903
904        while let Some([rt, rt2]) = iter.next() {
905            let rt: Reg = rt.to_reg().into();
906            let rt2: Reg = rt2.to_reg().into();
907
908            debug_assert_eq!(rt.class(), RegClass::Float);
909            debug_assert_eq!(rt2.class(), RegClass::Float);
910
911            let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
912
913            insts.push(inst);
914
915            if flags.unwind_info() {
916                clobber_offset -= clobber_offset_change;
917                insts.push(Inst::Unwind {
918                    inst: UnwindInst::SaveReg {
919                        clobber_offset,
920                        reg: rt.to_real_reg().unwrap(),
921                    },
922                });
923                insts.push(Inst::Unwind {
924                    inst: UnwindInst::SaveReg {
925                        clobber_offset: clobber_offset + clobber_offset_change / 2,
926                        reg: rt2.to_real_reg().unwrap(),
927                    },
928                });
929            }
930        }
931
932        // Allocate the fixed frame below the clobbers if necessary.
933        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
934        if stack_size > 0 {
935            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
936            if flags.unwind_info() {
937                insts.push(Inst::Unwind {
938                    inst: UnwindInst::StackAlloc { size: stack_size },
939                });
940            }
941        }
942
943        insts
944    }
945
946    fn gen_clobber_restore(
947        _call_conv: isa::CallConv,
948        _flags: &settings::Flags,
949        frame_layout: &FrameLayout,
950    ) -> SmallVec<[Inst; 16]> {
951        let mut insts = SmallVec::new();
952        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();
953
954        // Free the fixed frame if necessary.
955        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
956        if stack_size > 0 {
957            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
958        }
959
960        let load_vec_reg = |rd| Inst::FpuLoad64 {
961            rd,
962            mem: AMode::SPPostIndexed {
963                simm9: SImm9::maybe_from_i64(16).unwrap(),
964            },
965            flags: MemFlags::trusted(),
966        };
967        let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
968            rt,
969            rt2,
970            mem: PairAMode::SPPostIndexed {
971                simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
972            },
973            flags: MemFlags::trusted(),
974        };
975
976        let mut iter = clobbered_vec.chunks_exact(2);
977
978        while let Some([rt, rt2]) = iter.next() {
979            let rt: Writable<Reg> = rt.map(|r| r.into());
980            let rt2: Writable<Reg> = rt2.map(|r| r.into());
981
982            debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
983            debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
984            insts.push(load_vec_reg_pair(rt, rt2));
985        }
986
987        debug_assert!(iter.remainder().len() <= 1);
988
989        if let [rd] = iter.remainder() {
990            let rd: Writable<Reg> = rd.map(|r| r.into());
991
992            debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
993            insts.push(load_vec_reg(rd));
994        }
995
996        let mut iter = clobbered_int.chunks_exact(2);
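        // Continuing the example from `gen_clobber_save`: with x19, x20, and
        // x21 clobbered, this emits `ldp x19, x20, [sp], #16` followed by
        // `ldr x21, [sp], #16`, i.e. the reverse of the order in which they
        // were pushed.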
997
998        while let Some([rt, rt2]) = iter.next() {
999            let rt: Writable<Reg> = rt.map(|r| r.into());
1000            let rt2: Writable<Reg> = rt2.map(|r| r.into());
1001
1002            debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
1003            debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
1004            // ldp rt, rt2, [sp], #16
1005            insts.push(Inst::LoadP64 {
1006                rt,
1007                rt2,
1008                mem: PairAMode::SPPostIndexed {
1009                    simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
1010                },
1011                flags: MemFlags::trusted(),
1012            });
1013        }
1014
1015        debug_assert!(iter.remainder().len() <= 1);
1016
1017        if let [rd] = iter.remainder() {
1018            let rd: Writable<Reg> = rd.map(|r| r.into());
1019
1020            debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
1021            // ldr rd, [sp], #16
1022            insts.push(Inst::ULoad64 {
1023                rd,
1024                mem: AMode::SPPostIndexed {
1025                    simm9: SImm9::maybe_from_i64(16).unwrap(),
1026                },
1027                flags: MemFlags::trusted(),
1028            });
1029        }
1030
1031        insts
1032    }
1033
1034    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
1035        call_conv: isa::CallConv,
1036        dst: Reg,
1037        src: Reg,
1038        size: usize,
1039        mut alloc_tmp: F,
1040    ) -> SmallVec<[Self::I; 8]> {
1041        let mut insts = SmallVec::new();
1042        let arg0 = writable_xreg(0);
1043        let arg1 = writable_xreg(1);
1044        let arg2 = writable_xreg(2);
1045        let tmp = alloc_tmp(Self::word_type());
1046        insts.extend(Inst::load_constant(tmp, size as u64));
1047        insts.push(Inst::Call {
1048            info: Box::new(CallInfo {
1049                dest: ExternalName::LibCall(LibCall::Memcpy),
1050                uses: smallvec![
1051                    CallArgPair {
1052                        vreg: dst,
1053                        preg: arg0.to_reg()
1054                    },
1055                    CallArgPair {
1056                        vreg: src,
1057                        preg: arg1.to_reg()
1058                    },
1059                    CallArgPair {
1060                        vreg: tmp.to_reg(),
1061                        preg: arg2.to_reg()
1062                    }
1063                ],
1064                defs: smallvec![],
1065                clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
1066                caller_conv: call_conv,
1067                callee_conv: call_conv,
1068                callee_pop_size: 0,
1069                try_call_info: None,
1070            }),
1071        });
1072        insts
1073    }
1074
1075    fn get_number_of_spillslots_for_value(
1076        rc: RegClass,
1077        vector_size: u32,
1078        _isa_flags: &Self::F,
1079    ) -> u32 {
1080        assert_eq!(vector_size % 8, 0);
1081        // We allocate in terms of 8-byte slots.
1082        match rc {
1083            RegClass::Int => 1,
1084            RegClass::Float => vector_size / 8,
1085            RegClass::Vector => unreachable!(),
1086        }
1087    }
1088
1089    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
1090        if flags.enable_pinned_reg() {
1091            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
1092            MACHINE_ENV.get_or_init(|| create_reg_env(true))
1093        } else {
1094            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
1095            MACHINE_ENV.get_or_init(|| create_reg_env(false))
1096        }
1097    }
1098
1099    fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
1100        match (call_conv, is_exception) {
1101            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
1102            (isa::CallConv::Winch, true) => ALL_CLOBBERS,
1103            (isa::CallConv::Winch, false) => WINCH_CLOBBERS,
1104            (isa::CallConv::SystemV, _) => DEFAULT_AAPCS_CLOBBERS,
1105            (_, false) => DEFAULT_AAPCS_CLOBBERS,
1106            (_, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
1107        }
1108    }
1109
1110    fn get_ext_mode(
1111        call_conv: isa::CallConv,
1112        specified: ir::ArgumentExtension,
1113    ) -> ir::ArgumentExtension {
1114        if call_conv == isa::CallConv::AppleAarch64 {
1115            specified
1116        } else {
1117            ir::ArgumentExtension::None
1118        }
1119    }
1120
1121    fn compute_frame_layout(
1122        call_conv: isa::CallConv,
1123        flags: &settings::Flags,
1124        sig: &Signature,
1125        regs: &[Writable<RealReg>],
1126        function_calls: FunctionCalls,
1127        incoming_args_size: u32,
1128        tail_args_size: u32,
1129        stackslots_size: u32,
1130        fixed_frame_storage_size: u32,
1131        outgoing_args_size: u32,
1132    ) -> FrameLayout {
1133        let mut regs: Vec<Writable<RealReg>> = regs
1134            .iter()
1135            .cloned()
1136            .filter(|r| {
1137                is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
1138            })
1139            .collect();
1140
1141        // Sort registers for deterministic code output. We can do an unstable
1142        // sort because the registers will be unique (there are no dups).
1143        regs.sort_unstable();
1144
1145        // Compute clobber size.
1146        let clobber_size = compute_clobber_size(&regs);
1147
1148        // Compute linkage frame size.
1149        let setup_area_size = if flags.preserve_frame_pointers()
1150            || function_calls != FunctionCalls::None
1151            // The function arguments that are passed on the stack are addressed
1152            // relative to the Frame Pointer.
1153            || incoming_args_size > 0
1154            || clobber_size > 0
1155            || fixed_frame_storage_size > 0
1156        {
1157            16 // FP, LR
1158        } else {
1159            0
1160        };
1161
1162        // Return FrameLayout structure.
1163        FrameLayout {
1164            word_bytes: 8,
1165            incoming_args_size,
1166            tail_args_size,
1167            setup_area_size,
1168            clobber_size,
1169            fixed_frame_storage_size,
1170            stackslots_size,
1171            outgoing_args_size,
1172            clobbered_callee_saves: regs,
1173            function_calls,
1174        }
1175    }
1176
1177    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
1178        // Use x9 as a temporary if needed: it is caller-saved (clobbered)
1179        // and not used for return values.
1180        regs::writable_xreg(9)
1181    }
1182
1183    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
1184        const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
1185        match call_conv {
1186            isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
1187            _ => &[],
1188        }
1189    }
1190}
1191
1192impl AArch64MachineDeps {
1193    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
1194        // When manually unrolling adjust the stack pointer and then write a zero
1195        // to the stack at that offset. This generates something like
1196        // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
1197        //
1198        // We do this because valgrind expects us to never write beyond the stack
1199        // pointer and associated redzone.
1200        // See: https://github.com/bytecodealliance/wasmtime/issues/7454
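        // For example, with guard_size == 4096 and probe_count == 2 this emits:
        //
        //   sub sp, sp, #1, lsl #12
        //   stur wzr, [sp]
        //   sub sp, sp, #1, lsl #12
        //   stur wzr, [sp]
        //   add sp, sp, #2, lsl #12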
1201        for _ in 0..probe_count {
1202            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
1203
1204            insts.push(Inst::gen_store(
1205                AMode::SPOffset { off: 0 },
1206                zero_reg(),
1207                I32,
1208                MemFlags::trusted(),
1209            ));
1210        }
1211
1212        // Restore the stack pointer to its original value
1213        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
1214    }
1215
1216    fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
1217        // The non-unrolled version uses two temporary registers. The
1218        // `start` contains the current offset from sp and counts downwards
1219        // during the loop by increments of `guard_size`. The `end` is
1220        // the size of the frame and where we stop.
1221        //
1222        // Note that this emission is all post-regalloc so it should be ok
1223        // to use the temporary registers here as input/output as the loop
1224        // itself is not allowed to use the registers.
1225        let start = writable_spilltmp_reg();
1226        let end = writable_tmp2_reg();
1227        // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
1228        // `start` and `end` as temporaries in load_constant.
1229        insts.extend(Inst::load_constant(start, 0));
1230        insts.extend(Inst::load_constant(end, frame_size.into()));
1231        insts.push(Inst::StackProbeLoop {
1232            start,
1233            end: end.to_reg(),
1234            step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
1235        });
1236    }
1237
1238    pub fn select_api_key(
1239        isa_flags: &aarch64_settings::Flags,
1240        call_conv: isa::CallConv,
1241        setup_frame: bool,
1242    ) -> Option<APIKey> {
1243        if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
1244            // The `tail` calling convention uses a zero modifier rather than SP
1245            // because tail calls may happen with a different stack pointer than
1246            // when the function was entered, meaning that SP would not have the
1247            // same value when the return address is authenticated.
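            // For example, a frame-setting function using the default A key
            // selects `APIKey::ASP` under most calling conventions, but
            // `APIKey::AZ` under the `tail` convention.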
1248            Some(if isa_flags.sign_return_address_with_bkey() {
1249                match call_conv {
1250                    isa::CallConv::Tail => APIKey::BZ,
1251                    _ => APIKey::BSP,
1252                }
1253            } else {
1254                match call_conv {
1255                    isa::CallConv::Tail => APIKey::AZ,
1256                    _ => APIKey::ASP,
1257                }
1258            })
1259        } else {
1260            None
1261        }
1262    }
1263}
1264
1265/// Is the given register saved in the prologue if clobbered, i.e., is it a
1266/// callee-save?
1267fn is_reg_saved_in_prologue(
1268    _call_conv: isa::CallConv,
1269    enable_pinned_reg: bool,
1270    sig: &Signature,
1271    r: RealReg,
1272) -> bool {
1273    // FIXME: We need to inspect whether a function is returning Z or P regs too.
1274    let save_z_regs = sig
1275        .params
1276        .iter()
1277        .filter(|p| p.value_type.is_dynamic_vector())
1278        .count()
1279        != 0;
1280
1281    match r.class() {
1282        RegClass::Int => {
1283            // x19 - x28 inclusive are callee-saves.
1284            // However, x21 is the pinned reg if `enable_pinned_reg`
1285            // is set, and is implicitly globally-allocated, hence not
1286            // callee-saved in prologues.
1287            if enable_pinned_reg && r.hw_enc() == PINNED_REG {
1288                false
1289            } else {
1290                r.hw_enc() >= 19 && r.hw_enc() <= 28
1291            }
1292        }
1293        RegClass::Float => {
1294            // If a subroutine takes at least one argument in scalable vector registers
1295            // or scalable predicate registers, or if it is a function that returns
1296            // results in such registers, it must ensure that the entire contents of
1297            // z8-z23 are preserved across the call. In other cases it need only
1298            // preserve the low 64 bits of z8-z15.
1299            if save_z_regs {
1300                r.hw_enc() >= 8 && r.hw_enc() <= 23
1301            } else {
1302                // v8 - v15 inclusive are callee-saves.
1303                r.hw_enc() >= 8 && r.hw_enc() <= 15
1304            }
1305        }
1306        RegClass::Vector => unreachable!(),
1307    }
1308}
1309
1310const fn default_aapcs_clobbers() -> PRegSet {
1311    PRegSet::empty()
1312        // x0 - x17 inclusive are caller-saves.
1313        .with(xreg_preg(0))
1314        .with(xreg_preg(1))
1315        .with(xreg_preg(2))
1316        .with(xreg_preg(3))
1317        .with(xreg_preg(4))
1318        .with(xreg_preg(5))
1319        .with(xreg_preg(6))
1320        .with(xreg_preg(7))
1321        .with(xreg_preg(8))
1322        .with(xreg_preg(9))
1323        .with(xreg_preg(10))
1324        .with(xreg_preg(11))
1325        .with(xreg_preg(12))
1326        .with(xreg_preg(13))
1327        .with(xreg_preg(14))
1328        .with(xreg_preg(15))
1329        .with(xreg_preg(16))
1330        .with(xreg_preg(17))
1331        // v0 - v7 inclusive and v16 - v31 inclusive are
1332        // caller-saves. The upper 64 bits of v8 - v15 inclusive are
1333        // also caller-saves.  However, because we cannot currently
1334        // represent partial registers to regalloc2, we indicate here
1335        // that every vector register is caller-save. Because this
1336        // function is used at *callsites*, approximating in this
1337        // direction (save more than necessary) is conservative and
1338        // thus safe.
1339        //
1340        // Note that we exclude clobbers from a call instruction when
1341        // a call instruction's callee has the same ABI as the caller
1342        // (the current function body); this is safe (anything
1343        // clobbered by callee can be clobbered by caller as well) and
1344        // avoids unnecessary saves of v8-v15 in the prologue even
1345        // though we include them as defs here.
1346        .with(vreg_preg(0))
1347        .with(vreg_preg(1))
1348        .with(vreg_preg(2))
1349        .with(vreg_preg(3))
1350        .with(vreg_preg(4))
1351        .with(vreg_preg(5))
1352        .with(vreg_preg(6))
1353        .with(vreg_preg(7))
1354        .with(vreg_preg(8))
1355        .with(vreg_preg(9))
1356        .with(vreg_preg(10))
1357        .with(vreg_preg(11))
1358        .with(vreg_preg(12))
1359        .with(vreg_preg(13))
1360        .with(vreg_preg(14))
1361        .with(vreg_preg(15))
1362        .with(vreg_preg(16))
1363        .with(vreg_preg(17))
1364        .with(vreg_preg(18))
1365        .with(vreg_preg(19))
1366        .with(vreg_preg(20))
1367        .with(vreg_preg(21))
1368        .with(vreg_preg(22))
1369        .with(vreg_preg(23))
1370        .with(vreg_preg(24))
1371        .with(vreg_preg(25))
1372        .with(vreg_preg(26))
1373        .with(vreg_preg(27))
1374        .with(vreg_preg(28))
1375        .with(vreg_preg(29))
1376        .with(vreg_preg(30))
1377        .with(vreg_preg(31))
1378}
1379
1380const fn winch_clobbers() -> PRegSet {
1381    PRegSet::empty()
1382        .with(xreg_preg(0))
1383        .with(xreg_preg(1))
1384        .with(xreg_preg(2))
1385        .with(xreg_preg(3))
1386        .with(xreg_preg(4))
1387        .with(xreg_preg(5))
1388        .with(xreg_preg(6))
1389        .with(xreg_preg(7))
1390        .with(xreg_preg(8))
1391        .with(xreg_preg(9))
1392        .with(xreg_preg(10))
1393        .with(xreg_preg(11))
1394        .with(xreg_preg(12))
1395        .with(xreg_preg(13))
1396        .with(xreg_preg(14))
1397        .with(xreg_preg(15))
1398        .with(xreg_preg(16))
1399        .with(xreg_preg(17))
1400        // x18 is used to carry platform state and is not allocatable by Winch.
1401        //
1402        // x19 - x27 are considered caller-saved in Winch's calling convention.
1403        .with(xreg_preg(19))
1404        .with(xreg_preg(20))
1405        .with(xreg_preg(21))
1406        .with(xreg_preg(22))
1407        .with(xreg_preg(23))
1408        .with(xreg_preg(24))
1409        .with(xreg_preg(25))
1410        .with(xreg_preg(26))
1411        .with(xreg_preg(27))
1412        // x28 is used as the shadow stack pointer and is considered
1413        // callee-saved.
1414        //
1415        // All vregs are considered caller-saved.
1416        .with(vreg_preg(0))
1417        .with(vreg_preg(1))
1418        .with(vreg_preg(2))
1419        .with(vreg_preg(3))
1420        .with(vreg_preg(4))
1421        .with(vreg_preg(5))
1422        .with(vreg_preg(6))
1423        .with(vreg_preg(7))
1424        .with(vreg_preg(8))
1425        .with(vreg_preg(9))
1426        .with(vreg_preg(10))
1427        .with(vreg_preg(11))
1428        .with(vreg_preg(12))
1429        .with(vreg_preg(13))
1430        .with(vreg_preg(14))
1431        .with(vreg_preg(15))
1432        .with(vreg_preg(16))
1433        .with(vreg_preg(17))
1434        .with(vreg_preg(18))
1435        .with(vreg_preg(19))
1436        .with(vreg_preg(20))
1437        .with(vreg_preg(21))
1438        .with(vreg_preg(22))
1439        .with(vreg_preg(23))
1440        .with(vreg_preg(24))
1441        .with(vreg_preg(25))
1442        .with(vreg_preg(26))
1443        .with(vreg_preg(27))
1444        .with(vreg_preg(28))
1445        .with(vreg_preg(29))
1446        .with(vreg_preg(30))
1447        .with(vreg_preg(31))
1448}
1449
1450const fn all_clobbers() -> PRegSet {
1451    PRegSet::empty()
1452        // integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
1453        // LR, x31 is SP/ZR.)
1454        .with(xreg_preg(0))
1455        .with(xreg_preg(1))
1456        .with(xreg_preg(2))
1457        .with(xreg_preg(3))
1458        .with(xreg_preg(4))
1459        .with(xreg_preg(5))
1460        .with(xreg_preg(6))
1461        .with(xreg_preg(7))
1462        .with(xreg_preg(8))
1463        .with(xreg_preg(9))
1464        .with(xreg_preg(10))
1465        .with(xreg_preg(11))
1466        .with(xreg_preg(12))
1467        .with(xreg_preg(13))
1468        .with(xreg_preg(14))
1469        .with(xreg_preg(15))
1470        .with(xreg_preg(16))
1471        .with(xreg_preg(17))
1472        .with(xreg_preg(18))
1473        .with(xreg_preg(19))
1474        .with(xreg_preg(20))
1475        .with(xreg_preg(21))
1476        .with(xreg_preg(22))
1477        .with(xreg_preg(23))
1478        .with(xreg_preg(24))
1479        .with(xreg_preg(25))
1480        .with(xreg_preg(26))
1481        .with(xreg_preg(27))
1482        .with(xreg_preg(28))
1483        // vector registers: v0 to v31 inclusive.
1484        .with(vreg_preg(0))
1485        .with(vreg_preg(1))
1486        .with(vreg_preg(2))
1487        .with(vreg_preg(3))
1488        .with(vreg_preg(4))
1489        .with(vreg_preg(5))
1490        .with(vreg_preg(6))
1491        .with(vreg_preg(7))
1492        .with(vreg_preg(8))
1493        .with(vreg_preg(9))
1494        .with(vreg_preg(10))
1495        .with(vreg_preg(11))
1496        .with(vreg_preg(12))
1497        .with(vreg_preg(13))
1498        .with(vreg_preg(14))
1499        .with(vreg_preg(15))
1500        .with(vreg_preg(16))
1501        .with(vreg_preg(17))
1502        .with(vreg_preg(18))
1503        .with(vreg_preg(19))
1504        .with(vreg_preg(20))
1505        .with(vreg_preg(21))
1506        .with(vreg_preg(22))
1507        .with(vreg_preg(23))
1508        .with(vreg_preg(24))
1509        .with(vreg_preg(25))
1510        .with(vreg_preg(26))
1511        .with(vreg_preg(27))
1512        .with(vreg_preg(28))
1513        .with(vreg_preg(29))
1514        .with(vreg_preg(30))
1515        .with(vreg_preg(31))
1516}
1517
1518const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
1519const WINCH_CLOBBERS: PRegSet = winch_clobbers();
1520const ALL_CLOBBERS: PRegSet = all_clobbers();
1521
1522fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
1523    fn preg(r: Reg) -> PReg {
1524        r.to_real_reg().unwrap().into()
1525    }
1526
1527    let mut env = MachineEnv {
1528        preferred_regs_by_class: [
1529            vec![
1530                preg(xreg(0)),
1531                preg(xreg(1)),
1532                preg(xreg(2)),
1533                preg(xreg(3)),
1534                preg(xreg(4)),
1535                preg(xreg(5)),
1536                preg(xreg(6)),
1537                preg(xreg(7)),
1538                preg(xreg(8)),
1539                preg(xreg(9)),
1540                preg(xreg(10)),
1541                preg(xreg(11)),
1542                preg(xreg(12)),
1543                preg(xreg(13)),
1544                preg(xreg(14)),
1545                preg(xreg(15)),
1546                // x16 and x17 are spilltmp and tmp2 (see above).
1547                // x18 could be used by the platform to carry inter-procedural state;
1548                // conservatively assume so and make it not allocatable.
1549                // x19-28 are callee-saved and so not preferred.
1550                // x21 is the pinned register (if enabled) and not allocatable if so.
1551                // x29 is FP, x30 is LR, x31 is SP/ZR.
1552            ],
1553            vec![
1554                preg(vreg(0)),
1555                preg(vreg(1)),
1556                preg(vreg(2)),
1557                preg(vreg(3)),
1558                preg(vreg(4)),
1559                preg(vreg(5)),
1560                preg(vreg(6)),
1561                preg(vreg(7)),
1562                // v8-15 are callee-saved and so not preferred.
1563                preg(vreg(16)),
1564                preg(vreg(17)),
1565                preg(vreg(18)),
1566                preg(vreg(19)),
1567                preg(vreg(20)),
1568                preg(vreg(21)),
1569                preg(vreg(22)),
1570                preg(vreg(23)),
1571                preg(vreg(24)),
1572                preg(vreg(25)),
1573                preg(vreg(26)),
1574                preg(vreg(27)),
1575                preg(vreg(28)),
1576                preg(vreg(29)),
1577                preg(vreg(30)),
1578                preg(vreg(31)),
1579            ],
1580            // Vector Regclass is unused
1581            vec![],
1582        ],
1583        non_preferred_regs_by_class: [
1584            vec![
1585                preg(xreg(19)),
1586                preg(xreg(20)),
1587                // x21 is pinned reg if enabled; we add to this list below if not.
1588                preg(xreg(22)),
1589                preg(xreg(23)),
1590                preg(xreg(24)),
1591                preg(xreg(25)),
1592                preg(xreg(26)),
1593                preg(xreg(27)),
1594                preg(xreg(28)),
1595            ],
1596            vec![
1597                preg(vreg(8)),
1598                preg(vreg(9)),
1599                preg(vreg(10)),
1600                preg(vreg(11)),
1601                preg(vreg(12)),
1602                preg(vreg(13)),
1603                preg(vreg(14)),
1604                preg(vreg(15)),
1605            ],
1606            // Vector Regclass is unused
1607            vec![],
1608        ],
1609        fixed_stack_slots: vec![],
1610        scratch_by_class: [None, None, None],
1611    };
1612
1613    if !enable_pinned_reg {
1614        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
1615        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
1616    }
1617
1618    env
1619}