// cranelift_codegen/isa/x64/abi.rs
1//! Implementation of the standard x64 ABI.
2
3use crate::CodegenResult;
4use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
5use crate::ir::{ExternalName, types::*};
6use crate::isa;
7use crate::isa::winch;
8use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
9use crate::machinst::abi::*;
10use crate::machinst::*;
11use crate::settings;
12use alloc::borrow::ToOwned;
13use alloc::boxed::Box;
14use alloc::vec::Vec;
15use args::*;
16use cranelift_assembler_x64 as asm;
17use regalloc2::{MachineEnv, PReg, PRegSet};
18use smallvec::{SmallVec, smallvec};
19
20/// Support for the x64 ABI from the callee side (within a function body).
21pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
22
23/// Implementation of ABI primitives for x64.
24pub struct X64ABIMachineSpec;
25
26impl X64ABIMachineSpec {
27    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
28        insts.reserve(probe_count as usize);
29        for _ in 0..probe_count {
30            // "Allocate" stack space for the probe by decrementing the stack pointer before
31            // the write. This is required to make valgrind happy.
32            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
33            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
34
35            // Touch the current page by storing an immediate zero.
36            // mov  [rsp], 0
37            insts.push(Inst::External {
38                inst: asm::inst::movl_mi::new(Amode::imm_reg(0, regs::rsp()), 0i32.cast_unsigned())
39                    .into(),
40            });
41        }
42
43        // Restore the stack pointer to its original value
44        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
45    }
46
47    fn gen_probestack_loop(
48        insts: &mut SmallInstVec<Inst>,
49        _call_conv: isa::CallConv,
50        frame_size: u32,
51        guard_size: u32,
52    ) {
53        // We have to use a caller-saved register since clobbering only
54        // happens after stack probing.
55        // `r11` is caller saved on both Fastcall and SystemV, and not used
56        // for argument passing, so it's pretty much free. It is also not
57        // used by the stacklimit mechanism.
58        let tmp = regs::r11();
59        debug_assert!({
60            let real_reg = tmp.to_real_reg().unwrap();
61            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
62        });
63
64        insts.push(Inst::StackProbeLoop {
65            tmp: Writable::from_reg(tmp),
66            frame_size,
67            guard_size,
68        });
69    }
70}
71
72impl IsaFlags for x64_settings::Flags {}
73
74impl ABIMachineSpec for X64ABIMachineSpec {
    /// The x64 machine-instruction type produced by this ABI implementation.
    type I = Inst;

    /// x64-specific ISA settings.
    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
83
    /// x64 machine words are 64 bits wide.
    fn word_bits() -> u32 {
        64
    }
87
    /// Return required stack alignment in bytes.
    ///
    /// 16 bytes regardless of calling convention: the `call_conv` argument is
    /// intentionally ignored.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }
92
93    fn compute_arg_locs(
94        call_conv: isa::CallConv,
95        flags: &settings::Flags,
96        params: &[ir::AbiParam],
97        args_or_rets: ArgsOrRets,
98        add_ret_area_ptr: bool,
99        mut args: ArgsAccumulator,
100    ) -> CodegenResult<(u32, Option<usize>)> {
101        let is_fastcall = call_conv == CallConv::WindowsFastcall;
102        let is_tail = call_conv == CallConv::Tail;
103
104        let mut next_gpr = 0;
105        let mut next_vreg = 0;
106        let mut next_stack: u32 = 0;
107        let mut next_param_idx = 0; // Fastcall cares about overall param index
108
109        if args_or_rets == ArgsOrRets::Args && is_fastcall {
110            // Fastcall always reserves 32 bytes of shadow space corresponding to
111            // the four initial in-arg parameters.
112            //
113            // (See:
114            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
115            next_stack = 32;
116        }
117
118        let ret_area_ptr = if add_ret_area_ptr {
119            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
120            next_gpr += 1;
121            next_param_idx += 1;
122            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
123            // argument. For the Tail and Winch ABIs we do the same for simplicity sake.
124            Some(ABIArg::reg(
125                get_intreg_for_arg(call_conv, 0, 0)
126                    .unwrap()
127                    .to_real_reg()
128                    .unwrap(),
129                types::I64,
130                ir::ArgumentExtension::None,
131                ir::ArgumentPurpose::Normal,
132            ))
133        } else {
134            None
135        };
136
137        // If any param uses extension, the winch calling convention will not pack its results
138        // on the stack and will instead align them to 8-byte boundaries the same way that all the
139        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
140        // Winch only uses this calling convention via trampolines, and those trampolines don't add
141        // extension annotations. Additionally, handling extension attributes this way allows clif
142        // functions that use them with the Winch calling convention to interact successfully with
143        // testing infrastructure.
144        // The results are also not packed if any of the types are `f16`. This is to simplify the
145        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
146        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
147        let uses_extension = params.iter().any(|p| {
148            p.extension != ir::ArgumentExtension::None
149                || p.value_type == types::F16
150                || p.value_type == types::I8X2
151        });
152
153        for (ix, param) in params.iter().enumerate() {
154            let last_param = ix == params.len() - 1;
155
156            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
157                let offset = next_stack as i64;
158                let size = size;
159                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
160                next_stack += size;
161                args.push(ABIArg::StructArg {
162                    offset,
163                    size: size as u64,
164                    purpose: param.purpose,
165                });
166                continue;
167            }
168
169            // Find regclass(es) of the register(s) used to store a value of this type.
170            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
171
172            // Now assign ABIArgSlots for each register-sized part.
173            //
174            // Note that the handling of `i128` values is unique here:
175            //
176            // - If `enable_llvm_abi_extensions` is set in the flags, each
177            //   `i128` is split into two `i64`s and assigned exactly as if it
178            //   were two consecutive 64-bit args, except that if one of the
179            //   two halves is forced onto the stack, the other half is too.
180            //   This is consistent with LLVM's behavior, and is needed for
181            //   some uses of Cranelift (e.g., the rustc backend).
182            //
183            // - Otherwise, if the calling convention is Tail, we behave as in
184            //   the previous case, even if `enable_llvm_abi_extensions` is not
185            //   set in the flags: This is a custom calling convention defined
186            //   by Cranelift, LLVM doesn't know about it.
187            //
188            // - Otherwise, both SysV and Fastcall specify behavior (use of
189            //   vector register, a register pair, or passing by reference
190            //   depending on the case), but for simplicity, we will just panic if
191            //   an i128 type appears in a signature and the LLVM extensions flag
192            //   is not set.
193            //
194            // For examples of how rustc compiles i128 args and return values on
195            // both SysV and Fastcall platforms, see:
196            // https://godbolt.org/z/PhG3ob
197
198            if param.value_type.bits() > 64
199                && !(param.value_type.is_vector() || param.value_type.is_float())
200                && !flags.enable_llvm_abi_extensions()
201                && !is_tail
202            {
203                panic!(
204                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
205                );
206            }
207            // As MSVC doesn't support f16/f128 there is no standard way to pass/return them with
208            // the Windows ABI. LLVM passes/returns them in XMM registers.
209            if matches!(param.value_type, types::F16 | types::F128)
210                && is_fastcall
211                && !flags.enable_llvm_abi_extensions()
212            {
213                panic!(
214                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
215                );
216            }
217
218            // Windows fastcall dictates that `__m128i` and `f128` parameters to
219            // a function are passed indirectly as pointers, so handle that as a
220            // special case before the loop below.
221            if (param.value_type.is_vector() || param.value_type.is_float())
222                && param.value_type.bits() >= 128
223                && args_or_rets == ArgsOrRets::Args
224                && is_fastcall
225            {
226                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
227                    Some(reg) => {
228                        next_gpr += 1;
229                        ABIArgSlot::Reg {
230                            reg: reg.to_real_reg().unwrap(),
231                            ty: ir::types::I64,
232                            extension: ir::ArgumentExtension::None,
233                        }
234                    }
235
236                    None => {
237                        next_stack = align_to(next_stack, 8) + 8;
238                        ABIArgSlot::Stack {
239                            offset: (next_stack - 8) as i64,
240                            ty: ir::types::I64,
241                            extension: param.extension,
242                        }
243                    }
244                };
245                next_param_idx += 1;
246                args.push(ABIArg::ImplicitPtrArg {
247                    // NB: this is filled in after this loop
248                    offset: 0,
249                    pointer,
250                    ty: param.value_type,
251                    purpose: param.purpose,
252                });
253                continue;
254            }
255
256            // SystemV dictates that 128bit int parameters are always either
257            // passed in two registers or on the stack, so handle that as a
258            // special case before the loop below.
259            if param.value_type == types::I128
260                && args_or_rets == ArgsOrRets::Args
261                && call_conv == CallConv::SystemV
262            {
263                let mut slots = ABIArgSlotVec::new();
264                match (
265                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
266                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
267                ) {
268                    (Some(reg1), Some(reg2)) => {
269                        slots.push(ABIArgSlot::Reg {
270                            reg: reg1.to_real_reg().unwrap(),
271                            ty: ir::types::I64,
272                            extension: ir::ArgumentExtension::None,
273                        });
274                        slots.push(ABIArgSlot::Reg {
275                            reg: reg2.to_real_reg().unwrap(),
276                            ty: ir::types::I64,
277                            extension: ir::ArgumentExtension::None,
278                        });
279                    }
280                    _ => {
281                        let size = 16;
282
283                        // Align.
284                        next_stack = align_to(next_stack, size);
285
286                        slots.push(ABIArgSlot::Stack {
287                            offset: next_stack as i64,
288                            ty: ir::types::I64,
289                            extension: param.extension,
290                        });
291                        slots.push(ABIArgSlot::Stack {
292                            offset: next_stack as i64 + 8,
293                            ty: ir::types::I64,
294                            extension: param.extension,
295                        });
296                        next_stack += size;
297                    }
298                };
299                // Unconditionally increment next_gpr even when storing the
300                // argument on the stack to prevent reusing a possibly
301                // remaining register for the next argument.
302                next_gpr += 2;
303                next_param_idx += 2;
304
305                args.push(ABIArg::Slots {
306                    slots,
307                    purpose: param.purpose,
308                });
309                continue;
310            }
311
312            let mut slots = ABIArgSlotVec::new();
313            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
314                let last_slot = last_param && ix == rcs.len() - 1;
315
316                let intreg = *rc == RegClass::Int;
317                let nextreg = if intreg {
318                    match args_or_rets {
319                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
320                        ArgsOrRets::Rets => {
321                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
322                        }
323                    }
324                } else {
325                    match args_or_rets {
326                        ArgsOrRets::Args => {
327                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
328                        }
329                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
330                    }
331                };
332                next_param_idx += 1;
333                if let Some(reg) = nextreg {
334                    if intreg {
335                        next_gpr += 1;
336                    } else {
337                        next_vreg += 1;
338                    }
339                    slots.push(ABIArgSlot::Reg {
340                        reg: reg.to_real_reg().unwrap(),
341                        ty: *reg_ty,
342                        extension: param.extension,
343                    });
344                } else {
345                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
346                        return Err(crate::CodegenError::Unsupported(
347                            "Too many return values to fit in registers. \
348                            Use a StructReturn argument instead. (#9510)"
349                                .to_owned(),
350                        ));
351                    }
352
353                    let size = reg_ty.bytes();
354                    let size = if call_conv == CallConv::Winch
355                        && args_or_rets == ArgsOrRets::Rets
356                        && !uses_extension
357                    {
358                        size
359                    } else {
360                        let size = core::cmp::max(size, 8);
361
362                        // Align.
363                        debug_assert!(size.is_power_of_two());
364                        next_stack = align_to(next_stack, size);
365                        size
366                    };
367
368                    slots.push(ABIArgSlot::Stack {
369                        offset: next_stack as i64,
370                        ty: *reg_ty,
371                        extension: param.extension,
372                    });
373                    next_stack += size;
374                }
375            }
376
377            args.push(ABIArg::Slots {
378                slots,
379                purpose: param.purpose,
380            });
381        }
382
383        // Fastcall's indirect 128+ bit vector arguments are all located on the
384        // stack, and stack space is reserved after all parameters are passed,
385        // so allocate from the space now.
386        if args_or_rets == ArgsOrRets::Args && is_fastcall {
387            for arg in args.args_mut() {
388                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
389                    assert_eq!(*offset, 0);
390                    next_stack = align_to(next_stack, 16);
391                    *offset = next_stack as i64;
392                    next_stack += 16;
393                }
394            }
395        }
396        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
397            args.push_non_formal(ret_area_ptr);
398            Some(args.args().len() - 1)
399        } else {
400            None
401        };
402
403        // Winch writes the first result to the highest offset, so we need to iterate through the
404        // args and adjust the offsets down.
405        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
406            winch::reverse_stack(args, next_stack, uses_extension);
407        }
408
409        next_stack = align_to(next_stack, 16);
410
411        Ok((next_stack, extra_arg_idx))
412    }
413
414    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
415        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
416        // bits as well -- see `Inst::store()`).
417        let ty = match ty {
418            types::I8 | types::I16 | types::I32 => types::I64,
419            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
420            // two.
421            types::F16 | types::I8X2 => types::F32,
422            _ => ty,
423        };
424        Inst::load(ty, mem, into_reg, ExtKind::None)
425    }
426
427    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
428        let ty = match ty {
429            // See `gen_load_stack`.
430            types::F16 | types::I8X2 => types::F32,
431            _ => ty,
432        };
433        Inst::store(ty, from_reg, mem)
434    }
435
    /// Generate a register-to-register move of a value of type `ty`;
    /// delegates to the instruction-level helper.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }
439
440    /// Generate an integer-extend operation.
441    fn gen_extend(
442        to_reg: Writable<Reg>,
443        from_reg: Reg,
444        is_signed: bool,
445        from_bits: u8,
446        to_bits: u8,
447    ) -> Self::I {
448        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
449            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
450        if is_signed {
451            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
452        } else {
453            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
454        }
455    }
456
    /// Wrap the lowered argument pairs in an `Args` pseudo-instruction.
    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }
460
    /// Wrap the lowered return-value pairs in a `Rets` pseudo-instruction.
    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }
464
465    fn gen_add_imm(
466        _call_conv: isa::CallConv,
467        into_reg: Writable<Reg>,
468        from_reg: Reg,
469        imm: u32,
470    ) -> SmallInstVec<Self::I> {
471        let mut ret = SmallVec::new();
472        if from_reg != into_reg.to_reg() {
473            ret.push(Inst::gen_move(into_reg, from_reg, I64));
474        }
475        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
476        ret.push(Inst::addq_mi(into_reg, imm));
477        ret
478    }
479
480    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
481        smallvec![
482            Inst::External {
483                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP,).into(),
484            },
485            Inst::TrapIf {
486                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
487                cc: CC::NBE,
488                trap_code: TrapCode::STACK_OVERFLOW,
489            },
490        ]
491    }
492
493    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
494        let mem: SyntheticAmode = mem.into();
495        Inst::External {
496            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
497        }
498    }
499
500    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
501        // As per comment on trait definition, we must return a caller-save
502        // register that is not used as an argument here.
503        debug_assert!(!is_callee_save_systemv(
504            regs::r10().to_real_reg().unwrap(),
505            false
506        ));
507        regs::r10()
508    }
509
510    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
511        // Only ever used for I64s, F128s and vectors; if that changes, see if
512        // the ExtKind below needs to be changed.
513        assert!(ty == I64 || ty.is_vector() || ty == F128);
514        let mem = Amode::imm_reg(offset, base);
515        Inst::load(ty, mem, into_reg, ExtKind::None)
516    }
517
518    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
519        let ty = match ty {
520            // See `gen_load_stack`.
521            types::F16 | types::I8X2 => types::F32,
522            _ => ty,
523        };
524        let mem = Amode::imm_reg(offset, base);
525        Inst::store(ty, from_reg, mem)
526    }
527
528    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
529        let rsp = Writable::from_reg(regs::rsp());
530        let inst = if amount >= 0 {
531            Inst::addq_mi(rsp, amount)
532        } else {
533            Inst::subq_mi(rsp, -amount)
534        };
535        smallvec![inst]
536    }
537
538    fn gen_prologue_frame_setup(
539        _call_conv: isa::CallConv,
540        flags: &settings::Flags,
541        _isa_flags: &x64_settings::Flags,
542        frame_layout: &FrameLayout,
543    ) -> SmallInstVec<Self::I> {
544        let r_rsp = Gpr::RSP;
545        let r_rbp = Gpr::RBP;
546        let w_rbp = Writable::from_reg(r_rbp);
547        let mut insts = SmallVec::new();
548        // `push %rbp`
549        // RSP before the call will be 0 % 16.  So here, it is 8 % 16.
550        insts.push(Inst::External {
551            inst: asm::inst::pushq_o::new(r_rbp).into(),
552        });
553
554        if flags.unwind_info() {
555            insts.push(Inst::Unwind {
556                inst: UnwindInst::PushFrameRegs {
557                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
558                },
559            });
560        }
561
562        // `mov %rsp, %rbp`
563        // RSP is now 0 % 16
564        insts.push(Inst::External {
565            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
566        });
567
568        insts
569    }
570
571    fn gen_epilogue_frame_restore(
572        _call_conv: isa::CallConv,
573        _flags: &settings::Flags,
574        _isa_flags: &x64_settings::Flags,
575        _frame_layout: &FrameLayout,
576    ) -> SmallInstVec<Self::I> {
577        let rbp = Gpr::RBP;
578        let rsp = Gpr::RSP;
579
580        let mut insts = SmallVec::new();
581        // `mov %rbp, %rsp`
582        insts.push(Inst::External {
583            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
584        });
585        // `pop %rbp`
586        insts.push(Inst::External {
587            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
588        });
589        insts
590    }
591
592    fn gen_return(
593        call_conv: CallConv,
594        _isa_flags: &x64_settings::Flags,
595        frame_layout: &FrameLayout,
596    ) -> SmallInstVec<Self::I> {
597        // Emit return instruction.
598        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
599            frame_layout.tail_args_size
600        } else {
601            0
602        };
603        let inst = if stack_bytes_to_pop == 0 {
604            asm::inst::retq_zo::new().into()
605        } else {
606            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
607            asm::inst::retq_i::new(stack_bytes_to_pop).into()
608        };
609        smallvec![Inst::External { inst }]
610    }
611
612    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
613        insts.push(Inst::imm(
614            OperandSize::Size32,
615            frame_size as u64,
616            Writable::from_reg(regs::rax()),
617        ));
618        insts.push(Inst::CallKnown {
619            // No need to include arg here: we are post-regalloc
620            // so no constraints will be seen anyway.
621            info: Box::new(CallInfo::empty(
622                ExternalName::LibCall(LibCall::Probestack),
623                CallConv::Probestack,
624            )),
625        });
626    }
627
628    fn gen_inline_probestack(
629        insts: &mut SmallInstVec<Self::I>,
630        call_conv: isa::CallConv,
631        frame_size: u32,
632        guard_size: u32,
633    ) {
634        // Unroll at most n consecutive probes, before falling back to using a loop
635        //
636        // This was number was picked because the loop version is 38 bytes long. We can fit
637        // 4 inline probes in that space, so unroll if its beneficial in terms of code size.
638        const PROBE_MAX_UNROLL: u32 = 4;
639
640        // Calculate how many probes we need to perform. Round down, as we only
641        // need to probe whole guard_size regions we'd otherwise skip over.
642        let probe_count = frame_size / guard_size;
643        if probe_count == 0 {
644            // No probe necessary
645        } else if probe_count <= PROBE_MAX_UNROLL {
646            Self::gen_probestack_unroll(insts, guard_size, probe_count)
647        } else {
648            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
649        }
650    }
651
    /// Emit prologue code that (1) grows the incoming-argument area when a
    /// `return_call` needs more stack-argument space than the caller provided,
    /// (2) allocates the fixed frame, and (3) saves clobbered callee-saved
    /// registers, emitting unwind metadata along the way.
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            // (Load from its old location at rsp+diff, store at rsp+0, using
            // r11 as a scratch register.)
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            // GPRs are saved as 8-byte words; FP/vector registers as full
            // 16-byte XMM contents.
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }
762
    /// Generate the epilogue sequence that reloads the clobbered
    /// callee-saved registers and releases the frame by adjusting RSP
    /// back upward. Mirrors the layout used by the clobber-save code in
    /// the prologue.
    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offset from
        // the same base as above in clobber_save, as RSP won't change between the
        // prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            // Integer regs reload as 8-byte words; float-class regs
            // reload their full 16-byte XMM contents.
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        // Total downward SP adjustment made by the prologue: fixed
        // frame storage, clobber area, and outgoing-argument area.
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::addq_mi(rsp, stack_size));
        }

        insts
    }
810
    /// Emit a call to the `memcpy` libcall copying `size` bytes from
    /// `src` to `dst`. `alloc_tmp` supplies scratch virtual registers
    /// for the size immediate and for the callee address.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        // First three integer-argument registers of the convention:
        // memcpy(dst, src, size).
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        // Materialize the byte count as a 64-bit immediate.
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2.map(Gpr::unwrap_new),
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        // The libcall pops nothing off the stack.
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
            try_call_info: None,
            patchable: false,
        })));
        insts
    }
861
862    fn get_number_of_spillslots_for_value(
863        rc: RegClass,
864        vector_scale: u32,
865        _isa_flags: &Self::F,
866    ) -> u32 {
867        // We allocate in terms of 8-byte slots.
868        match rc {
869            RegClass::Int => 1,
870            RegClass::Float => vector_scale / 8,
871            RegClass::Vector => unreachable!(),
872        }
873    }
874
    /// Return the static register-allocator environment. When the
    /// pinned register is enabled, the environment built with
    /// `create_reg_env_systemv(true)` is used, which withholds R15
    /// from the allocatable set.
    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            // Each branch owns its own lazily-initialized-at-const-eval
            // static so both configurations can coexist.
            static MACHINE_ENV: MachineEnv = create_reg_env_systemv(true);
            &MACHINE_ENV
        } else {
            static MACHINE_ENV: MachineEnv = create_reg_env_systemv(false);
            &MACHINE_ENV
        }
    }
884
    /// Compute the register set clobbered by a call to a function with
    /// the given calling convention. `is_exception` selects the clobber
    /// set for the exceptional (unwind) edge of a `try_call`.
    ///
    /// NOTE: arm order is significant — the `(PreserveAll, true)` arm
    /// must precede `(PreserveAll, _)`, and `(WindowsFastcall, false)`
    /// must precede the `(_, false)` catch-all.
    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match (call_conv_of_callee, is_exception) {
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            // Note that "PreserveAll" actually preserves nothing at
            // the callsite if used for a `try_call`, because the
            // unwinder ABI for `try_call`s is still "no clobbered
            // register restores" for this ABI (so as to work with
            // Wasmtime).
            (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, _) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => SYSV_CLOBBERS,
            (isa::CallConv::WindowsFastcall, false) => WINDOWS_CLOBBERS,
            // Normal-return path of a preserve-all callee: everything
            // is callee-saved, so nothing is clobbered.
            (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
            // Default for the remaining conventions on the normal path.
            (_, false) => SYSV_CLOBBERS,
            (call_conv, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }
905
    /// x64 applies exactly the argument extension requested by the
    /// signature; no calling convention here imposes an additional one.
    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }
912
    /// Compute the frame layout for a function: which written registers
    /// must be saved as callee-saves for `call_conv`, the resulting
    /// clobber-area size, and the sizes of the remaining frame regions.
    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        // Keep only the written registers that this convention requires
        // the callee to preserve.
        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            // The `preserve_all` calling convention makes every reg a callee-save reg.
            CallConv::PreserveAll => regs.iter().cloned().collect(),
            CallConv::Probestack => todo!("probestack?"),
            // Not an x64 calling convention.
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            // Keep the tail-call argument area 16-byte aligned.
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }
970
    /// Scratch register used when shuffling return values.
    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use r11 as a temp: clobbered anyway, and
        // not otherwise used as a return value in any of our
        // supported calling conventions.
        Writable::from_reg(regs::r11())
    }
977
978    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
979        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
980        match call_conv {
981            isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
982                PAYLOAD_REGS
983            }
984            _ => &[],
985        }
986    }
987}
988
/// Lower an abstract stack address into a concrete (or
/// later-resolved synthetic) x64 addressing mode.
impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            // Incoming args: stored as an offset downward from the top
            // of the incoming-argument area.
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            // Spill-/stack-slot offsets go through
            // `SyntheticAmode::slot_offset` for later resolution.
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            // Outgoing args are directly RSP-relative.
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}
1020
1021fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1022    let is_fastcall = call_conv == CallConv::WindowsFastcall;
1023
1024    // Fastcall counts by absolute argument number; SysV counts by argument of
1025    // this (integer) class.
1026    let i = if is_fastcall { arg_idx } else { idx };
1027    match (i, is_fastcall) {
1028        (0, false) => Some(regs::rdi()),
1029        (1, false) => Some(regs::rsi()),
1030        (2, false) => Some(regs::rdx()),
1031        (3, false) => Some(regs::rcx()),
1032        (4, false) => Some(regs::r8()),
1033        (5, false) => Some(regs::r9()),
1034        (0, true) => Some(regs::rcx()),
1035        (1, true) => Some(regs::rdx()),
1036        (2, true) => Some(regs::r8()),
1037        (3, true) => Some(regs::r9()),
1038        _ => None,
1039    }
1040}
1041
1042fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1043    let is_fastcall = call_conv == CallConv::WindowsFastcall;
1044
1045    // Fastcall counts by absolute argument number; SysV counts by argument of
1046    // this (floating-point) class.
1047    let i = if is_fastcall { arg_idx } else { idx };
1048    match (i, is_fastcall) {
1049        (0, false) => Some(regs::xmm0()),
1050        (1, false) => Some(regs::xmm1()),
1051        (2, false) => Some(regs::xmm2()),
1052        (3, false) => Some(regs::xmm3()),
1053        (4, false) => Some(regs::xmm4()),
1054        (5, false) => Some(regs::xmm5()),
1055        (6, false) => Some(regs::xmm6()),
1056        (7, false) => Some(regs::xmm7()),
1057        (0, true) => Some(regs::xmm0()),
1058        (1, true) => Some(regs::xmm1()),
1059        (2, true) => Some(regs::xmm2()),
1060        (3, true) => Some(regs::xmm3()),
1061        _ => None,
1062    }
1063}
1064
/// Integer return-value register for index `intreg_idx` under
/// `call_conv`, or `None` if the value is returned elsewhere. `is_last`
/// is consulted only by Winch, which returns the final value in RAX.
fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        // The `tail` convention returns through a wide register set.
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            // NB: `r11` is reserved as a scratch register that is
            // also part of the clobber set.
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            // Third return register is an LLVM ABI extension, gated
            // behind the corresponding flag.
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => is_last.then(|| regs::rax()),
        CallConv::Probestack => todo!(),
        // Not an x64 calling convention.
        CallConv::AppleAarch64 => unreachable!(),
    }
}
1103
/// Floating-point return-value register for index `fltreg_idx` under
/// `call_conv`, or `None` if the value is returned elsewhere. `is_last`
/// is consulted only by Winch, which returns the final value in XMM0.
fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        // The `tail` convention returns through XMM0..XMM7.
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        // Not an x64 calling convention.
        CallConv::AppleAarch64 => unreachable!(),
    }
}
1131
1132fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
1133    use asm::gpr::enc::*;
1134
1135    match r.class() {
1136        RegClass::Int => match r.hw_enc() {
1137            RBX | RBP | R12 | R13 | R14 => true,
1138            // R15 is the pinned register; if we're using it that way,
1139            // it is effectively globally-allocated, and is not
1140            // callee-saved.
1141            R15 => !enable_pinned_reg,
1142            _ => false,
1143        },
1144        RegClass::Float => false,
1145        RegClass::Vector => unreachable!(),
1146    }
1147}
1148
1149fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
1150    use asm::gpr::enc::*;
1151    use asm::xmm::enc::*;
1152
1153    match r.class() {
1154        RegClass::Int => match r.hw_enc() {
1155            RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
1156            // See above for SysV: we must treat the pinned reg specially.
1157            R15 => !enable_pinned_reg,
1158            _ => false,
1159        },
1160        RegClass::Float => match r.hw_enc() {
1161            XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
1162            _ => false,
1163        },
1164        RegClass::Vector => unreachable!(),
1165    }
1166}
1167
1168fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
1169    let mut clobbered_size = 0;
1170    for reg in clobbers {
1171        match reg.to_reg().class() {
1172            RegClass::Int => {
1173                clobbered_size += 8;
1174            }
1175            RegClass::Float => {
1176                clobbered_size = align_to(clobbered_size, 16);
1177                clobbered_size += 16;
1178            }
1179            RegClass::Vector => unreachable!(),
1180        }
1181    }
1182    align_to(clobbered_size, 16)
1183}
1184
/// Clobber set used for Windows fastcall callsites.
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
/// Clobber set used for SysV-style callsites.
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
/// Clobber set for conventions that preserve nothing (e.g. Winch and
/// the exceptional edges of `tail`/`preserve_all`).
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();
/// Empty clobber set, used for normal returns from `preserve_all` callees.
const NO_CLOBBERS: PRegSet = PRegSet::empty();
1189
/// Registers treated as clobbered across a Windows fastcall call:
/// RAX, RCX, RDX, R8-R11 and XMM0-XMM5.
const fn windows_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
}
1209
/// Registers treated as clobbered across a SysV call: all GPRs except
/// RBX, RBP and R12-R15, plus every XMM register.
const fn sysv_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}
1241
/// For calling conventions that clobber all registers: every GPR and
/// every XMM register (RSP/RBP are not allocatable and so are not
/// listed).
const fn all_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RBX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::gpr_preg(R12))
        .with(regs::gpr_preg(R13))
        .with(regs::gpr_preg(R14))
        .with(regs::gpr_preg(R15))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}
1279
/// Build the regalloc2 `MachineEnv` used for SysV-style allocation.
/// When `enable_pinned_reg` is set, R15 is withheld from the
/// allocatable set so it can serve as the pinned register.
const fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    // Extract the physical register from a `Reg` known to be real.
    const fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().preg()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            PRegSet::empty()
                .with(preg(regs::rsi()))
                .with(preg(regs::rdi()))
                .with(preg(regs::rax()))
                .with(preg(regs::rcx()))
                .with(preg(regs::rdx()))
                .with(preg(regs::r8()))
                .with(preg(regs::r9()))
                .with(preg(regs::r10()))
                .with(preg(regs::r11())),
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            PRegSet::empty()
                .with(preg(regs::xmm0()))
                .with(preg(regs::xmm1()))
                .with(preg(regs::xmm2()))
                .with(preg(regs::xmm3()))
                .with(preg(regs::xmm4()))
                .with(preg(regs::xmm5()))
                .with(preg(regs::xmm6()))
                .with(preg(regs::xmm7())),
            // The Vector Regclass is unused
            PRegSet::empty(),
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            PRegSet::empty()
                .with(preg(regs::rbx()))
                .with(preg(regs::r12()))
                .with(preg(regs::r13()))
                .with(preg(regs::r14())),
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            PRegSet::empty()
                .with(preg(regs::xmm8()))
                .with(preg(regs::xmm9()))
                .with(preg(regs::xmm10()))
                .with(preg(regs::xmm11()))
                .with(preg(regs::xmm12()))
                .with(preg(regs::xmm13()))
                .with(preg(regs::xmm14()))
                .with(preg(regs::xmm15())),
            // The Vector Regclass is unused
            PRegSet::empty(),
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    // R15 is allocatable only when it is not reserved as the pinned reg.
    debug_assert!(regs::PINNED_REG == cranelift_assembler_x64::gpr::enc::R15);
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0] =
            env.non_preferred_regs_by_class[0].with(preg(regs::r15()));
    }

    env
}