cranelift_codegen/isa/x64/
abi.rs

1//! Implementation of the standard x64 ABI.
2
3use crate::CodegenResult;
4use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
5use crate::ir::{ExternalName, types::*};
6use crate::isa;
7use crate::isa::winch;
8use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
9use crate::machinst::abi::*;
10use crate::machinst::*;
11use crate::settings;
12use alloc::borrow::ToOwned;
13use alloc::boxed::Box;
14use alloc::vec::Vec;
15use args::*;
16use cranelift_assembler_x64 as asm;
17use regalloc2::{MachineEnv, PReg, PRegSet};
18use smallvec::{SmallVec, smallvec};
19use std::sync::OnceLock;
20
/// Support for the x64 ABI from the callee side (within a function body).
///
/// This is the generic `Callee` machinery instantiated with the x64-specific
/// ABI rules defined by [`X64ABIMachineSpec`].
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
23
/// Implementation of ABI primitives for x64.
///
/// A zero-sized type: all behavior lives in the trait impls below; no
/// per-instance state is required.
pub struct X64ABIMachineSpec;
26
impl X64ABIMachineSpec {
    /// Emit `probe_count` inline stack probes, each moving RSP down by
    /// `guard_size` bytes and touching the newly exposed page, then restore
    /// RSP to its original value.
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // Touch the current page by storing an immediate zero.
            // mov  [rsp], 0
            insts.push(Inst::External {
                inst: asm::inst::movl_mi::new(Amode::imm_reg(0, regs::rsp()), 0i32.cast_unsigned())
                    .into(),
            });
        }

        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    /// Emit a single `StackProbeLoop` pseudo-instruction that probes
    /// `frame_size` bytes of stack in `guard_size` steps, using a scratch
    /// register for the loop counter.
    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stacklimit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}
72
// Mark the x64 settings as usable wherever generic ABI code needs ISA flags.
impl IsaFlags for x64_settings::Flags {}
74
impl ABIMachineSpec for X64ABIMachineSpec {
    /// The x64 machine-instruction type.
    type I = Inst;

    /// x64-specific ISA flags.
    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
84
    /// x64 is a 64-bit architecture: the machine word is 64 bits wide.
    fn word_bits() -> u32 {
        64
    }
88
    /// Return required stack alignment in bytes.
    ///
    /// All calling conventions handled here use 16-byte stack alignment, so
    /// the convention argument is ignored.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }
93
    /// Compute the locations (registers and/or stack slots) for all parameters
    /// or return values of a signature, pushing one `ABIArg` per param into
    /// `args`.
    ///
    /// Returns the total stack-area size consumed (16-byte aligned) and, when
    /// `add_ret_area_ptr` is set, the index of the synthetic return-area
    /// pointer argument appended after the formal parameters.
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;
        let is_tail = call_conv == CallConv::Tail;

        // Running allocation state: next free integer register, next free
        // vector/float register, next free stack offset, and the overall
        // parameter index (Fastcall assigns registers by overall index).
        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same for simplicity sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        // If any param uses extension, the winch calling convention will not pack its results
        // on the stack and will instead align them to 8-byte boundaries the same way that all the
        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
        // Winch only uses this calling convention via trampolines, and those trampolines don't add
        // extension annotations. Additionally, handling extension attributes this way allows clif
        // functions that use them with the Winch calling convention to interact successfully with
        // testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params.iter().any(|p| {
            p.extension != ir::ArgumentExtension::None
                || p.value_type == types::F16
                || p.value_type == types::I8X2
        });

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            // Struct arguments are always passed as a stack memory area, never
            // in registers.
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, if the calling convention is Tail, we behave as in
            //   the previous case, even if `enable_llvm_abi_extensions` is not
            //   set in the flags: This is a custom calling convention defined
            //   by Cranelift, LLVM doesn't know about it.
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
                && !is_tail
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128 there is no standard way to pass/return them with
            // the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` and `f128` parameters to
            // a function are passed indirectly as pointers, so handle that as a
            // special case before the loop below.
            if (param.value_type.is_vector() || param.value_type.is_float())
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }

            // SystemV dictates that 128bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr even when storing the
                // argument on the stack to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }

            // General case: one slot per register-sized part of the value.
            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    // No register available: spill this part to the stack.
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = core::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128+ bit vector arguments are all located on the
        // stack, and stack space is reserved after all parameters are passed,
        // so allocate from the space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }
414
415    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
416        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
417        // bits as well -- see `Inst::store()`).
418        let ty = match ty {
419            types::I8 | types::I16 | types::I32 => types::I64,
420            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
421            // two.
422            types::F16 | types::I8X2 => types::F32,
423            _ => ty,
424        };
425        Inst::load(ty, mem, into_reg, ExtKind::None)
426    }
427
428    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
429        let ty = match ty {
430            // See `gen_load_stack`.
431            types::F16 | types::I8X2 => types::F32,
432            _ => ty,
433        };
434        Inst::store(ty, from_reg, mem)
435    }
436
    /// Generate a register-to-register move of a `ty`-typed value.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }
440
441    /// Generate an integer-extend operation.
442    fn gen_extend(
443        to_reg: Writable<Reg>,
444        from_reg: Reg,
445        is_signed: bool,
446        from_bits: u8,
447        to_bits: u8,
448    ) -> Self::I {
449        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
450            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
451        if is_signed {
452            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
453        } else {
454            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
455        }
456    }
457
    /// Build the `Args` pseudo-instruction from the given argument pairs.
    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }
461
    /// Build the `Rets` pseudo-instruction from the given return-value pairs.
    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }
465
466    fn gen_add_imm(
467        _call_conv: isa::CallConv,
468        into_reg: Writable<Reg>,
469        from_reg: Reg,
470        imm: u32,
471    ) -> SmallInstVec<Self::I> {
472        let mut ret = SmallVec::new();
473        if from_reg != into_reg.to_reg() {
474            ret.push(Inst::gen_move(into_reg, from_reg, I64));
475        }
476        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
477        ret.push(Inst::addq_mi(into_reg, imm));
478        ret
479    }
480
    /// Emit a stack-overflow check: compare RSP against `limit_reg` and trap
    /// when the limit exceeds the current stack pointer.
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            // `cmp %rsp, %limit_reg`
            Inst::External {
                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP,).into(),
            },
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }
493
    /// Materialize the address of a stack slot into `into_reg` with `lea`.
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::External {
            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
        }
    }
500
    /// Return a scratch register (r10) available to hold the stack-limit
    /// value in the prologue.
    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per comment on trait definition, we must return a caller-save
        // register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }
510
    /// Load a `ty`-typed value from `[base + offset]` into `into_reg`.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s, F128s and vectors; if that changes, see if
        // the ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector() || ty == F128);
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }
518
519    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
520        let ty = match ty {
521            // See `gen_load_stack`.
522            types::F16 | types::I8X2 => types::F32,
523            _ => ty,
524        };
525        let mem = Amode::imm_reg(offset, base);
526        Inst::store(ty, from_reg, mem)
527    }
528
529    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
530        let rsp = Writable::from_reg(regs::rsp());
531        let inst = if amount >= 0 {
532            Inst::addq_mi(rsp, amount)
533        } else {
534            Inst::subq_mi(rsp, -amount)
535        };
536        smallvec![inst]
537    }
538
    /// Emit the standard frame setup: `push %rbp; mov %rsp, %rbp`, with
    /// unwind metadata in between when unwind info is enabled.
    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = Gpr::RSP;
        let r_rbp = Gpr::RBP;
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16.  So here, it is 8 % 16.
        insts.push(Inst::External {
            inst: asm::inst::pushq_o::new(r_rbp).into(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
        });

        insts
    }
571
572    fn gen_epilogue_frame_restore(
573        _call_conv: isa::CallConv,
574        _flags: &settings::Flags,
575        _isa_flags: &x64_settings::Flags,
576        _frame_layout: &FrameLayout,
577    ) -> SmallInstVec<Self::I> {
578        let rbp = Gpr::RBP;
579        let rsp = Gpr::RSP;
580
581        let mut insts = SmallVec::new();
582        // `mov %rbp, %rsp`
583        insts.push(Inst::External {
584            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
585        });
586        // `pop %rbp`
587        insts.push(Inst::External {
588            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
589        });
590        insts
591    }
592
593    fn gen_return(
594        call_conv: CallConv,
595        _isa_flags: &x64_settings::Flags,
596        frame_layout: &FrameLayout,
597    ) -> SmallInstVec<Self::I> {
598        // Emit return instruction.
599        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
600            frame_layout.tail_args_size
601        } else {
602            0
603        };
604        let inst = if stack_bytes_to_pop == 0 {
605            asm::inst::retq_zo::new().into()
606        } else {
607            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
608            asm::inst::retq_i::new(stack_bytes_to_pop).into()
609        };
610        smallvec![Inst::External { inst }]
611    }
612
    /// Emit an out-of-line stack probe: load the frame size into %rax, then
    /// call the `Probestack` libcall.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include arg here: we are post-regalloc
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }
628
629    fn gen_inline_probestack(
630        insts: &mut SmallInstVec<Self::I>,
631        call_conv: isa::CallConv,
632        frame_size: u32,
633        guard_size: u32,
634    ) {
635        // Unroll at most n consecutive probes, before falling back to using a loop
636        //
637        // This was number was picked because the loop version is 38 bytes long. We can fit
638        // 4 inline probes in that space, so unroll if its beneficial in terms of code size.
639        const PROBE_MAX_UNROLL: u32 = 4;
640
641        // Calculate how many probes we need to perform. Round down, as we only
642        // need to probe whole guard_size regions we'd otherwise skip over.
643        let probe_count = frame_size / guard_size;
644        if probe_count == 0 {
645            // No probe necessary
646        } else if probe_count <= PROBE_MAX_UNROLL {
647            Self::gen_probestack_unroll(insts, guard_size, probe_count)
648        } else {
649            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
650        }
651    }
652
    /// Emit the prologue code that (1) grows the incoming-argument area when
    /// a `return_call` in this function needs more stack arguments than the
    /// caller provided, (2) allocates the fixed frame, and (3) saves all
    /// clobbered callee-save registers, with unwind metadata when enabled.
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`,
            // using r11 as a scratch register for the copy.
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            // GPRs are saved as 8-byte words; FP/vector registers as full
            // 16-byte XMM values.
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }
763
764    fn gen_clobber_restore(
765        _call_conv: isa::CallConv,
766        _flags: &settings::Flags,
767        frame_layout: &FrameLayout,
768    ) -> SmallVec<[Self::I; 16]> {
769        let mut insts = SmallVec::new();
770
771        // Restore regs by loading from offsets of RSP. We compute the offset from
772        // the same base as above in clobber_save, as RSP won't change between the
773        // prologue and epilogue.
774        let mut cur_offset =
775            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
776        for reg in &frame_layout.clobbered_callee_saves {
777            let rreg = reg.to_reg();
778            let ty = match rreg.class() {
779                RegClass::Int => types::I64,
780                RegClass::Float => types::I8X16,
781                RegClass::Vector => unreachable!(),
782            };
783
784            // Align to 8 or 16 bytes as required by the storage type of the clobber.
785            cur_offset = align_to(cur_offset, ty.bytes());
786
787            insts.push(Inst::load(
788                ty,
789                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
790                Writable::from_reg(rreg.into()),
791                ExtKind::None,
792            ));
793
794            cur_offset += ty.bytes();
795        }
796
797        let stack_size = frame_layout.fixed_frame_storage_size
798            + frame_layout.clobber_size
799            + frame_layout.outgoing_args_size;
800
801        // Adjust RSP back upward.
802        if stack_size > 0 {
803            let rsp = Writable::from_reg(regs::rsp());
804            let stack_size = i32::try_from(stack_size)
805                .expect("`stack_size` is too large to fit in a 32-bit immediate");
806            insts.push(Inst::addq_mi(rsp, stack_size));
807        }
808
809        insts
810    }
811
812    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
813        call_conv: isa::CallConv,
814        dst: Reg,
815        src: Reg,
816        size: usize,
817        mut alloc_tmp: F,
818    ) -> SmallVec<[Self::I; 8]> {
819        let mut insts = SmallVec::new();
820        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
821        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
822        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
823        let temp = alloc_tmp(Self::word_type());
824        let temp2 = alloc_tmp(Self::word_type());
825        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
826        // We use an indirect call and a full LoadExtName because we do not have
827        // information about the libcall `RelocDistance` here, so we
828        // conservatively use the more flexible calling sequence.
829        insts.push(Inst::LoadExtName {
830            dst: temp2.map(Gpr::unwrap_new),
831            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
832            offset: 0,
833            distance: RelocDistance::Far,
834        });
835        let callee_pop_size = 0;
836        insts.push(Inst::call_unknown(Box::new(CallInfo {
837            dest: RegMem::reg(temp2.to_reg()),
838            uses: smallvec![
839                CallArgPair {
840                    vreg: dst,
841                    preg: arg0
842                },
843                CallArgPair {
844                    vreg: src,
845                    preg: arg1
846                },
847                CallArgPair {
848                    vreg: temp.to_reg(),
849                    preg: arg2
850                },
851            ],
852            defs: smallvec![],
853            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
854            callee_pop_size,
855            callee_conv: call_conv,
856            caller_conv: call_conv,
857            try_call_info: None,
858            patchable: false,
859        })));
860        insts
861    }
862
863    fn get_number_of_spillslots_for_value(
864        rc: RegClass,
865        vector_scale: u32,
866        _isa_flags: &Self::F,
867    ) -> u32 {
868        // We allocate in terms of 8-byte slots.
869        match rc {
870            RegClass::Int => 1,
871            RegClass::Float => vector_scale / 8,
872            RegClass::Vector => unreachable!(),
873        }
874    }
875
876    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
877        if flags.enable_pinned_reg() {
878            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
879            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
880        } else {
881            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
882            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
883        }
884    }
885
    /// Return the set of registers clobbered by a call to a function with
    /// the given calling convention, optionally along the exceptional
    /// (`try_call` unwind) return path.
    ///
    /// NOTE: arm order is significant — the `(PreserveAll, true)` and
    /// `(WindowsFastcall, false)` arms must be matched before the later
    /// `(PreserveAll, _)` and `(_, false)` catch-alls.
    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match (call_conv_of_callee, is_exception) {
            // Exceptional returns under `tail` restore nothing: all regs.
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            // Note that "PreserveAll" actually preserves nothing at
            // the callsite if used for a `try_call`, because the
            // unwinder ABI for `try_call`s is still "no clobbered
            // register restores" for this ABI (so as to work with
            // Wasmtime).
            (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
            // Winch has no callee-saves, so every register is clobbered.
            (isa::CallConv::Winch, _) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => SYSV_CLOBBERS,
            (isa::CallConv::WindowsFastcall, false) => WINDOWS_CLOBBERS,
            // Only reachable with `is_exception == false` (true handled above):
            // on the normal return path, `preserve_all` clobbers nothing.
            (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
            // Default for remaining conventions on the normal return path.
            (_, false) => SYSV_CLOBBERS,
            (call_conv, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }
906
    /// Return the argument-extension mode to apply to a value.
    ///
    /// This backend simply honors whatever extension the signature
    /// specified; no calling convention here imposes an additional one.
    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }
913
914    fn compute_frame_layout(
915        call_conv: CallConv,
916        flags: &settings::Flags,
917        _sig: &Signature,
918        regs: &[Writable<RealReg>],
919        function_calls: FunctionCalls,
920        incoming_args_size: u32,
921        tail_args_size: u32,
922        stackslots_size: u32,
923        fixed_frame_storage_size: u32,
924        outgoing_args_size: u32,
925    ) -> FrameLayout {
926        debug_assert!(tail_args_size >= incoming_args_size);
927
928        let mut regs: Vec<Writable<RealReg>> = match call_conv {
929            // The `winch` calling convention doesn't have any callee-save
930            // registers.
931            CallConv::Winch => vec![],
932            CallConv::Fast | CallConv::SystemV | CallConv::Tail => regs
933                .iter()
934                .cloned()
935                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
936                .collect(),
937            CallConv::WindowsFastcall => regs
938                .iter()
939                .cloned()
940                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
941                .collect(),
942            // The `preserve_all` calling convention makes every reg a callee-save reg.
943            CallConv::PreserveAll => regs.iter().cloned().collect(),
944            CallConv::Probestack => todo!("probestack?"),
945            CallConv::AppleAarch64 => unreachable!(),
946        };
947        // Sort registers for deterministic code output. We can do an unstable sort because the
948        // registers will be unique (there are no dups).
949        regs.sort_unstable();
950
951        // Compute clobber size.
952        let clobber_size = compute_clobber_size(&regs);
953
954        // Compute setup area size.
955        let setup_area_size = 16; // RBP, return address
956
957        // Return FrameLayout structure.
958        FrameLayout {
959            word_bytes: 8,
960            incoming_args_size,
961            tail_args_size: align_to(tail_args_size, 16),
962            setup_area_size,
963            clobber_size,
964            fixed_frame_storage_size,
965            stackslots_size,
966            outgoing_args_size,
967            clobbered_callee_saves: regs,
968            function_calls,
969        }
970    }
971
972    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
973        // Use r11 as a temp: clobbered anyway, and
974        // not otherwise used as a return value in any of our
975        // supported calling conventions.
976        Writable::from_reg(regs::r11())
977    }
978
979    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
980        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
981        match call_conv {
982            isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
983                PAYLOAD_REGS
984            }
985            _ => &[],
986        }
987    }
988}
989
990impl From<StackAMode> for SyntheticAmode {
991    fn from(amode: StackAMode) -> Self {
992        // We enforce a 128 MB stack-frame size limit above, so these
993        // `expect()`s should never fail.
994        match amode {
995            StackAMode::IncomingArg(off, stack_args_size) => {
996                let offset = u32::try_from(off).expect(
997                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
998                );
999                SyntheticAmode::IncomingArg {
1000                    offset: stack_args_size - offset,
1001                }
1002            }
1003            StackAMode::Slot(off) => {
1004                let off = i32::try_from(off)
1005                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
1006                SyntheticAmode::slot_offset(off)
1007            }
1008            StackAMode::OutgoingArg(off) => {
1009                let off = i32::try_from(off).expect(
1010                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
1011                );
1012                SyntheticAmode::Real(Amode::ImmReg {
1013                    simm32: off,
1014                    base: regs::rsp(),
1015                    flags: MemFlags::trusted(),
1016                })
1017            }
1018        }
1019    }
1020}
1021
1022fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1023    let is_fastcall = call_conv == CallConv::WindowsFastcall;
1024
1025    // Fastcall counts by absolute argument number; SysV counts by argument of
1026    // this (integer) class.
1027    let i = if is_fastcall { arg_idx } else { idx };
1028    match (i, is_fastcall) {
1029        (0, false) => Some(regs::rdi()),
1030        (1, false) => Some(regs::rsi()),
1031        (2, false) => Some(regs::rdx()),
1032        (3, false) => Some(regs::rcx()),
1033        (4, false) => Some(regs::r8()),
1034        (5, false) => Some(regs::r9()),
1035        (0, true) => Some(regs::rcx()),
1036        (1, true) => Some(regs::rdx()),
1037        (2, true) => Some(regs::r8()),
1038        (3, true) => Some(regs::r9()),
1039        _ => None,
1040    }
1041}
1042
1043fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
1044    let is_fastcall = call_conv == CallConv::WindowsFastcall;
1045
1046    // Fastcall counts by absolute argument number; SysV counts by argument of
1047    // this (floating-point) class.
1048    let i = if is_fastcall { arg_idx } else { idx };
1049    match (i, is_fastcall) {
1050        (0, false) => Some(regs::xmm0()),
1051        (1, false) => Some(regs::xmm1()),
1052        (2, false) => Some(regs::xmm2()),
1053        (3, false) => Some(regs::xmm3()),
1054        (4, false) => Some(regs::xmm4()),
1055        (5, false) => Some(regs::xmm5()),
1056        (6, false) => Some(regs::xmm6()),
1057        (7, false) => Some(regs::xmm7()),
1058        (0, true) => Some(regs::xmm0()),
1059        (1, true) => Some(regs::xmm1()),
1060        (2, true) => Some(regs::xmm2()),
1061        (3, true) => Some(regs::xmm3()),
1062        _ => None,
1063    }
1064}
1065
/// Return the integer register used for the `intreg_idx`-th integer return
/// value under the given calling convention, or `None` if that value must
/// be returned another way (e.g. on the stack).
///
/// `is_last` is consulted only by Winch, which returns a single value in
/// rax and only for the final return slot.
fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            // NB: `r11` is reserved as a scratch register that is
            // also part of the clobber set.
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            // A third return register is an LLVM-style extension, gated
            // behind the corresponding flag.
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => is_last.then(|| regs::rax()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}
1104
/// Return the XMM register used for the `fltreg_idx`-th floating-point
/// return value under the given calling convention, or `None` if that
/// value must be returned another way.
///
/// `is_last` is consulted only by Winch, which returns a single value in
/// xmm0 and only for the final return slot.
fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        // The internal `tail` convention returns in up to eight XMM regs.
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}
1132
1133fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
1134    use asm::gpr::enc::*;
1135
1136    match r.class() {
1137        RegClass::Int => match r.hw_enc() {
1138            RBX | RBP | R12 | R13 | R14 => true,
1139            // R15 is the pinned register; if we're using it that way,
1140            // it is effectively globally-allocated, and is not
1141            // callee-saved.
1142            R15 => !enable_pinned_reg,
1143            _ => false,
1144        },
1145        RegClass::Float => false,
1146        RegClass::Vector => unreachable!(),
1147    }
1148}
1149
1150fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
1151    use asm::gpr::enc::*;
1152    use asm::xmm::enc::*;
1153
1154    match r.class() {
1155        RegClass::Int => match r.hw_enc() {
1156            RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
1157            // See above for SysV: we must treat the pinned reg specially.
1158            R15 => !enable_pinned_reg,
1159            _ => false,
1160        },
1161        RegClass::Float => match r.hw_enc() {
1162            XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
1163            _ => false,
1164        },
1165        RegClass::Vector => unreachable!(),
1166    }
1167}
1168
1169fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
1170    let mut clobbered_size = 0;
1171    for reg in clobbers {
1172        match reg.to_reg().class() {
1173            RegClass::Int => {
1174                clobbered_size += 8;
1175            }
1176            RegClass::Float => {
1177                clobbered_size = align_to(clobbered_size, 16);
1178                clobbered_size += 16;
1179            }
1180            RegClass::Vector => unreachable!(),
1181        }
1182    }
1183    align_to(clobbered_size, 16)
1184}
1185
/// Caller-saved register set for the Windows fastcall convention.
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
/// Caller-saved register set for the System V convention.
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
/// Every register treated as clobbered; used by conventions with no
/// callee-saves (Winch) and for exceptional `try_call` return paths.
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();
/// The empty clobber set (normal-path `preserve_all` calls).
const NO_CLOBBERS: PRegSet = PRegSet::empty();
1190
/// Build the fastcall clobber set: the volatile GPRs (rax, rcx, rdx,
/// r8-r11) and the volatile XMM registers (xmm0-xmm5).
const fn windows_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
}
1210
/// Build the SysV clobber set: the volatile GPRs (rax, rcx, rdx, rsi, rdi,
/// r8-r11) and all sixteen XMM registers (all caller-saved under SysV).
const fn sysv_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}
1242
/// For calling conventions that clobber all registers.
///
/// Includes every GPR (rax-r15, except rsp/rbp which are not in the
/// allocatable set) and every XMM register.
const fn all_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RBX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::gpr_preg(R12))
        .with(regs::gpr_preg(R13))
        .with(regs::gpr_preg(R14))
        .with(regs::gpr_preg(R15))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}
1280
/// Build the regalloc2 `MachineEnv` describing the allocatable registers.
///
/// "Preferred" registers are tried first by the allocator: caller-saved
/// GPRs (no save/restore cost) and the low eight XMMs (shorter AVX
/// encodings). When `enable_pinned_reg` is set, r15 is withheld from the
/// allocator entirely.
fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    // Convert a virtual-register wrapper into the underlying physical reg.
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            vec![
                preg(regs::rsi()),
                preg(regs::rdi()),
                preg(regs::rax()),
                preg(regs::rcx()),
                preg(regs::rdx()),
                preg(regs::r8()),
                preg(regs::r9()),
                preg(regs::r10()),
                preg(regs::r11()),
            ],
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            vec![
                preg(regs::xmm0()),
                preg(regs::xmm1()),
                preg(regs::xmm2()),
                preg(regs::xmm3()),
                preg(regs::xmm4()),
                preg(regs::xmm5()),
                preg(regs::xmm6()),
                preg(regs::xmm7()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            vec![
                preg(regs::rbx()),
                preg(regs::r12()),
                preg(regs::r13()),
                preg(regs::r14()),
            ],
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            vec![
                preg(regs::xmm8()),
                preg(regs::xmm9()),
                preg(regs::xmm10()),
                preg(regs::xmm11()),
                preg(regs::xmm12()),
                preg(regs::xmm13()),
                preg(regs::xmm14()),
                preg(regs::xmm15()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    // r15 doubles as the pinned register; only offer it to the allocator
    // when pinning is disabled.
    debug_assert_eq!(regs::r15(), regs::pinned_reg());
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
    }

    env
}