cranelift_codegen/isa/x64/abi.rs

//! Implementation of the standard x64 ABI.

use crate::CodegenResult;
use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
use crate::ir::{ExternalName, types::*};
use crate::isa;
use crate::isa::winch;
use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use cranelift_assembler_x64 as asm;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;

impl X64ABIMachineSpec {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
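        // Worked example (illustrative numbers, not from the source): with a
        // 4 KiB guard region and probe_count = 3, this emits
        //     sub rsp, 4096 ; mov [rsp], rsp    (three times)
        // followed by `add rsp, 12288`, leaving RSP unchanged overall while
        // touching each guard-sized region once.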
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // TODO: It would be nice to store an immediate 0 here, but we don't have an
            // instruction for that, so store the stack pointer instead. Any register will do,
            // since the stack contents are undefined at this point.
            insts.push(Inst::store(
                I32,
                regs::rsp(),
                Amode::imm_reg(0, regs::rsp()),
            ));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller-saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stacklimit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}

impl IsaFlags for x64_settings::Flags {}

impl ABIMachineSpec for X64ABIMachineSpec {
    type I = Inst;

    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return the required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;
        let is_tail = call_conv == CallConv::Tail;

        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about the overall param index.

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }
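        // Illustrative consequence of the shadow space: for a two-argument
        // `fn(i32, i32)` under fastcall, the arguments land in rcx/rdx, yet
        // any stack arguments that follow (the fifth onward) still start at
        // offset 32.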

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same for simplicity's sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        // If any param uses extension, the winch calling convention will not pack its results
        // on the stack and will instead align them to 8-byte boundaries the same way that all the
        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
        // Winch only uses this calling convention via trampolines, and those trampolines don't add
        // extension annotations. Additionally, handling extension attributes this way allows clif
        // functions that use them with the Winch calling convention to interact successfully with
        // testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params.iter().any(|p| {
            p.extension != ir::ArgumentExtension::None
                || p.value_type == types::F16
                || p.value_type == types::I8X2
        });

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find the regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, if the calling convention is Tail, we behave as in
            //   the previous case, even if `enable_llvm_abi_extensions` is not
            //   set in the flags: this is a custom calling convention defined
            //   by Cranelift; LLVM doesn't know about it.
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of a
            //   vector register, a register pair, or passing by reference,
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob
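            //
            // Concretely (an illustrative sketch, not from the source): under
            // SysV with the extensions enabled, a first i128 argument occupies
            // rdi:rsi as two i64 halves; if only one integer register remained
            // free, both halves would be forced onto the stack together.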

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
                && !is_tail
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128, there is no standard way to pass/return them with
            // the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` and `f128` parameters to
            // a function are passed indirectly as pointers, so handle that as a
            // special case before the loop below.
            if (param.value_type.is_vector() || param.value_type.is_float())
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }

            // SystemV dictates that 128-bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr even when storing the
                // argument on the stack, to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }

            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = std::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

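                    // E.g. an i32 return under the Winch convention takes a
                    // packed 4-byte slot here, while under the other
                    // conventions it is widened to an 8-byte, 8-aligned slot.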
                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128-bit-or-wider vector arguments are all located
        // on the stack, and stack space is reserved after all parameters are
        // passed, so allocate from that space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
            // two.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::store(ty, from_reg, mem)
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
        if is_signed {
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        } else {
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        }
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let mut ret = SmallVec::new();
        if from_reg != into_reg.to_reg() {
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
        }
        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
        ret.push(Inst::addq_mi(into_reg, imm));
        ret
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::External {
                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP).into(),
            },
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::External {
            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
        }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per the comment on the trait definition, we must return a
        // caller-save register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s, F128s and vectors; if that changes, see if
        // the ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector() || ty == F128);
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        let mem = Amode::imm_reg(offset, base);
        Inst::store(ty, from_reg, mem)
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        let rsp = Writable::from_reg(regs::rsp());
        let inst = if amount >= 0 {
            Inst::addq_mi(rsp, amount)
        } else {
            Inst::subq_mi(rsp, -amount)
        };
        smallvec![inst]
    }

    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = Gpr::RSP;
        let r_rbp = Gpr::RBP;
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::External {
            inst: asm::inst::pushq_o::new(r_rbp).into(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16.
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
        });

        insts
    }

    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let rbp = Gpr::RBP;
        let rsp = Gpr::RSP;

        let mut insts = SmallVec::new();
        // `mov %rbp, %rsp`
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
        });
        // `pop %rbp`
        insts.push(Inst::External {
            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
        });
        insts
    }

    fn gen_return(
        call_conv: CallConv,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        // Emit return instruction.
        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
            frame_layout.tail_args_size
        } else {
            0
        };
        let inst = if stack_bytes_to_pop == 0 {
            asm::inst::retq_zo::new().into()
        } else {
            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
            asm::inst::retq_i::new(stack_bytes_to_pop).into()
        };
        smallvec![Inst::External { inst }]
    }

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include arg here: we are post-regalloc
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes before falling back to using a loop.
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 4 inline probes in that space, so unroll if it's beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 4;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
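        // E.g. (illustrative): a 12 KiB frame with a 4 KiB guard gives
        // probe_count = 3 and is unrolled; a 20 KiB frame gives 5, which
        // exceeds PROBE_MAX_UNROLL and falls back to the loop.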
        if probe_count == 0 {
            // No probe necessary.
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
        }
    }

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function requires more stack arguments than are
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
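        // Illustrative scenario (values assumed for the example): if this
        // frame was entered with 16 bytes of incoming arguments but a
        // return_call needs 32, incoming_args_diff is 16; the code below
        // grows the frame by that amount and slides the saved RBP and return
        // address down so the frame shape established by the prologue is
        // preserved.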
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at the clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offset from
        // the same base as above in clobber_save, as RSP won't change between the
        // prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::addq_mi(rsp, stack_size));
        }

        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall's `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2.map(Gpr::unwrap_new),
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
            try_call_info: None,
        })));
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_scale: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
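        // E.g. a 16-byte vector value (vector_scale = 16) in the float class
        // needs 16 / 8 = 2 slots.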
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_scale / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
        }
    }

    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match (call_conv_of_callee, is_exception) {
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, _) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => SYSV_CLOBBERS,
            (isa::CallConv::WindowsFastcall, false) => WINDOWS_CLOBBERS,
            (_, false) => SYSV_CLOBBERS,
            (call_conv, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::Probestack => todo!("probestack?"),
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no duplicates).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return the FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use r11 as a temp: it is clobbered anyway, and
        // not otherwise used as a return value in any of our
        // supported calling conventions.
        Writable::from_reg(regs::r11())
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
            _ => &[],
        }
    }
}

impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
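                // E.g. (illustrative): with stack_args_size = 32 and off = 8,
                // the synthetic offset is 24, i.e. the slot's distance back
                // from the top of the incoming-argument area.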
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}

fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (integer) class.
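    // E.g. for `fn(f64, i64)`: SysV places the i64 in rdi (it is the first
    // integer-class argument), while fastcall places it in rdx (absolute
    // argument index 1, since the f64 consumed index 0).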
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::rdi()),
        (1, false) => Some(regs::rsi()),
        (2, false) => Some(regs::rdx()),
        (3, false) => Some(regs::rcx()),
        (4, false) => Some(regs::r8()),
        (5, false) => Some(regs::r9()),
        (0, true) => Some(regs::rcx()),
        (1, true) => Some(regs::rdx()),
        (2, true) => Some(regs::r8()),
        (3, true) => Some(regs::r9()),
        _ => None,
    }
}

fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (floating-point) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::xmm0()),
        (1, false) => Some(regs::xmm1()),
        (2, false) => Some(regs::xmm2()),
        (3, false) => Some(regs::xmm3()),
        (4, false) => Some(regs::xmm4()),
        (5, false) => Some(regs::xmm5()),
        (6, false) => Some(regs::xmm6()),
        (7, false) => Some(regs::xmm7()),
        (0, true) => Some(regs::xmm0()),
        (1, true) => Some(regs::xmm1()),
        (2, true) => Some(regs::xmm2()),
        (3, true) => Some(regs::xmm3()),
        _ => None,
    }
}

fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            // NB: `r11` is reserved as a scratch register that is
            // also part of the clobber set.
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => is_last.then(|| regs::rax()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | R12 | R13 | R14 => true,
            // R15 is the pinned register; if we're using it that way,
            // it is effectively globally-allocated, and is not
            // callee-saved.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => false,
        RegClass::Vector => unreachable!(),
    }
}

fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
            // See above for SysV: we must treat the pinned reg specially.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => match r.hw_enc() {
            XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
            _ => false,
        },
        RegClass::Vector => unreachable!(),
    }
}

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
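    // E.g. clobbers = [rbx, r12, xmm6]: 8 + 8 for the GPRs, align to 16,
    // then 16 for the XMM, giving 32 bytes (already 16-aligned).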
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size = align_to(clobbered_size, 16);
                clobbered_size += 16;
            }
            RegClass::Vector => unreachable!(),
        }
    }
    align_to(clobbered_size, 16)
}

const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();

const fn windows_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
}

const fn sysv_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

/// For calling conventions that clobber all registers.
const fn all_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RBX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::gpr_preg(R12))
        .with(regs::gpr_preg(R13))
        .with(regs::gpr_preg(R14))
        .with(regs::gpr_preg(R15))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            vec![
                preg(regs::rsi()),
                preg(regs::rdi()),
                preg(regs::rax()),
                preg(regs::rcx()),
                preg(regs::rdx()),
                preg(regs::r8()),
                preg(regs::r9()),
                preg(regs::r10()),
                preg(regs::r11()),
            ],
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            vec![
                preg(regs::xmm0()),
                preg(regs::xmm1()),
                preg(regs::xmm2()),
                preg(regs::xmm3()),
                preg(regs::xmm4()),
                preg(regs::xmm5()),
                preg(regs::xmm6()),
                preg(regs::xmm7()),
            ],
            // The Vector regclass is unused.
            vec![],
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            vec![
                preg(regs::rbx()),
                preg(regs::r12()),
                preg(regs::r13()),
                preg(regs::r14()),
            ],
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            vec![
                preg(regs::xmm8()),
                preg(regs::xmm9()),
                preg(regs::xmm10()),
                preg(regs::xmm11()),
                preg(regs::xmm12()),
                preg(regs::xmm13()),
                preg(regs::xmm14()),
                preg(regs::xmm15()),
            ],
            // The Vector regclass is unused.
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    debug_assert_eq!(regs::r15(), regs::pinned_reg());
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
    }

    env
}