cranelift_codegen/isa/x64/abi.rs

//! Implementation of the standard x64 ABI.

use crate::ir::{self, types, LibCall, MemFlags, Signature, TrapCode};
use crate::ir::{types::*, ExternalName};
use crate::isa;
use crate::isa::winch;
use crate::isa::x64::X64Backend;
use crate::isa::{unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings, CallConv};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{smallvec, SmallVec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Support for the x64 ABI from the caller side (at a callsite).
pub(crate) type X64CallSite = CallSite<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;

impl X64ABIMachineSpec {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // TODO: It would be nice if we could store the immediate 0, but we don't have
            // an instruction for that, so store the stack pointer instead. Any register
            // will do, since the stack is undefined at this point.
            insts.push(Inst::store(
                I32,
                regs::rsp(),
                Amode::imm_reg(0, regs::rsp()),
            ));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }
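
    // A rough sketch of what the unrolled form above emits, assuming a 4 KiB
    // guard size and `probe_count == 2` (the store is 32 bits wide, per the
    // `I32` type passed to `Inst::store` above):
    //
    //   sub rsp, 4096
    //   mov [rsp], esp
    //   sub rsp, 4096
    //   mov [rsp], esp
    //   add rsp, 8192    ; restore the original stack pointer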

    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller-saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stack-limit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}

impl IsaFlags for x64_settings::Flags {}

impl ABIMachineSpec for X64ABIMachineSpec {
    type I = Inst;

    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return the required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;

        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about the overall param index.

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same for simplicity's sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        // If any param uses extension, the Winch calling convention will not pack its results
        // on the stack, and will instead align them to 8-byte boundaries the same way that all
        // the other calling conventions do. This isn't consistent with Winch itself, but is
        // fine, as Winch only uses this calling convention via trampolines, and those
        // trampolines don't add extension annotations. Additionally, handling extension
        // attributes this way allows clif functions that use them with the Winch calling
        // convention to interact successfully with testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params
            .iter()
            .any(|p| p.extension != ir::ArgumentExtension::None || p.value_type == types::F16);

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find the regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of a
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128, there is no standard way to pass/return them
            // with the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` parameters to a function
            // are passed indirectly as pointers, so handle that as a special
            // case before the loop below.
            if param.value_type.is_vector()
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }

            // SystemV dictates that 128-bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr even when storing the
                // argument on the stack, to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }
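
            // A quick sketch of the rule above (hypothetical signature): given
            // SysV args `(i64, i64, i64, i64, i64, i128)`, the first five i64s
            // consume rdi/rsi/rdx/rcx/r8, leaving only r9. The register-pair
            // lookup then yields `(Some(r9), None)`, so the whole i128 is
            // placed on the stack, and `next_gpr` is still bumped by 2 so that
            // r9 is not handed to a later argument.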

            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = std::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128+ bit vector arguments are all located on the
        // stack, and stack space is reserved after all parameters are passed,
        // so allocate from that space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }
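
    // A worked example of the assignment above (a sketch; the concrete
    // registers come from `get_intreg_for_arg`/`get_fltreg_for_arg` below):
    // for an argument list `(i64, f32, i64)`, SystemV counts each register
    // class separately and assigns rdi, xmm0, rsi, while WindowsFastcall
    // counts by absolute parameter index and assigns rcx, xmm1, r8.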

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of
            // only two.
            types::F16 => types::F32,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 => types::F32,
            _ => ty,
        };
        Inst::store(ty, from_reg, mem)
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
        if is_signed {
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        } else {
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        }
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let mut ret = SmallVec::new();
        if from_reg != into_reg.to_reg() {
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
        }
        ret.push(Inst::alu_rmi_r(
            OperandSize::Size64,
            AluRmiROpcode::Add,
            RegMemImm::imm(imm),
            into_reg,
        ));
        ret
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::cmp_rmi_r(OperandSize::Size64, limit_reg, RegMemImm::reg(regs::rsp())),
            Inst::TrapIf {
                // NBE == "> unsigned"; the args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::lea(mem, into_reg)
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per the comment on the trait definition, we must return a caller-save
        // register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s and vectors; if that changes, see if the
        // ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector());
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 => types::F32,
            _ => ty,
        };
        let mem = Amode::imm_reg(offset, base);
        Inst::store(ty, from_reg, mem)
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        let (alu_op, amount) = if amount >= 0 {
            (AluRmiROpcode::Add, amount)
        } else {
            (AluRmiROpcode::Sub, -amount)
        };

        let amount = amount as u32;

        smallvec![Inst::alu_rmi_r(
            OperandSize::Size64,
            alu_op,
            RegMemImm::imm(amount),
            Writable::from_reg(regs::rsp()),
        )]
    }
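
    // For instance, `gen_sp_reg_adjust(-32)` above yields a single
    // `sub rsp, 32`, and `gen_sp_reg_adjust(32)` the matching `add rsp, 32`;
    // the stack-probe code above builds on this helper.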

    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = regs::rsp();
        let r_rbp = regs::rbp();
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::push64(RegMemImm::reg(r_rbp)));

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16.
        insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));

        insts
    }
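
    // After this setup (and before `gen_clobber_save` below grows the frame),
    // the stack looks roughly like this, growing downward:
    //
    //   ... caller's frame ...
    //   return address                      <- pushed by the `call`
    //   saved RBP                           <- RBP points here; these two
    //                                          slots are the 16-byte setup area
    //   clobbers / spills / outgoing args   <- allocated by gen_clobber_save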

    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let mut insts = SmallVec::new();
        // `mov %rbp, %rsp`
        insts.push(Inst::mov_r_r(
            OperandSize::Size64,
            regs::rbp(),
            Writable::from_reg(regs::rsp()),
        ));
        // `pop %rbp`
        insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
        insts
    }

    fn gen_return(
        call_conv: CallConv,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        // Emit the return instruction.
        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
            frame_layout.tail_args_size
        } else {
            0
        };
        smallvec![Inst::ret(stack_bytes_to_pop)]
    }

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include the arg here: we are post-regalloc,
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes before falling back to using a loop.
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 4 inline probes in that space, so unroll if it's beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 4;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary.
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
        }
    }
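
    // To make the cutoff concrete (assuming the common 4 KiB guard size): a
    // 12 KiB frame gives `probe_count = 3`, which is unrolled, while a 20 KiB
    // frame gives `probe_count = 5 > PROBE_MAX_UNROLL` and emits the loop.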

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function requires more stack arguments than are
        // present, resize the incoming argument area of the frame to accommodate those
        // arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Sub,
                RegMemImm::imm(incoming_args_diff),
                Writable::from_reg(regs::rsp()),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at this point.
            insts.push(Inst::mov_r_r(
                OperandSize::Size64,
                regs::rsp(),
                Writable::from_reg(regs::rbp()),
            ));

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            insts.push(Inst::mov64_m_r(
                Amode::imm_reg(incoming_args_diff, regs::rsp()),
                Writable::from_reg(regs::r11()),
            ));
            insts.push(Inst::mov_r_m(
                OperandSize::Size64,
                regs::r11(),
                Amode::imm_reg(0, regs::rsp()),
            ));

            // Move the saved return address down by `incoming_args_diff`.
            insts.push(Inst::mov64_m_r(
                Amode::imm_reg(incoming_args_diff + 8, regs::rsp()),
                Writable::from_reg(regs::r11()),
            ));
            insts.push(Inst::mov_r_m(
                OperandSize::Size64,
                regs::r11(),
                Amode::imm_reg(8, regs::rsp()),
            ));
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from the unwind
            // consumers' point of view) starts at the clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function's fixed
        // frame (spillslots, storage slots, and the argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Sub,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offsets from
        // the same base as above in `gen_clobber_save`, as RSP won't change between
        // the prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            insts.push(Inst::alu_rmi_r(
                OperandSize::Size64,
                AluRmiROpcode::Add,
                RegMemImm::imm(stack_size),
                Writable::from_reg(regs::rsp()),
            ));
        }

        insts
    }

    /// Generate a call instruction/sequence.
    fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> {
        let mut insts = SmallVec::new();
        match dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => {
                let info = Box::new(info.map(|()| name.clone()));
                insts.push(Inst::call_known(info));
            }
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push(Inst::LoadExtName {
                    dst: tmp,
                    name: Box::new(name.clone()),
                    offset: 0,
                    distance: RelocDistance::Far,
                });
                let info = Box::new(info.map(|()| RegMem::reg(tmp.to_reg())));
                insts.push(Inst::call_unknown(info));
            }
            &CallDest::Reg(reg) => {
                let info = Box::new(info.map(|()| RegMem::reg(reg)));
                insts.push(Inst::call_unknown(info));
            }
        }
        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall's `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2,
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
        })));
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_scale: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_scale / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
        }
    }

    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
        match call_conv_of_callee {
            CallConv::Winch => ALL_CLOBBERS,
            CallConv::WindowsFastcall => WINDOWS_CLOBBERS,
            _ => SYSV_CLOBBERS,
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        _is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::Probestack => todo!("probestack?"),
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because
        // the registers will be unique (there are no duplicates).
        regs.sort_unstable();

        // Compute the clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute the setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return the FrameLayout structure.
        FrameLayout {
            incoming_args_size,
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }
}

impl X64CallSite {
    pub fn emit_return_call(
        mut self,
        ctx: &mut Lower<Inst>,
        args: isle::ValueSlice,
        _backend: &X64Backend,
    ) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);

        // Finally, do the actual tail call!
        let dest = self.dest().clone();
        let uses = self.take_uses();
        let tmp = ctx.temp_writable_gpr();
        match dest {
            CallDest::ExtName(callee, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo {
                    dest: callee,
                    uses,
                    tmp,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallKnown { info });
            }
            CallDest::ExtName(callee, RelocDistance::Far) => {
                let tmp2 = ctx.temp_writable_gpr();
                ctx.emit(Inst::LoadExtName {
                    dst: tmp2.to_writable_reg(),
                    name: Box::new(callee),
                    offset: 0,
                    distance: RelocDistance::Far,
                });
                let info = Box::new(ReturnCallInfo {
                    dest: tmp2.to_reg().to_reg().into(),
                    uses,
                    tmp,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallUnknown { info });
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo {
                    dest: callee.into(),
                    uses,
                    tmp,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallUnknown { info });
            }
        }
    }
}

impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}
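
// A small worked example of the conversion above: a hypothetical
// `StackAMode::IncomingArg(8, 32)` becomes
// `SyntheticAmode::IncomingArg { offset: 24 }` -- the offset is re-expressed
// from the opposite end of the 32-byte incoming-argument area (32 - 8 = 24).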

fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (integer) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::rdi()),
        (1, false) => Some(regs::rsi()),
        (2, false) => Some(regs::rdx()),
        (3, false) => Some(regs::rcx()),
        (4, false) => Some(regs::r8()),
        (5, false) => Some(regs::r9()),
        (0, true) => Some(regs::rcx()),
        (1, true) => Some(regs::rdx()),
        (2, true) => Some(regs::r8()),
        (3, true) => Some(regs::r9()),
        _ => None,
    }
}

fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (floating-point) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::xmm0()),
        (1, false) => Some(regs::xmm1()),
        (2, false) => Some(regs::xmm2()),
        (3, false) => Some(regs::xmm3()),
        (4, false) => Some(regs::xmm4()),
        (5, false) => Some(regs::xmm5()),
        (6, false) => Some(regs::xmm6()),
        (7, false) => Some(regs::xmm7()),
        (0, true) => Some(regs::xmm0()),
        (1, true) => Some(regs::xmm1()),
        (2, true) => Some(regs::xmm2()),
        (3, true) => Some(regs::xmm3()),
        _ => None,
    }
}

fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            8 => Some(regs::r11()),
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => {
            // TODO: Once Winch supports SIMD, this will need to be updated to support values
            // returned in more than one register.
            // https://github.com/bytecodealliance/wasmtime/issues/8093
            is_last.then(|| regs::rax())
        }
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
    use regs::*;
    match r.class() {
        RegClass::Int => match r.hw_enc() {
            ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 => true,
            // R15 is the pinned register; if we're using it that way,
            // it is effectively globally-allocated, and is not
            // callee-saved.
            ENC_R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => false,
        RegClass::Vector => unreachable!(),
    }
}

fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
    use regs::*;
    match r.class() {
        RegClass::Int => match r.hw_enc() {
            ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 => true,
            // See above for SysV: we must treat the pinned reg specially.
            ENC_R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => match r.hw_enc() {
            6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
            _ => false,
        },
        RegClass::Vector => unreachable!(),
    }
}

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size = align_to(clobbered_size, 16);
                clobbered_size += 16;
            }
            RegClass::Vector => unreachable!(),
        }
    }
    align_to(clobbered_size, 16)
}
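
// For example (a hypothetical clobber set): saving rbx, r12, and xmm8 yields
// 8 + 8 = 16 bytes for the two GPRs, then the float slot aligns to 16 and adds
// 16 more, for 32 bytes total -- already a multiple of 16, so the final
// `align_to` leaves it unchanged.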

const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();

const fn windows_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
}

const fn sysv_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_RSI))
        .with(regs::gpr_preg(regs::ENC_RDI))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
        .with(regs::fpr_preg(6))
        .with(regs::fpr_preg(7))
        .with(regs::fpr_preg(8))
        .with(regs::fpr_preg(9))
        .with(regs::fpr_preg(10))
        .with(regs::fpr_preg(11))
        .with(regs::fpr_preg(12))
        .with(regs::fpr_preg(13))
        .with(regs::fpr_preg(14))
        .with(regs::fpr_preg(15))
}

/// For calling conventions that clobber all registers.
const fn all_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(regs::gpr_preg(regs::ENC_RAX))
        .with(regs::gpr_preg(regs::ENC_RCX))
        .with(regs::gpr_preg(regs::ENC_RDX))
        .with(regs::gpr_preg(regs::ENC_RBX))
        .with(regs::gpr_preg(regs::ENC_RSI))
        .with(regs::gpr_preg(regs::ENC_RDI))
        .with(regs::gpr_preg(regs::ENC_R8))
        .with(regs::gpr_preg(regs::ENC_R9))
        .with(regs::gpr_preg(regs::ENC_R10))
        .with(regs::gpr_preg(regs::ENC_R11))
        .with(regs::gpr_preg(regs::ENC_R12))
        .with(regs::gpr_preg(regs::ENC_R13))
        .with(regs::gpr_preg(regs::ENC_R14))
        .with(regs::gpr_preg(regs::ENC_R15))
        .with(regs::fpr_preg(0))
        .with(regs::fpr_preg(1))
        .with(regs::fpr_preg(2))
        .with(regs::fpr_preg(3))
        .with(regs::fpr_preg(4))
        .with(regs::fpr_preg(5))
        .with(regs::fpr_preg(6))
        .with(regs::fpr_preg(7))
        .with(regs::fpr_preg(8))
        .with(regs::fpr_preg(9))
        .with(regs::fpr_preg(10))
        .with(regs::fpr_preg(11))
        .with(regs::fpr_preg(12))
        .with(regs::fpr_preg(13))
        .with(regs::fpr_preg(14))
        .with(regs::fpr_preg(15))
}

fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            vec![
                preg(regs::rsi()),
                preg(regs::rdi()),
                preg(regs::rax()),
                preg(regs::rcx()),
                preg(regs::rdx()),
                preg(regs::r8()),
                preg(regs::r9()),
                preg(regs::r10()),
                preg(regs::r11()),
            ],
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            vec![
                preg(regs::xmm0()),
                preg(regs::xmm1()),
                preg(regs::xmm2()),
                preg(regs::xmm3()),
                preg(regs::xmm4()),
                preg(regs::xmm5()),
                preg(regs::xmm6()),
                preg(regs::xmm7()),
            ],
            // The Vector regclass is unused.
            vec![],
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            vec![
                preg(regs::rbx()),
                preg(regs::r12()),
                preg(regs::r13()),
                preg(regs::r14()),
            ],
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            vec![
                preg(regs::xmm8()),
                preg(regs::xmm9()),
                preg(regs::xmm10()),
                preg(regs::xmm11()),
                preg(regs::xmm12()),
                preg(regs::xmm13()),
                preg(regs::xmm14()),
                preg(regs::xmm15()),
            ],
            // The Vector regclass is unused.
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    debug_assert_eq!(regs::r15(), regs::pinned_reg());
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
    }

    env
}