// cranelift_codegen/isa/x64/inst/mod.rs

1//! This module defines x86_64-specific machine instruction types.
2
3pub use emit_state::EmitState;
4
5use crate::binemit::{Addend, CodeOffset, Reloc};
6use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
7use crate::isa::x64::abi::X64ABIMachineSpec;
8use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
9use crate::isa::x64::settings as x64_settings;
10use crate::isa::{CallConv, FunctionAlignment};
11use crate::{CodegenError, CodegenResult, settings};
12use crate::{machinst::*, trace};
13use alloc::boxed::Box;
14use alloc::vec::Vec;
15use core::slice;
16use cranelift_assembler_x64 as asm;
17use smallvec::{SmallVec, smallvec};
18use std::fmt::{self, Write};
19use std::string::{String, ToString};
20
21pub mod args;
22mod emit;
23mod emit_state;
24#[cfg(test)]
25mod emit_tests;
26pub mod external;
27pub mod regs;
28mod stack_switch;
29pub mod unwind;
30
31use args::*;
32
33//=============================================================================
34// Instructions (top level): definition
35
36// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
37pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
38pub use super::lower::isle::generated_code::MInst as Inst;
39
40/// Out-of-line data for return-calls, to keep the size of `Inst` down.
/// Out-of-line data for return-calls, to keep the size of `Inst` down.
///
/// `T` is the call destination: presumably `ExternalName` for
/// `ReturnCallKnown` and a register/memory operand for `ReturnCallUnknown`,
/// mirroring how `CallInfo<T>` is used by `call_known`/`call_unknown` —
/// confirm against the ISLE instruction definitions.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}
56
#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // Guard against unintentional growth of the `Inst` enum: every variant
    // pays for the largest one, so a deliberate size change should update
    // this constant consciously rather than slip through unnoticed.
    let expected_bytes = 48;
    assert_eq!(std::mem::size_of::<Inst>(), expected_bytes);
}
64
/// Returns whether `x`, viewed as a signed 64-bit value, is reproduced
/// exactly by sign-extending its low 32 bits — i.e. whether it fits in a
/// sign-extended 32-bit immediate field.
pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
    // Equivalent to the shift-based check `xs == ((xs << 32) >> 32)`:
    // the value round-trips through 32 bits iff it is in `i32`'s range.
    i32::try_from(x as i64).is_ok()
}
69
impl Inst {
    /// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
    /// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
    /// below); more than one `InstructionSet` in the list indicates that the instruction is present
    /// *any* of the included ISA feature sets.
    fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
        match self {
            // These instructions are part of SSE2, which is a basic requirement in Cranelift, and
            // don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::Cmove { .. }
            | Inst::CmpRmiR { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::Fence { .. }
            | Inst::Hlt
            | Inst::Imm { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::JmpUnknown { .. }
            | Inst::LoadEffectiveAddress { .. }
            | Inst::LoadExtName { .. }
            | Inst::LockCmpxchg { .. }
            | Inst::LockXadd { .. }
            | Inst::Xchg { .. }
            | Inst::MovImmM { .. }
            | Inst::MovRM { .. }
            | Inst::MovRR { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::Nop { .. }
            | Inst::Pop64 { .. }
            | Inst::Push64 { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Setcc { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::Ud2 { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmCmpRmR { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. } => smallvec![],

            // 128-bit atomic sequences are built around `cmpxchg16b`, which
            // requires its own CPUID feature bit.
            Inst::LockCmpxchg16b { .. }
            | Inst::Atomic128RmwSeq { .. }
            | Inst::Atomic128XchgSeq { .. } => smallvec![InstructionSet::CMPXCHG16b],

            // These use dynamic SSE opcodes.
            Inst::XmmRmR { op, .. }
            | Inst::XmmRmRUnaligned { op, .. }
            | Inst::XmmRmRBlend { op, .. }
            | Inst::XmmRmRImm { op, .. }
            | Inst::XmmUnaryRmRImm { op, .. }
            | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],

            // EVEX-encoded instructions: here `op.available_from()` already
            // yields the full feature list, so it is returned directly rather
            // than wrapped in `smallvec!`.
            Inst::XmmUnaryRmREvex { op, .. }
            | Inst::XmmRmREvex { op, .. }
            | Inst::XmmRmREvex3 { op, .. }
            | Inst::XmmUnaryRmRImmEvex { op, .. } => op.available_from(),

            // VEX-encoded instructions; as above, the opcode supplies its own
            // feature list.
            Inst::XmmRmiRVex { op, .. }
            | Inst::XmmRmRVex3 { op, .. }
            | Inst::XmmRmRImmVex { op, .. }
            | Inst::XmmRmRBlendVex { op, .. }
            | Inst::XmmUnaryRmRVex { op, .. }
            | Inst::XmmUnaryRmRImmVex { op, .. }
            | Inst::XmmMovRMVex { op, .. }
            | Inst::XmmMovRMImmVex { op, .. }
            | Inst::XmmToGprImmVex { op, .. }
            | Inst::XmmCmpRmRVex { op, .. } => op.available_from(),

            // Instructions from the external assembler declare their own
            // feature requirements; translate them into `InstructionSet`
            // entries. `_64b` and `compat` are part of the baseline and need
            // no check.
            Inst::External { inst } => {
                use cranelift_assembler_x64::Feature::*;
                let mut features = smallvec![];
                for f in inst.features() {
                    match f {
                        _64b | compat => {}
                        sse => features.push(InstructionSet::SSE),
                        sse2 => features.push(InstructionSet::SSE2),
                        sse3 => features.push(InstructionSet::SSE3),
                        ssse3 => features.push(InstructionSet::SSSE3),
                        sse41 => features.push(InstructionSet::SSE41),
                        sse42 => features.push(InstructionSet::SSE42),
                        bmi1 => features.push(InstructionSet::BMI1),
                        bmi2 => features.push(InstructionSet::BMI2),
                        lzcnt => features.push(InstructionSet::Lzcnt),
                        popcnt => features.push(InstructionSet::Popcnt),
                        avx => features.push(InstructionSet::AVX),
                    }
                }
                features
            }
        }
    }
}
186
187// Handy constructors for Insts.
188
189impl Inst {
190    pub(crate) fn nop(len: u8) -> Self {
191        debug_assert!(len <= 15);
192        Self::Nop { len }
193    }
194
195    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
196        let inst = if let Ok(simm8) = i8::try_from(simm32) {
197            asm::inst::addq_mi_sxb::new(dst, simm8).into()
198        } else {
199            asm::inst::addq_mi_sxl::new(dst, simm32).into()
200        };
201        Inst::External { inst }
202    }
203
204    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
205        let inst = if let Ok(simm8) = i8::try_from(simm32) {
206            asm::inst::subq_mi_sxb::new(dst, simm8).into()
207        } else {
208            asm::inst::subq_mi_sxl::new(dst, simm32).into()
209        };
210        Inst::External { inst }
211    }
212
213    pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
214        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
215        debug_assert!(dst.to_reg().class() == RegClass::Int);
216        // Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches
217        // the semantics of movl).
218        let dst_size = match dst_size {
219            OperandSize::Size64 if simm64 > u32::max_value() as u64 => OperandSize::Size64,
220            _ => OperandSize::Size32,
221        };
222        Inst::Imm {
223            dst_size,
224            simm64,
225            dst: WritableGpr::from_writable_reg(dst).unwrap(),
226        }
227    }
228
229    pub(crate) fn mov_r_r(size: OperandSize, src: Reg, dst: Writable<Reg>) -> Inst {
230        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
231        debug_assert!(src.class() == RegClass::Int);
232        debug_assert!(dst.to_reg().class() == RegClass::Int);
233        let src = Gpr::unwrap_new(src);
234        let dst = WritableGpr::from_writable_reg(dst).unwrap();
235        Inst::MovRR { size, src, dst }
236    }
237
238    /// Convenient helper for unary float operations.
239    #[cfg(test)]
240    pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
241        src.assert_regclass_is(RegClass::Float);
242        debug_assert!(dst.to_reg().class() == RegClass::Float);
243        Inst::XmmUnaryRmR {
244            op,
245            src: XmmMemAligned::unwrap_new(src),
246            dst: WritableXmm::from_writable_reg(dst).unwrap(),
247        }
248    }
249
250    #[cfg(test)]
251    pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
252        src.assert_regclass_is(RegClass::Float);
253        debug_assert!(dst.to_reg().class() == RegClass::Float);
254        Inst::XmmRmR {
255            op,
256            src1: Xmm::unwrap_new(dst.to_reg()),
257            src2: XmmMemAligned::unwrap_new(src),
258            dst: WritableXmm::from_writable_reg(dst).unwrap(),
259        }
260    }
261
262    #[cfg(test)]
263    pub(crate) fn xmm_rmr_vex3(op: AvxOpcode, src3: RegMem, src2: Reg, dst: Writable<Reg>) -> Self {
264        src3.assert_regclass_is(RegClass::Float);
265        debug_assert!(src2.class() == RegClass::Float);
266        debug_assert!(dst.to_reg().class() == RegClass::Float);
267        Inst::XmmRmRVex3 {
268            op,
269            src3: XmmMem::unwrap_new(src3),
270            src2: Xmm::unwrap_new(src2),
271            src1: Xmm::unwrap_new(dst.to_reg()),
272            dst: WritableXmm::from_writable_reg(dst).unwrap(),
273        }
274    }
275
276    pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src1: Reg, src2: RegMem) -> Inst {
277        src2.assert_regclass_is(RegClass::Float);
278        debug_assert!(src1.class() == RegClass::Float);
279        let src2 = XmmMemAligned::unwrap_new(src2);
280        let src1 = Xmm::unwrap_new(src1);
281        Inst::XmmCmpRmR { op, src1, src2 }
282    }
283
284    #[allow(dead_code)]
285    pub(crate) fn xmm_min_max_seq(
286        size: OperandSize,
287        is_min: bool,
288        lhs: Reg,
289        rhs: Reg,
290        dst: Writable<Reg>,
291    ) -> Inst {
292        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
293        debug_assert_eq!(lhs.class(), RegClass::Float);
294        debug_assert_eq!(rhs.class(), RegClass::Float);
295        debug_assert_eq!(dst.to_reg().class(), RegClass::Float);
296        Inst::XmmMinMaxSeq {
297            size,
298            is_min,
299            lhs: Xmm::unwrap_new(lhs),
300            rhs: Xmm::unwrap_new(rhs),
301            dst: WritableXmm::from_writable_reg(dst).unwrap(),
302        }
303    }
304
305    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
306        src.assert_regclass_is(RegClass::Int);
307        debug_assert!(dst.to_reg().class() == RegClass::Int);
308        let src = match src {
309            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
310            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
311        };
312        let inst = match ext_mode {
313            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
314            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
315            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
316            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
317            ExtMode::LQ => {
318                // This instruction selection may seem strange but is correct in
319                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
320                // "32-bit operands generate a 32-bit result, zero-extended to a
321                // 64-bit result in the destination general-purpose register."
322                // This is applicable beyond `mov` but we use this fact to
323                // zero-extend `src` into `dst`.
324                asm::inst::movl_rm::new(dst, src).into()
325            }
326        };
327        Inst::External { inst }
328    }
329
330    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
331        src.assert_regclass_is(RegClass::Int);
332        debug_assert!(dst.to_reg().class() == RegClass::Int);
333        let src = match src {
334            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
335            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
336        };
337        let inst = match ext_mode {
338            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
339            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
340            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
341            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
342            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
343        };
344        Inst::External { inst }
345    }
346
347    pub(crate) fn mov_r_m(size: OperandSize, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
348        debug_assert!(src.class() == RegClass::Int);
349        Inst::MovRM {
350            size,
351            src: Gpr::unwrap_new(src),
352            dst: dst.into(),
353        }
354    }
355
356    pub(crate) fn lea(addr: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
357        debug_assert!(dst.to_reg().class() == RegClass::Int);
358        Inst::LoadEffectiveAddress {
359            addr: addr.into(),
360            dst: WritableGpr::from_writable_reg(dst).unwrap(),
361            size: OperandSize::Size64,
362        }
363    }
364
365    /// Does a comparison of dst - src for operands of size `size`, as stated by the machine
366    /// instruction semantics. Be careful with the order of parameters!
367    pub(crate) fn cmp_rmi_r(size: OperandSize, src1: Reg, src2: RegMemImm) -> Inst {
368        src2.assert_regclass_is(RegClass::Int);
369        debug_assert_eq!(src1.class(), RegClass::Int);
370        Inst::CmpRmiR {
371            size,
372            src1: Gpr::unwrap_new(src1),
373            src2: GprMemImm::unwrap_new(src2),
374            opcode: CmpOpcode::Cmp,
375        }
376    }
377
378    pub(crate) fn trap(trap_code: TrapCode) -> Inst {
379        Inst::Ud2 { trap_code }
380    }
381
382    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
383        Inst::TrapIf { cc, trap_code }
384    }
385
386    pub(crate) fn cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
387        debug_assert!(size.is_one_of(&[
388            OperandSize::Size16,
389            OperandSize::Size32,
390            OperandSize::Size64
391        ]));
392        debug_assert!(dst.to_reg().class() == RegClass::Int);
393        Inst::Cmove {
394            size,
395            cc,
396            consequent: GprMem::unwrap_new(src),
397            alternative: Gpr::unwrap_new(dst.to_reg()),
398            dst: WritableGpr::from_writable_reg(dst).unwrap(),
399        }
400    }
401
402    pub(crate) fn push64(src: RegMemImm) -> Inst {
403        src.assert_regclass_is(RegClass::Int);
404        let src = GprMemImm::unwrap_new(src);
405        Inst::Push64 { src }
406    }
407
408    pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
409        debug_assert!(dst.to_reg().class() == RegClass::Int);
410        let dst = WritableGpr::from_writable_reg(dst).unwrap();
411        Inst::Pop64 { dst }
412    }
413
414    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
415        Inst::CallKnown { info }
416    }
417
418    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
419        info.dest.assert_regclass_is(RegClass::Int);
420        Inst::CallUnknown { info }
421    }
422
423    pub(crate) fn ret(stack_bytes_to_pop: u32) -> Inst {
424        Inst::Ret { stack_bytes_to_pop }
425    }
426
427    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
428        Inst::JmpKnown { dst }
429    }
430
431    pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
432        target.assert_regclass_is(RegClass::Int);
433        Inst::JmpUnknown { target }
434    }
435
436    /// Choose which instruction to use for loading a register value from memory. For loads smaller
437    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
438    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
439    pub(crate) fn load(
440        ty: Type,
441        from_addr: impl Into<SyntheticAmode>,
442        to_reg: Writable<Reg>,
443        ext_kind: ExtKind,
444    ) -> Inst {
445        let rc = to_reg.to_reg().class();
446        match rc {
447            RegClass::Int => {
448                let ext_mode = match ty.bytes() {
449                    1 => Some(ExtMode::BQ),
450                    2 => Some(ExtMode::WQ),
451                    4 => Some(ExtMode::LQ),
452                    8 => None,
453                    _ => unreachable!("the type should never use a scalar load: {}", ty),
454                };
455                if let Some(ext_mode) = ext_mode {
456                    // Values smaller than 64 bits must be extended in some way.
457                    match ext_kind {
458                        ExtKind::SignExtend => {
459                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
460                        }
461                        ExtKind::ZeroExtend => {
462                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
463                        }
464                        ExtKind::None => {
465                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
466                        }
467                    }
468                } else {
469                    // 64-bit values can be moved directly.
470                    let from_addr = asm::GprMem::from(from_addr.into());
471                    Inst::External {
472                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
473                    }
474                }
475            }
476            RegClass::Float => {
477                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
478                let from_addr = from_addr.into();
479                let inst = match ty {
480                    types::F16 | types::I8X2 => {
481                        panic!("loading a f16 or i8x2 requires multiple instructions")
482                    }
483                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
484                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
485                    }
486                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
487                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
488                    }
489                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
490                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
491                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
492                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
493                    }
494                    _ => unimplemented!("unable to load type: {}", ty),
495                };
496                Inst::External { inst }
497            }
498            RegClass::Vector => unreachable!(),
499        }
500    }
501
502    /// Choose which instruction to use for storing a register value to memory.
503    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
504        let rc = from_reg.class();
505        match rc {
506            RegClass::Int => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr),
507            RegClass::Float => {
508                let to_addr = to_addr.into();
509                let from_reg = Xmm::new(from_reg).unwrap();
510                let inst = match ty {
511                    types::F16 | types::I8X2 => {
512                        panic!("storing a f16 or i8x2 requires multiple instructions")
513                    }
514                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
515                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
516                    }
517                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
518                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
519                    }
520                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
521                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
522                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
523                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
524                    }
525                    _ => unimplemented!("unable to store type: {}", ty),
526                };
527                Inst::External { inst }
528            }
529            RegClass::Vector => unreachable!(),
530        }
531    }
532}
533
534//=============================================================================
535// Instructions: printing
536
537impl PrettyPrint for Inst {
538    fn pretty_print(&self, _size: u8) -> String {
539        fn ljustify(s: String) -> String {
540            let w = 7;
541            if s.len() >= w {
542                s
543            } else {
544                let need = usize::min(w, w - s.len());
545                s + &format!("{nil: <width$}", nil = "", width = need)
546            }
547        }
548
549        fn ljustify2(s1: String, s2: String) -> String {
550            ljustify(s1 + &s2)
551        }
552
553        fn suffix_lq(size: OperandSize) -> String {
554            match size {
555                OperandSize::Size32 => "l",
556                OperandSize::Size64 => "q",
557                _ => unreachable!(),
558            }
559            .to_string()
560        }
561
562        #[allow(dead_code)]
563        fn suffix_lqb(size: OperandSize) -> String {
564            match size {
565                OperandSize::Size32 => "l",
566                OperandSize::Size64 => "q",
567                _ => unreachable!(),
568            }
569            .to_string()
570        }
571
572        fn suffix_bwlq(size: OperandSize) -> String {
573            match size {
574                OperandSize::Size8 => "b".to_string(),
575                OperandSize::Size16 => "w".to_string(),
576                OperandSize::Size32 => "l".to_string(),
577                OperandSize::Size64 => "q".to_string(),
578            }
579        }
580
581        match self {
582            Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
583
584            Inst::CheckedSRemSeq {
585                size,
586                divisor,
587                dividend_lo,
588                dividend_hi,
589                dst_quotient,
590                dst_remainder,
591            } => {
592                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
593                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
594                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
595                let dst_quotient =
596                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
597                let dst_remainder =
598                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
599                format!(
600                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
601                        {divisor}, {dst_quotient}, {dst_remainder}",
602                )
603            }
604
605            Inst::CheckedSRemSeq8 {
606                divisor,
607                dividend,
608                dst,
609            } => {
610                let divisor = pretty_print_reg(divisor.to_reg(), 1);
611                let dividend = pretty_print_reg(dividend.to_reg(), 1);
612                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
613                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
614            }
615
616            Inst::XmmUnaryRmR { op, src, dst, .. } => {
617                let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
618                let src = src.pretty_print(op.src_size());
619                let op = ljustify(op.to_string());
620                format!("{op} {src}, {dst}")
621            }
622
623            Inst::XmmUnaryRmRImm {
624                op, src, dst, imm, ..
625            } => {
626                let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
627                let src = src.pretty_print(op.src_size());
628                let op = ljustify(op.to_string());
629                format!("{op} ${imm}, {src}, {dst}")
630            }
631
632            Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
633                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
634                let src = src.pretty_print(8);
635                let op = ljustify(op.to_string());
636                format!("{op} {src}, {dst}")
637            }
638
639            Inst::XmmUnaryRmRImmVex {
640                op, src, dst, imm, ..
641            } => {
642                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
643                let src = src.pretty_print(8);
644                let op = ljustify(op.to_string());
645                format!("{op} ${imm}, {src}, {dst}")
646            }
647
648            Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
649                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
650                let src = src.pretty_print(8);
651                let op = ljustify(op.to_string());
652                format!("{op} {src}, {dst}")
653            }
654
655            Inst::XmmUnaryRmRImmEvex {
656                op, src, dst, imm, ..
657            } => {
658                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
659                let src = src.pretty_print(8);
660                let op = ljustify(op.to_string());
661                format!("{op} ${imm}, {src}, {dst}")
662            }
663
664            Inst::XmmMovRMVex { op, src, dst, .. } => {
665                let src = pretty_print_reg(src.to_reg(), 8);
666                let dst = dst.pretty_print(8);
667                let op = ljustify(op.to_string());
668                format!("{op} {src}, {dst}")
669            }
670
671            Inst::XmmMovRMImmVex {
672                op, src, dst, imm, ..
673            } => {
674                let src = pretty_print_reg(src.to_reg(), 8);
675                let dst = dst.pretty_print(8);
676                let op = ljustify(op.to_string());
677                format!("{op} ${imm}, {src}, {dst}")
678            }
679
680            Inst::XmmRmR {
681                op,
682                src1,
683                src2,
684                dst,
685                ..
686            } => {
687                let src1 = pretty_print_reg(src1.to_reg(), 8);
688                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
689                let src2 = src2.pretty_print(8);
690                let op = ljustify(op.to_string());
691                format!("{op} {src1}, {src2}, {dst}")
692            }
693
694            Inst::XmmRmRUnaligned {
695                op,
696                src1,
697                src2,
698                dst,
699                ..
700            } => {
701                let src1 = pretty_print_reg(src1.to_reg(), 8);
702                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
703                let src2 = src2.pretty_print(8);
704                let op = ljustify(op.to_string());
705                format!("{op} {src1}, {src2}, {dst}")
706            }
707
708            Inst::XmmRmRBlend {
709                op,
710                src1,
711                src2,
712                mask,
713                dst,
714            } => {
715                let src1 = pretty_print_reg(src1.to_reg(), 8);
716                let mask = mask.to_reg();
717                let mask = if mask.is_virtual() {
718                    format!(" <{}>", show_ireg_sized(mask, 8))
719                } else {
720                    debug_assert_eq!(mask, regs::xmm0());
721                    String::new()
722                };
723                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
724                let src2 = src2.pretty_print(8);
725                let op = ljustify(op.to_string());
726                format!("{op} {src1}, {src2}, {dst}{mask}")
727            }
728
729            Inst::XmmRmiRVex {
730                op,
731                src1,
732                src2,
733                dst,
734                ..
735            } => {
736                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
737                let src1 = pretty_print_reg(src1.to_reg(), 8);
738                let src2 = src2.pretty_print(8);
739                let op = ljustify(op.to_string());
740                format!("{op} {src1}, {src2}, {dst}")
741            }
742
743            Inst::XmmRmRImmVex {
744                op,
745                src1,
746                src2,
747                dst,
748                imm,
749                ..
750            } => {
751                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
752                let src1 = pretty_print_reg(src1.to_reg(), 8);
753                let src2 = src2.pretty_print(8);
754                let op = ljustify(op.to_string());
755                format!("{op} ${imm}, {src1}, {src2}, {dst}")
756            }
757
758            Inst::XmmRmRVex3 {
759                op,
760                src1,
761                src2,
762                src3,
763                dst,
764                ..
765            } => {
766                let src1 = pretty_print_reg(src1.to_reg(), 8);
767                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
768                let src2 = pretty_print_reg(src2.to_reg(), 8);
769                let src3 = src3.pretty_print(8);
770                let op = ljustify(op.to_string());
771                format!("{op} {src1}, {src2}, {src3}, {dst}")
772            }
773
774            Inst::XmmRmRBlendVex {
775                op,
776                src1,
777                src2,
778                mask,
779                dst,
780                ..
781            } => {
782                let src1 = pretty_print_reg(src1.to_reg(), 8);
783                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
784                let src2 = src2.pretty_print(8);
785                let mask = pretty_print_reg(mask.to_reg(), 8);
786                let op = ljustify(op.to_string());
787                format!("{op} {src1}, {src2}, {mask}, {dst}")
788            }
789
790            Inst::XmmRmREvex {
791                op,
792                src1,
793                src2,
794                dst,
795                ..
796            } => {
797                let src1 = pretty_print_reg(src1.to_reg(), 8);
798                let src2 = src2.pretty_print(8);
799                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
800                let op = ljustify(op.to_string());
801                format!("{op} {src2}, {src1}, {dst}")
802            }
803
804            Inst::XmmRmREvex3 {
805                op,
806                src1,
807                src2,
808                src3,
809                dst,
810                ..
811            } => {
812                let src1 = pretty_print_reg(src1.to_reg(), 8);
813                let src2 = pretty_print_reg(src2.to_reg(), 8);
814                let src3 = src3.pretty_print(8);
815                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
816                let op = ljustify(op.to_string());
817                format!("{op} {src3}, {src2}, {src1}, {dst}")
818            }
819
820            Inst::XmmMinMaxSeq {
821                lhs,
822                rhs,
823                dst,
824                is_min,
825                size,
826            } => {
827                let rhs = pretty_print_reg(rhs.to_reg(), 8);
828                let lhs = pretty_print_reg(lhs.to_reg(), 8);
829                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
830                let op = ljustify2(
831                    if *is_min {
832                        "xmm min seq ".to_string()
833                    } else {
834                        "xmm max seq ".to_string()
835                    },
836                    format!("f{}", size.to_bits()),
837                );
838                format!("{op} {lhs}, {rhs}, {dst}")
839            }
840
841            Inst::XmmRmRImm {
842                op,
843                src1,
844                src2,
845                dst,
846                imm,
847                size,
848                ..
849            } => {
850                let src1 = pretty_print_reg(*src1, 8);
851                let dst = pretty_print_reg(dst.to_reg(), 8);
852                let src2 = src2.pretty_print(8);
853                let op = ljustify(format!(
854                    "{}{}",
855                    op.to_string(),
856                    if *size == OperandSize::Size64 {
857                        ".w"
858                    } else {
859                        ""
860                    }
861                ));
862                format!("{op} ${imm}, {src1}, {src2}, {dst}")
863            }
864
865            Inst::XmmUninitializedValue { dst } => {
866                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
867                let op = ljustify("uninit".into());
868                format!("{op} {dst}")
869            }
870
871            Inst::GprUninitializedValue { dst } => {
872                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
873                let op = ljustify("uninit".into());
874                format!("{op} {dst}")
875            }
876
877            Inst::XmmToGprImmVex { op, src, dst, imm } => {
878                let src = pretty_print_reg(src.to_reg(), 8);
879                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
880                let op = ljustify(op.to_string());
881                format!("{op} ${imm}, {src}, {dst}")
882            }
883
884            Inst::XmmCmpRmR { op, src1, src2 } => {
885                let src1 = pretty_print_reg(src1.to_reg(), 8);
886                let src2 = src2.pretty_print(8);
887                let op = ljustify(op.to_string());
888                format!("{op} {src2}, {src1}")
889            }
890
891            Inst::XmmCmpRmRVex { op, src1, src2 } => {
892                let src1 = pretty_print_reg(src1.to_reg(), 8);
893                let src2 = src2.pretty_print(8);
894                format!("{} {src2}, {src1}", ljustify(op.to_string()))
895            }
896
897            Inst::CvtUint64ToFloatSeq {
898                src,
899                dst,
900                dst_size,
901                tmp_gpr1,
902                tmp_gpr2,
903                ..
904            } => {
905                let src = pretty_print_reg(src.to_reg(), 8);
906                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
907                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
908                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
909                let op = ljustify(format!(
910                    "u64_to_{}_seq",
911                    if *dst_size == OperandSize::Size64 {
912                        "f64"
913                    } else {
914                        "f32"
915                    }
916                ));
917                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
918            }
919
920            Inst::CvtFloatToSintSeq {
921                src,
922                dst,
923                src_size,
924                dst_size,
925                tmp_xmm,
926                tmp_gpr,
927                is_saturating,
928            } => {
929                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
930                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
931                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
932                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
933                let op = ljustify(format!(
934                    "cvt_float{}_to_sint{}{}_seq",
935                    src_size.to_bits(),
936                    dst_size.to_bits(),
937                    if *is_saturating { "_sat" } else { "" },
938                ));
939                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
940            }
941
942            Inst::CvtFloatToUintSeq {
943                src,
944                dst,
945                src_size,
946                dst_size,
947                tmp_gpr,
948                tmp_xmm,
949                tmp_xmm2,
950                is_saturating,
951            } => {
952                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
953                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
954                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
955                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
956                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
957                let op = ljustify(format!(
958                    "cvt_float{}_to_uint{}{}_seq",
959                    src_size.to_bits(),
960                    dst_size.to_bits(),
961                    if *is_saturating { "_sat" } else { "" },
962                ));
963                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
964            }
965
966            Inst::Imm {
967                dst_size,
968                simm64,
969                dst,
970            } => {
971                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
972                if *dst_size == OperandSize::Size64 {
973                    let op = ljustify("movabsq".to_string());
974                    let imm = *simm64 as i64;
975                    format!("{op} ${imm}, {dst}")
976                } else {
977                    let op = ljustify("movl".to_string());
978                    let imm = (*simm64 as u32) as i32;
979                    format!("{op} ${imm}, {dst}")
980                }
981            }
982
983            Inst::MovImmM { size, simm32, dst } => {
984                let dst = dst.pretty_print(size.to_bytes());
985                let suffix = suffix_bwlq(*size);
986                let imm = match *size {
987                    OperandSize::Size8 => ((*simm32 as u8) as i8).to_string(),
988                    OperandSize::Size16 => ((*simm32 as u16) as i16).to_string(),
989                    OperandSize::Size32 => simm32.to_string(),
990                    OperandSize::Size64 => (*simm32 as i64).to_string(),
991                };
992                let op = ljustify2("mov".to_string(), suffix);
993                format!("{op} ${imm}, {dst}")
994            }
995
996            Inst::MovRR { size, src, dst } => {
997                let src = pretty_print_reg(src.to_reg(), size.to_bytes());
998                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
999                let op = ljustify2("mov".to_string(), suffix_lq(*size));
1000                format!("{op} {src}, {dst}")
1001            }
1002
1003            Inst::MovFromPReg { src, dst } => {
1004                let src: Reg = (*src).into();
1005                let src = regs::show_ireg_sized(src, 8);
1006                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1007                let op = ljustify("movq".to_string());
1008                format!("{op} {src}, {dst}")
1009            }
1010
1011            Inst::MovToPReg { src, dst } => {
1012                let src = pretty_print_reg(src.to_reg(), 8);
1013                let dst: Reg = (*dst).into();
1014                let dst = regs::show_ireg_sized(dst, 8);
1015                let op = ljustify("movq".to_string());
1016                format!("{op} {src}, {dst}")
1017            }
1018
1019            Inst::LoadEffectiveAddress { addr, dst, size } => {
1020                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
1021                let addr = addr.pretty_print(8);
1022                let op = ljustify("lea".to_string());
1023                format!("{op} {addr}, {dst}")
1024            }
1025
1026            Inst::MovRM { size, src, dst, .. } => {
1027                let src = pretty_print_reg(src.to_reg(), size.to_bytes());
1028                let dst = dst.pretty_print(size.to_bytes());
1029                let op = ljustify2("mov".to_string(), suffix_bwlq(*size));
1030                format!("{op} {src}, {dst}")
1031            }
1032
1033            Inst::CmpRmiR {
1034                size,
1035                src1,
1036                src2,
1037                opcode,
1038            } => {
1039                let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes());
1040                let src2 = src2.pretty_print(size.to_bytes());
1041                let op = match opcode {
1042                    CmpOpcode::Cmp => "cmp",
1043                    CmpOpcode::Test => "test",
1044                };
1045                let op = ljustify2(op.to_string(), suffix_bwlq(*size));
1046                format!("{op} {src2}, {src1}")
1047            }
1048
1049            Inst::Setcc { cc, dst } => {
1050                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
1051                let op = ljustify2("set".to_string(), cc.to_string());
1052                format!("{op} {dst}")
1053            }
1054
1055            Inst::Cmove {
1056                size,
1057                cc,
1058                consequent,
1059                alternative,
1060                dst,
1061            } => {
1062                let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes());
1063                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
1064                let consequent = consequent.pretty_print(size.to_bytes());
1065                let op = ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size)));
1066                format!("{op} {consequent}, {alternative}, {dst}")
1067            }
1068
1069            Inst::XmmCmove {
1070                ty,
1071                cc,
1072                consequent,
1073                alternative,
1074                dst,
1075                ..
1076            } => {
1077                let size = u8::try_from(ty.bytes()).unwrap();
1078                let alternative = pretty_print_reg(alternative.to_reg(), size);
1079                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
1080                let consequent = pretty_print_reg(consequent.to_reg(), size);
1081                let suffix = match *ty {
1082                    types::F64 => "sd",
1083                    types::F32 => "ss",
1084                    types::F16 => "ss",
1085                    types::F32X4 => "aps",
1086                    types::F64X2 => "apd",
1087                    _ => "dqa",
1088                };
1089                let cc = cc.invert();
1090                format!(
1091                    "mov{suffix} {alternative}, {dst}; \
1092                    j{cc} $next; \
1093                    mov{suffix} {consequent}, {dst}; \
1094                    $next:"
1095                )
1096            }
1097
1098            Inst::Push64 { src } => {
1099                let src = src.pretty_print(8);
1100                let op = ljustify("pushq".to_string());
1101                format!("{op} {src}")
1102            }
1103
1104            Inst::StackProbeLoop {
1105                tmp,
1106                frame_size,
1107                guard_size,
1108            } => {
1109                let tmp = pretty_print_reg(tmp.to_reg(), 8);
1110                let op = ljustify("stack_probe_loop".to_string());
1111                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
1112            }
1113
1114            Inst::Pop64 { dst } => {
1115                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1116                let op = ljustify("popq".to_string());
1117                format!("{op} {dst}")
1118            }
1119
1120            Inst::CallKnown { info } => {
1121                let op = ljustify("call".to_string());
1122                let try_call = info
1123                    .try_call_info
1124                    .as_ref()
1125                    .map(|tci| pretty_print_try_call(tci))
1126                    .unwrap_or_default();
1127                format!("{op} {:?}{try_call}", info.dest)
1128            }
1129
1130            Inst::CallUnknown { info } => {
1131                let dest = info.dest.pretty_print(8);
1132                let op = ljustify("call".to_string());
1133                let try_call = info
1134                    .try_call_info
1135                    .as_ref()
1136                    .map(|tci| pretty_print_try_call(tci))
1137                    .unwrap_or_default();
1138                format!("{op} *{dest}{try_call}")
1139            }
1140
1141            Inst::ReturnCallKnown { info } => {
1142                let ReturnCallInfo {
1143                    uses,
1144                    new_stack_arg_size,
1145                    tmp,
1146                    dest,
1147                } = &**info;
1148                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
1149                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
1150                for ret in uses {
1151                    let preg = regs::show_reg(ret.preg);
1152                    let vreg = pretty_print_reg(ret.vreg, 8);
1153                    write!(&mut s, " {vreg}={preg}").unwrap();
1154                }
1155                s
1156            }
1157
1158            Inst::ReturnCallUnknown { info } => {
1159                let ReturnCallInfo {
1160                    uses,
1161                    new_stack_arg_size,
1162                    tmp,
1163                    dest,
1164                } = &**info;
1165                let callee = pretty_print_reg(*dest, 8);
1166                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
1167                let mut s =
1168                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
1169                for ret in uses {
1170                    let preg = regs::show_reg(ret.preg);
1171                    let vreg = pretty_print_reg(ret.vreg, 8);
1172                    write!(&mut s, " {vreg}={preg}").unwrap();
1173                }
1174                s
1175            }
1176
1177            Inst::Args { args } => {
1178                let mut s = "args".to_string();
1179                for arg in args {
1180                    let preg = regs::show_reg(arg.preg);
1181                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
1182                    write!(&mut s, " {def}={preg}").unwrap();
1183                }
1184                s
1185            }
1186
1187            Inst::Rets { rets } => {
1188                let mut s = "rets".to_string();
1189                for ret in rets {
1190                    let preg = regs::show_reg(ret.preg);
1191                    let vreg = pretty_print_reg(ret.vreg, 8);
1192                    write!(&mut s, " {vreg}={preg}").unwrap();
1193                }
1194                s
1195            }
1196
1197            Inst::Ret { stack_bytes_to_pop } => {
1198                let mut s = "ret".to_string();
1199                if *stack_bytes_to_pop != 0 {
1200                    write!(&mut s, " {stack_bytes_to_pop}").unwrap();
1201                }
1202                s
1203            }
1204
1205            Inst::StackSwitchBasic {
1206                store_context_ptr,
1207                load_context_ptr,
1208                in_payload0,
1209                out_payload0,
1210            } => {
1211                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
1212                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
1213                let in_payload0 = pretty_print_reg(**in_payload0, 8);
1214                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
1215                format!(
1216                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
1217                )
1218            }
1219
1220            Inst::JmpKnown { dst } => {
1221                let op = ljustify("jmp".to_string());
1222                let dst = dst.to_string();
1223                format!("{op} {dst}")
1224            }
1225
1226            Inst::WinchJmpIf { cc, taken } => {
1227                let taken = taken.to_string();
1228                let op = ljustify2("j".to_string(), cc.to_string());
1229                format!("{op} {taken}")
1230            }
1231
1232            Inst::JmpCondOr {
1233                cc1,
1234                cc2,
1235                taken,
1236                not_taken,
1237            } => {
1238                let taken = taken.to_string();
1239                let not_taken = not_taken.to_string();
1240                let op = ljustify(format!("j{cc1},{cc2}"));
1241                format!("{op} {taken}; j {not_taken}")
1242            }
1243
1244            Inst::JmpCond {
1245                cc,
1246                taken,
1247                not_taken,
1248            } => {
1249                let taken = taken.to_string();
1250                let not_taken = not_taken.to_string();
1251                let op = ljustify2("j".to_string(), cc.to_string());
1252                format!("{op} {taken}; j {not_taken}")
1253            }
1254
1255            Inst::JmpTableSeq {
1256                idx, tmp1, tmp2, ..
1257            } => {
1258                let idx = pretty_print_reg(*idx, 8);
1259                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
1260                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
1261                let op = ljustify("br_table".into());
1262                format!("{op} {idx}, {tmp1}, {tmp2}")
1263            }
1264
1265            Inst::JmpUnknown { target } => {
1266                let target = target.pretty_print(8);
1267                let op = ljustify("jmp".to_string());
1268                format!("{op} *{target}")
1269            }
1270
1271            Inst::TrapIf { cc, trap_code, .. } => {
1272                format!("j{cc} #trap={trap_code}")
1273            }
1274
1275            Inst::TrapIfAnd {
1276                cc1,
1277                cc2,
1278                trap_code,
1279                ..
1280            } => {
1281                let cc1 = cc1.invert();
1282                let cc2 = cc2.invert();
1283                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
1284            }
1285
1286            Inst::TrapIfOr {
1287                cc1,
1288                cc2,
1289                trap_code,
1290                ..
1291            } => {
1292                let cc2 = cc2.invert();
1293                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
1294            }
1295
1296            Inst::LoadExtName {
1297                dst, name, offset, ..
1298            } => {
1299                let dst = pretty_print_reg(dst.to_reg(), 8);
1300                let name = name.display(None);
1301                let op = ljustify("load_ext_name".into());
1302                format!("{op} {name}+{offset}, {dst}")
1303            }
1304
1305            Inst::LockCmpxchg {
1306                ty,
1307                replacement,
1308                expected,
1309                mem,
1310                dst_old,
1311                ..
1312            } => {
1313                let size = ty.bytes() as u8;
1314                let replacement = pretty_print_reg(*replacement, size);
1315                let expected = pretty_print_reg(*expected, size);
1316                let dst_old = pretty_print_reg(dst_old.to_reg(), size);
1317                let mem = mem.pretty_print(size);
1318                let suffix = suffix_bwlq(OperandSize::from_bytes(size as u32));
1319                format!(
1320                    "lock cmpxchg{suffix} {replacement}, {mem}, expected={expected}, dst_old={dst_old}"
1321                )
1322            }
1323
1324            Inst::LockCmpxchg16b {
1325                replacement_low,
1326                replacement_high,
1327                expected_low,
1328                expected_high,
1329                mem,
1330                dst_old_low,
1331                dst_old_high,
1332                ..
1333            } => {
1334                let replacement_low = pretty_print_reg(*replacement_low, 8);
1335                let replacement_high = pretty_print_reg(*replacement_high, 8);
1336                let expected_low = pretty_print_reg(*expected_low, 8);
1337                let expected_high = pretty_print_reg(*expected_high, 8);
1338                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
1339                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
1340                let mem = mem.pretty_print(16);
1341                format!(
1342                    "lock cmpxchg16b {mem}, replacement={replacement_high}:{replacement_low}, expected={expected_high}:{expected_low}, dst_old={dst_old_high}:{dst_old_low}"
1343                )
1344            }
1345
1346            Inst::LockXadd {
1347                size,
1348                operand,
1349                mem,
1350                dst_old,
1351            } => {
1352                let operand = pretty_print_reg(*operand, size.to_bytes());
1353                let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes());
1354                let mem = mem.pretty_print(size.to_bytes());
1355                let suffix = suffix_bwlq(*size);
1356                format!("lock xadd{suffix} {operand}, {mem}, dst_old={dst_old}")
1357            }
1358
1359            Inst::Xchg {
1360                size,
1361                operand,
1362                mem,
1363                dst_old,
1364            } => {
1365                let operand = pretty_print_reg(*operand, size.to_bytes());
1366                let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes());
1367                let mem = mem.pretty_print(size.to_bytes());
1368                let suffix = suffix_bwlq(*size);
1369                format!("xchg{suffix} {operand}, {mem}, dst_old={dst_old}")
1370            }
1371
1372            Inst::AtomicRmwSeq { ty, op, .. } => {
1373                let ty = ty.bits();
1374                format!(
1375                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
1376                )
1377            }
1378
1379            Inst::Atomic128RmwSeq {
1380                op,
1381                mem,
1382                operand_low,
1383                operand_high,
1384                temp_low,
1385                temp_high,
1386                dst_old_low,
1387                dst_old_high,
1388            } => {
1389                let operand_low = pretty_print_reg(*operand_low, 8);
1390                let operand_high = pretty_print_reg(*operand_high, 8);
1391                let temp_low = pretty_print_reg(temp_low.to_reg(), 8);
1392                let temp_high = pretty_print_reg(temp_high.to_reg(), 8);
1393                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
1394                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
1395                let mem = mem.pretty_print(16);
1396                format!(
1397                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
1398                )
1399            }
1400
1401            Inst::Atomic128XchgSeq {
1402                mem,
1403                operand_low,
1404                operand_high,
1405                dst_old_low,
1406                dst_old_high,
1407            } => {
1408                let operand_low = pretty_print_reg(*operand_low, 8);
1409                let operand_high = pretty_print_reg(*operand_high, 8);
1410                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
1411                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
1412                let mem = mem.pretty_print(16);
1413                format!(
1414                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
1415                )
1416            }
1417
1418            Inst::Fence { kind } => match kind {
1419                FenceKind::MFence => "mfence".to_string(),
1420                FenceKind::LFence => "lfence".to_string(),
1421                FenceKind::SFence => "sfence".to_string(),
1422            },
1423
1424            Inst::Hlt => "hlt".into(),
1425
1426            Inst::Ud2 { trap_code } => format!("ud2 {trap_code}"),
1427
1428            Inst::ElfTlsGetAddr { symbol, dst } => {
1429                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1430                format!("{dst} = elf_tls_get_addr {symbol:?}")
1431            }
1432
1433            Inst::MachOTlsGetAddr { symbol, dst } => {
1434                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1435                format!("{dst} = macho_tls_get_addr {symbol:?}")
1436            }
1437
1438            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
1439                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1440                let tmp = tmp.to_reg().to_reg();
1441
1442                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
1443                if tmp.is_virtual() {
1444                    let tmp = show_ireg_sized(tmp, 8);
1445                    write!(&mut s, ", {tmp}").unwrap();
1446                };
1447
1448                s
1449            }
1450
1451            Inst::Unwind { inst } => format!("unwind {inst:?}"),
1452
1453            Inst::DummyUse { reg } => {
1454                let reg = pretty_print_reg(*reg, 8);
1455                format!("dummy_use {reg}")
1456            }
1457
1458            Inst::External { inst } => {
1459                format!("{inst}")
1460            }
1461        }
1462    }
1463}
1464
1465fn pretty_print_try_call(info: &TryCallInfo) -> String {
1466    let dests = info
1467        .exception_dests
1468        .iter()
1469        .map(|(tag, label)| format!("{tag:?}: {label:?}"))
1470        .collect::<Vec<_>>()
1471        .join(", ");
1472    format!("; jmp {:?}; catch [{dests}]", info.continuation)
1473}
1474
1475impl fmt::Debug for Inst {
1476    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1477        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
1478    }
1479}
1480
1481fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
1482    // Note: because we need to statically know the indices of each
1483    // reg in the operands list in order to fetch its allocation
1484    // later, we put the variable-operand-count bits (the RegMem,
1485    // RegMemImm, etc args) last. regalloc2 doesn't care what order
1486    // the operands come in; they can be freely reordered.
1487
1488    // N.B.: we MUST keep the below in careful sync with (i) emission,
1489    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
1490    // method above.
1491    match inst {
1492        Inst::CheckedSRemSeq {
1493            divisor,
1494            dividend_lo,
1495            dividend_hi,
1496            dst_quotient,
1497            dst_remainder,
1498            ..
1499        } => {
1500            collector.reg_use(divisor);
1501            collector.reg_fixed_use(dividend_lo, regs::rax());
1502            collector.reg_fixed_use(dividend_hi, regs::rdx());
1503            collector.reg_fixed_def(dst_quotient, regs::rax());
1504            collector.reg_fixed_def(dst_remainder, regs::rdx());
1505        }
1506        Inst::CheckedSRemSeq8 {
1507            divisor,
1508            dividend,
1509            dst,
1510            ..
1511        } => {
1512            collector.reg_use(divisor);
1513            collector.reg_fixed_use(dividend, regs::rax());
1514            collector.reg_fixed_def(dst, regs::rax());
1515        }
1516        Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmRImm { src, dst, .. } => {
1517            collector.reg_def(dst);
1518            src.get_operands(collector);
1519        }
1520        Inst::XmmUnaryRmREvex { src, dst, .. }
1521        | Inst::XmmUnaryRmRImmEvex { src, dst, .. }
1522        | Inst::XmmUnaryRmRVex { src, dst, .. }
1523        | Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
1524            collector.reg_def(dst);
1525            src.get_operands(collector);
1526        }
1527        Inst::XmmRmR {
1528            src1, src2, dst, ..
1529        } => {
1530            collector.reg_use(src1);
1531            collector.reg_reuse_def(dst, 0);
1532            src2.get_operands(collector);
1533        }
1534        Inst::XmmRmRUnaligned {
1535            src1, src2, dst, ..
1536        } => {
1537            collector.reg_use(src1);
1538            collector.reg_reuse_def(dst, 0);
1539            src2.get_operands(collector);
1540        }
1541        Inst::XmmRmRBlend {
1542            src1,
1543            src2,
1544            mask,
1545            dst,
1546            op,
1547        } => {
1548            assert!(matches!(
1549                op,
1550                SseOpcode::Blendvpd | SseOpcode::Blendvps | SseOpcode::Pblendvb
1551            ));
1552            collector.reg_use(src1);
1553            collector.reg_fixed_use(mask, regs::xmm0());
1554            collector.reg_reuse_def(dst, 0);
1555            src2.get_operands(collector);
1556        }
1557        Inst::XmmRmiRVex {
1558            src1, src2, dst, ..
1559        } => {
1560            collector.reg_def(dst);
1561            collector.reg_use(src1);
1562            src2.get_operands(collector);
1563        }
1564        Inst::XmmRmRImmVex {
1565            src1, src2, dst, ..
1566        } => {
1567            collector.reg_def(dst);
1568            collector.reg_use(src1);
1569            src2.get_operands(collector);
1570        }
1571        Inst::XmmRmRVex3 {
1572            src1,
1573            src2,
1574            src3,
1575            dst,
1576            ..
1577        } => {
1578            collector.reg_use(src1);
1579            collector.reg_reuse_def(dst, 0);
1580            collector.reg_use(src2);
1581            src3.get_operands(collector);
1582        }
1583        Inst::XmmRmRBlendVex {
1584            src1,
1585            src2,
1586            mask,
1587            dst,
1588            ..
1589        } => {
1590            collector.reg_def(dst);
1591            collector.reg_use(src1);
1592            src2.get_operands(collector);
1593            collector.reg_use(mask);
1594        }
1595        Inst::XmmRmREvex {
1596            op,
1597            src1,
1598            src2,
1599            dst,
1600            ..
1601        } => {
1602            assert_ne!(*op, Avx512Opcode::Vpermi2b);
1603            collector.reg_use(src1);
1604            src2.get_operands(collector);
1605            collector.reg_def(dst);
1606        }
1607        Inst::XmmRmREvex3 {
1608            op,
1609            src1,
1610            src2,
1611            src3,
1612            dst,
1613            ..
1614        } => {
1615            assert_eq!(*op, Avx512Opcode::Vpermi2b);
1616            collector.reg_use(src1);
1617            collector.reg_use(src2);
1618            src3.get_operands(collector);
1619            collector.reg_reuse_def(dst, 0); // Reuse `src1`.
1620        }
1621        Inst::XmmRmRImm {
1622            src1, src2, dst, ..
1623        } => {
1624            collector.reg_use(src1);
1625            collector.reg_reuse_def(dst, 0);
1626            src2.get_operands(collector);
1627        }
1628        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
1629        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
1630        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
1631            collector.reg_use(rhs);
1632            collector.reg_use(lhs);
1633            collector.reg_reuse_def(dst, 0); // Reuse RHS.
1634        }
1635        Inst::XmmMovRMVex { src, dst, .. } | Inst::XmmMovRMImmVex { src, dst, .. } => {
1636            collector.reg_use(src);
1637            dst.get_operands(collector);
1638        }
1639        Inst::XmmCmpRmR { src1, src2, .. } => {
1640            collector.reg_use(src1);
1641            src2.get_operands(collector);
1642        }
1643        Inst::XmmCmpRmRVex { src1, src2, .. } => {
1644            collector.reg_use(src1);
1645            src2.get_operands(collector);
1646        }
1647        Inst::Imm { dst, .. } => {
1648            collector.reg_def(dst);
1649        }
1650        Inst::MovRR { src, dst, .. } => {
1651            collector.reg_use(src);
1652            collector.reg_def(dst);
1653        }
1654        Inst::MovFromPReg { dst, src } => {
1655            debug_assert!(dst.to_reg().to_reg().is_virtual());
1656            collector.reg_fixed_nonallocatable(*src);
1657            collector.reg_def(dst);
1658        }
1659        Inst::MovToPReg { dst, src } => {
1660            debug_assert!(src.to_reg().is_virtual());
1661            collector.reg_use(src);
1662            collector.reg_fixed_nonallocatable(*dst);
1663        }
1664        Inst::XmmToGprImmVex { src, dst, .. } => {
1665            collector.reg_use(src);
1666            collector.reg_def(dst);
1667        }
1668        Inst::CvtUint64ToFloatSeq {
1669            src,
1670            dst,
1671            tmp_gpr1,
1672            tmp_gpr2,
1673            ..
1674        } => {
1675            collector.reg_use(src);
1676            collector.reg_early_def(dst);
1677            collector.reg_early_def(tmp_gpr1);
1678            collector.reg_early_def(tmp_gpr2);
1679        }
1680        Inst::CvtFloatToSintSeq {
1681            src,
1682            dst,
1683            tmp_xmm,
1684            tmp_gpr,
1685            ..
1686        } => {
1687            collector.reg_use(src);
1688            collector.reg_early_def(dst);
1689            collector.reg_early_def(tmp_gpr);
1690            collector.reg_early_def(tmp_xmm);
1691        }
1692        Inst::CvtFloatToUintSeq {
1693            src,
1694            dst,
1695            tmp_gpr,
1696            tmp_xmm,
1697            tmp_xmm2,
1698            ..
1699        } => {
1700            collector.reg_use(src);
1701            collector.reg_early_def(dst);
1702            collector.reg_early_def(tmp_gpr);
1703            collector.reg_early_def(tmp_xmm);
1704            collector.reg_early_def(tmp_xmm2);
1705        }
1706
1707        Inst::MovImmM { dst, .. } => {
1708            dst.get_operands(collector);
1709        }
1710        Inst::LoadEffectiveAddress { addr: src, dst, .. } => {
1711            collector.reg_def(dst);
1712            src.get_operands(collector);
1713        }
1714        Inst::MovRM { src, dst, .. } => {
1715            collector.reg_use(src);
1716            dst.get_operands(collector);
1717        }
1718        Inst::CmpRmiR { src1, src2, .. } => {
1719            collector.reg_use(src1);
1720            src2.get_operands(collector);
1721        }
1722        Inst::Setcc { dst, .. } => {
1723            collector.reg_def(dst);
1724        }
1725        Inst::Cmove {
1726            consequent,
1727            alternative,
1728            dst,
1729            ..
1730        } => {
1731            collector.reg_use(alternative);
1732            collector.reg_reuse_def(dst, 0);
1733            consequent.get_operands(collector);
1734        }
1735        Inst::XmmCmove {
1736            consequent,
1737            alternative,
1738            dst,
1739            ..
1740        } => {
1741            collector.reg_use(alternative);
1742            collector.reg_reuse_def(dst, 0);
1743            collector.reg_use(consequent);
1744        }
1745        Inst::Push64 { src } => {
1746            src.get_operands(collector);
1747        }
1748        Inst::Pop64 { dst } => {
1749            collector.reg_def(dst);
1750        }
1751        Inst::StackProbeLoop { tmp, .. } => {
1752            collector.reg_early_def(tmp);
1753        }
1754
1755        Inst::CallKnown { info } => {
1756            // Probestack is special and is only inserted after
1757            // regalloc, so we do not need to represent its ABI to the
1758            // register allocator. Assert that we don't alter that
1759            // arrangement.
1760            let CallInfo {
1761                uses,
1762                defs,
1763                clobbers,
1764                dest,
1765                ..
1766            } = &mut **info;
1767            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
1768            for CallArgPair { vreg, preg } in uses {
1769                collector.reg_fixed_use(vreg, *preg);
1770            }
1771            for CallRetPair { vreg, location } in defs {
1772                match location {
1773                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
1774                    RetLocation::Stack(..) => collector.any_def(vreg),
1775                }
1776            }
1777            collector.reg_clobbers(*clobbers);
1778        }
1779
1780        Inst::CallUnknown { info } => {
1781            let CallInfo {
1782                uses,
1783                defs,
1784                clobbers,
1785                callee_conv,
1786                dest,
1787                ..
1788            } = &mut **info;
1789            match dest {
1790                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
1791                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1792                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
1793                    // should be safe to use.
1794                    collector.reg_fixed_use(reg, regs::r10());
1795                }
1796                _ => dest.get_operands(collector),
1797            }
1798            for CallArgPair { vreg, preg } in uses {
1799                collector.reg_fixed_use(vreg, *preg);
1800            }
1801            for CallRetPair { vreg, location } in defs {
1802                match location {
1803                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
1804                    RetLocation::Stack(..) => collector.any_def(vreg),
1805                }
1806            }
1807            collector.reg_clobbers(*clobbers);
1808        }
1809        Inst::StackSwitchBasic {
1810            store_context_ptr,
1811            load_context_ptr,
1812            in_payload0,
1813            out_payload0,
1814        } => {
1815            collector.reg_use(load_context_ptr);
1816            collector.reg_use(store_context_ptr);
1817            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
1818            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());
1819
1820            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
1821            // The return/payload reg must not be included in the clobber set
1822            clobbers.remove(
1823                stack_switch::payload_register()
1824                    .to_real_reg()
1825                    .unwrap()
1826                    .into(),
1827            );
1828            collector.reg_clobbers(clobbers);
1829        }
1830
1831        Inst::ReturnCallKnown { info } => {
1832            let ReturnCallInfo {
1833                dest, uses, tmp, ..
1834            } = &mut **info;
1835            collector.reg_fixed_def(tmp, regs::r11());
1836            // Same as in the `Inst::CallKnown` branch.
1837            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
1838            for CallArgPair { vreg, preg } in uses {
1839                collector.reg_fixed_use(vreg, *preg);
1840            }
1841        }
1842
1843        Inst::ReturnCallUnknown { info } => {
1844            let ReturnCallInfo {
1845                dest, uses, tmp, ..
1846            } = &mut **info;
1847
1848            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1849            // This shouldn't be a fixed register constraint, but it's not clear how to
1850            // pick a register that won't be clobbered by the callee-save restore code
1851            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
1852            // safe to use.
1853            collector.reg_fixed_use(dest, regs::r10());
1854
1855            collector.reg_fixed_def(tmp, regs::r11());
1856            for CallArgPair { vreg, preg } in uses {
1857                collector.reg_fixed_use(vreg, *preg);
1858            }
1859        }
1860
1861        Inst::JmpTableSeq {
1862            idx, tmp1, tmp2, ..
1863        } => {
1864            collector.reg_use(idx);
1865            collector.reg_early_def(tmp1);
1866            // In the sequence emitted for this pseudoinstruction in emit.rs,
1867            // tmp2 is only written after idx is read, so it doesn't need to be
1868            // an early def.
1869            collector.reg_def(tmp2);
1870        }
1871
1872        Inst::JmpUnknown { target } => {
1873            target.get_operands(collector);
1874        }
1875
1876        Inst::LoadExtName { dst, .. } => {
1877            collector.reg_def(dst);
1878        }
1879
1880        Inst::LockCmpxchg {
1881            replacement,
1882            expected,
1883            mem,
1884            dst_old,
1885            ..
1886        } => {
1887            collector.reg_use(replacement);
1888            collector.reg_fixed_use(expected, regs::rax());
1889            collector.reg_fixed_def(dst_old, regs::rax());
1890            mem.get_operands(collector);
1891        }
1892
1893        Inst::LockCmpxchg16b {
1894            replacement_low,
1895            replacement_high,
1896            expected_low,
1897            expected_high,
1898            mem,
1899            dst_old_low,
1900            dst_old_high,
1901            ..
1902        } => {
1903            collector.reg_fixed_use(replacement_low, regs::rbx());
1904            collector.reg_fixed_use(replacement_high, regs::rcx());
1905            collector.reg_fixed_use(expected_low, regs::rax());
1906            collector.reg_fixed_use(expected_high, regs::rdx());
1907            collector.reg_fixed_def(dst_old_low, regs::rax());
1908            collector.reg_fixed_def(dst_old_high, regs::rdx());
1909            mem.get_operands(collector);
1910        }
1911
1912        Inst::LockXadd {
1913            operand,
1914            mem,
1915            dst_old,
1916            ..
1917        } => {
1918            collector.reg_use(operand);
1919            collector.reg_reuse_def(dst_old, 0);
1920            mem.get_operands(collector);
1921        }
1922
1923        Inst::Xchg {
1924            operand,
1925            mem,
1926            dst_old,
1927            ..
1928        } => {
1929            collector.reg_use(operand);
1930            collector.reg_reuse_def(dst_old, 0);
1931            mem.get_operands(collector);
1932        }
1933
1934        Inst::AtomicRmwSeq {
1935            operand,
1936            temp,
1937            dst_old,
1938            mem,
1939            ..
1940        } => {
1941            collector.reg_late_use(operand);
1942            collector.reg_early_def(temp);
1943            // This `fixed_def` is needed because `CMPXCHG` always uses this
1944            // register implicitly.
1945            collector.reg_fixed_def(dst_old, regs::rax());
1946            mem.get_operands_late(collector)
1947        }
1948
1949        Inst::Atomic128RmwSeq {
1950            operand_low,
1951            operand_high,
1952            temp_low,
1953            temp_high,
1954            dst_old_low,
1955            dst_old_high,
1956            mem,
1957            ..
1958        } => {
1959            // All registers are collected in the `Late` position so that they don't overlap.
1960            collector.reg_late_use(operand_low);
1961            collector.reg_late_use(operand_high);
1962            collector.reg_fixed_def(temp_low, regs::rbx());
1963            collector.reg_fixed_def(temp_high, regs::rcx());
1964            collector.reg_fixed_def(dst_old_low, regs::rax());
1965            collector.reg_fixed_def(dst_old_high, regs::rdx());
1966            mem.get_operands_late(collector)
1967        }
1968
1969        Inst::Atomic128XchgSeq {
1970            operand_low,
1971            operand_high,
1972            dst_old_low,
1973            dst_old_high,
1974            mem,
1975            ..
1976        } => {
1977            // All registers are collected in the `Late` position so that they don't overlap.
1978            collector.reg_fixed_late_use(operand_low, regs::rbx());
1979            collector.reg_fixed_late_use(operand_high, regs::rcx());
1980            collector.reg_fixed_def(dst_old_low, regs::rax());
1981            collector.reg_fixed_def(dst_old_high, regs::rdx());
1982            mem.get_operands_late(collector)
1983        }
1984
1985        Inst::Args { args } => {
1986            for ArgPair { vreg, preg } in args {
1987                collector.reg_fixed_def(vreg, *preg);
1988            }
1989        }
1990
1991        Inst::Rets { rets } => {
1992            // The return value(s) are live-out; we represent this
1993            // with register uses on the return instruction.
1994            for RetPair { vreg, preg } in rets {
1995                collector.reg_fixed_use(vreg, *preg);
1996            }
1997        }
1998
1999        Inst::JmpKnown { .. }
2000        | Inst::WinchJmpIf { .. }
2001        | Inst::JmpCond { .. }
2002        | Inst::JmpCondOr { .. }
2003        | Inst::Ret { .. }
2004        | Inst::Nop { .. }
2005        | Inst::TrapIf { .. }
2006        | Inst::TrapIfAnd { .. }
2007        | Inst::TrapIfOr { .. }
2008        | Inst::Hlt
2009        | Inst::Ud2 { .. }
2010        | Inst::Fence { .. } => {
2011            // No registers are used.
2012        }
2013
2014        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
2015            collector.reg_fixed_def(dst, regs::rax());
2016            // All caller-saves are clobbered.
2017            //
2018            // We use the SysV calling convention here because the
2019            // pseudoinstruction (and relocation that it emits) is specific to
2020            // ELF systems; other x86-64 targets with other conventions (i.e.,
2021            // Windows) use different TLS strategies.
2022            let mut clobbers =
2023                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
2024            clobbers.remove(regs::gpr_preg(regs::ENC_RAX));
2025            collector.reg_clobbers(clobbers);
2026        }
2027
2028        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
2029            // We also use the gs register. But that register is not allocatable by the
2030            // register allocator, so we don't need to mark it as used here.
2031
2032            // We use %rax to set the address
2033            collector.reg_fixed_def(dst, regs::rax());
2034
2035            // We use %rcx as a temporary variable to load the _tls_index
2036            collector.reg_fixed_def(tmp, regs::rcx());
2037        }
2038
2039        Inst::Unwind { .. } => {}
2040
2041        Inst::DummyUse { reg } => {
2042            collector.reg_use(reg);
2043        }
2044
2045        Inst::External { inst } => {
2046            inst.visit(&mut external::RegallocVisitor { collector });
2047        }
2048    }
2049}
2050
2051//=============================================================================
2052// Instructions: misc functions and external interface
2053
impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    /// Collect register operands and constraints for regalloc by delegating
    /// to `x64_get_operands`.
    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    /// Classify this instruction as a pure register-to-register move,
    /// returning `Some((dst, src))` if so and `None` otherwise.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination.  For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg. Hence only 64-bit GPR moves are classified as moves here.
            Self::MovRR { size, src, dst, .. } if *size == OperandSize::Size64 => {
                Some((dst.to_writable_reg(), src.to_reg()))
            }
            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128-bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                // Only the register form of the xmm/mem operand is a move; a
                // memory source makes this a load, not a move.
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above also
            // recognize the "B" format which while it can be used for stores it
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                // In the "B" format the xmm/mem operand is the destination.
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    /// Every instruction except the `Args` pseudo-instruction participates in
    /// clobber computation.
    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    /// Only `ud2` is a trap instruction.
    fn is_trap(&self) -> bool {
        match self {
            Self::Ud2 { .. } => true,
            _ => false,
        }
    }

    /// True for the `Args` pseudo-instruction that models ABI argument defs.
    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    /// Classify this instruction as a block terminator (return, tail call,
    /// branch, or none).
    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            // Calls carrying try-call info branch to exception/normal edges,
            // so they terminate their block.
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    /// Winch's raw conditional jump is the only "low-level" branch form.
    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    /// Not yet implemented for x64; panics if ever called.
    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    /// Generate a register-to-register move of type `ty` from `src_reg` to
    /// `dst_reg`. The two registers must be in the same register class.
    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        match rc_dst {
            RegClass::Int => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                // those, which may write more lanes that we need, but are specified to have
                // zero-latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                let inst = match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    // Any other <=128-bit float/vector type moves via movdqa.
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Generate a no-op of at most `preferred_size` bytes (capped at 15, the
    /// maximum x86-64 instruction length).
    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(std::cmp::min(preferred_size, 15) as u8)
    }

    /// Map an SSA value type to the register class(es) that hold it and the
    /// canonical in-register type for each class.
    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            // I128 is split across two integer registers.
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                // log2(byte width) - 1 indexes the canonical I8xN type:
                // 2 bytes -> I8X2, 4 -> I8X4, 8 -> I8X8, 16 -> I8X16.
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    /// The widest type each register class can hold.
    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    /// Generate an unconditional jump to `label`.
    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    /// Materialize a 64-bit integer constant into `dst`; always possible on x64.
    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    /// Materialize an f64 constant: load its bit pattern into the GPR `tmp`,
    /// then `movq` it into the XMM register `dst`.
    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    /// Generate a pseudo-instruction that keeps `reg` live without emitting code.
    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    /// Upper bound on encoded size: 15 bytes, the x86-64 maximum instruction length.
    fn worst_case_size() -> CodeOffset {
        15
    }

    /// Reference-typed values live in integer registers.
    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    /// Calls (direct or indirect) are safepoints; nothing else is.
    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16-bytes to 32-bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    // 0F 0B is the encoding of `ud2`.
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}
2282
/// Constant state used during emission of a sequence of instructions.
///
/// Built once per compilation (see [`EmitInfo::new`]) and threaded through
/// `MachInstEmit::emit` as the read-only `Info` parameter.
pub struct EmitInfo {
    /// Shared, target-independent compilation settings.
    pub(super) flags: settings::Flags,
    /// x64-specific ISA settings.
    isa_flags: x64_settings::Flags,
}
2288
2289impl EmitInfo {
2290    /// Create a constant state for emission of instructions.
2291    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
2292        Self { flags, isa_flags }
2293    }
2294}
2295
impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    /// Emit this instruction's machine code into `sink`; the actual encoding
    /// logic lives in the `emit` submodule.
    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    /// Render a human-readable form of this instruction; the emission state
    /// is not consulted.
    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}
2308
/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the
    /// existing value at that location. Used for control-flow instructions,
    /// which consider an offset from the start of the *next* instruction (so
    /// the size of the payload -- 4 bytes -- is subtracted from the payload
    /// when patching).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the
    /// existing value at that location, with no end-of-instruction adjustment.
    PCRel32,
}
2321
2322impl MachInstLabelUse for LabelUse {
2323    const ALIGN: CodeOffset = 1;
2324
2325    fn max_pos_range(self) -> CodeOffset {
2326        match self {
2327            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
2328        }
2329    }
2330
2331    fn max_neg_range(self) -> CodeOffset {
2332        match self {
2333            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
2334        }
2335    }
2336
2337    fn patch_size(self) -> CodeOffset {
2338        match self {
2339            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
2340        }
2341    }
2342
2343    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
2344        let pc_rel = (label_offset as i64) - (use_offset as i64);
2345        debug_assert!(pc_rel <= self.max_pos_range() as i64);
2346        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
2347        let pc_rel = pc_rel as u32;
2348        match self {
2349            LabelUse::JmpRel32 => {
2350                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
2351                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
2352                buffer.copy_from_slice(&value.to_le_bytes()[..]);
2353            }
2354            LabelUse::PCRel32 => {
2355                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
2356                let value = pc_rel.wrapping_add(addend);
2357                buffer.copy_from_slice(&value.to_le_bytes()[..]);
2358            }
2359        }
2360    }
2361
2362    fn supports_veneer(self) -> bool {
2363        match self {
2364            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
2365        }
2366    }
2367
2368    fn veneer_size(self) -> CodeOffset {
2369        match self {
2370            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
2371        }
2372    }
2373
2374    fn worst_case_veneer_size() -> CodeOffset {
2375        0
2376    }
2377
2378    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
2379        match self {
2380            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
2381                panic!("Veneer not supported for JumpRel32 label-use.");
2382            }
2383        }
2384    }
2385
2386    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
2387        match (reloc, addend) {
2388            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
2389            _ => None,
2390        }
2391    }
2392}