cranelift_codegen/isa/x64/inst/mod.rs

//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::slice;
use cranelift_assembler_x64 as asm;
use cranelift_entity::{Signed, Unsigned};
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch unintentional growth in the size of the
    // `Inst` enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

impl Inst {
    /// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
    /// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
    /// below); more than one `InstructionSet` in the list indicates that the instruction is
    /// present in *any* of the included ISA feature sets.
    fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
        match self {
            // These instructions are part of SSE2, which is a basic requirement in Cranelift, and
            // don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. } => smallvec![],

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => {
                smallvec![InstructionSet::CMPXCHG16b]
            }

            Inst::XmmUnaryRmREvex { op, .. }
            | Inst::XmmRmREvex { op, .. }
            | Inst::XmmRmREvex3 { op, .. }
            | Inst::XmmUnaryRmRImmEvex { op, .. } => op.available_from(),

            Inst::External { inst } => {
                use cranelift_assembler_x64::Feature::*;
                let mut features = smallvec![];
                for f in inst.features() {
                    match f {
                        _64b | compat => {}
                        sse => features.push(InstructionSet::SSE),
                        sse2 => features.push(InstructionSet::SSE2),
                        sse3 => features.push(InstructionSet::SSE3),
                        ssse3 => features.push(InstructionSet::SSSE3),
                        sse41 => features.push(InstructionSet::SSE41),
                        sse42 => features.push(InstructionSet::SSE42),
                        bmi1 => features.push(InstructionSet::BMI1),
                        bmi2 => features.push(InstructionSet::BMI2),
                        lzcnt => features.push(InstructionSet::Lzcnt),
                        popcnt => features.push(InstructionSet::Popcnt),
                        avx => features.push(InstructionSet::AVX),
                        avx2 => features.push(InstructionSet::AVX2),
                        cmpxchg16b => features.push(InstructionSet::CMPXCHG16b),
                        fma => features.push(InstructionSet::FMA),
                    }
                }
                features
            }
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` will be discarded.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` zero-extends from 32 bits, use `movl`, which
                // zeros the upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.signed()) {
                    // If `simm64` sign-extends from 32 bits, use `movq`, which
                    // sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }

    #[allow(dead_code)]
    pub(crate) fn xmm_min_max_seq(
        size: OperandSize,
        is_min: bool,
        lhs: Reg,
        rhs: Reg,
        dst: Writable<Reg>,
    ) -> Inst {
        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert_eq!(lhs.class(), RegClass::Float);
        debug_assert_eq!(rhs.class(), RegClass::Float);
        debug_assert_eq!(dst.to_reg().class(), RegClass::Float);
        Inst::XmmMinMaxSeq {
            size,
            is_min,
            lhs: Xmm::unwrap_new(lhs),
            rhs: Xmm::unwrap_new(rhs),
            dst: WritableXmm::from_writable_reg(dst).unwrap(),
        }
    }

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against the 8-bit immediate `src2`, sign-extending the
    /// immediate to the operand size for 16/32/64-bit comparisons.
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
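            // At 8 bits there is no narrower immediate to sign-extend from, so
            // the plain `cmpb` form is used; `unsigned()` just reinterprets
            // the `i8` as the `u8` the assembler expects.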
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
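            // x64 has no separate `Vector` register class: SIMD values live in
            // XMM registers, which Cranelift models as `RegClass::Float`.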
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}
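
// A test-only sketch of the immediate-width classification the constructors
// above rely on (`addq_mi`/`subq_mi` and `Inst::imm`). These assertions
// exercise only the standard-library conversions used there, not the emitted
// instructions themselves.
#[test]
fn imm_width_classification() {
    // `addq_mi`/`subq_mi`: values that fit in a sign-extended 8-bit immediate
    // select the `_sxb` form; anything wider falls back to `_sxl`.
    assert!(i8::try_from(127i32).is_ok());
    assert!(i8::try_from(128i32).is_err());

    // `Inst::imm` with a 64-bit destination: zero-extendable values use
    // `movl`, which clears the upper bits...
    assert!(u32::try_from(0x8000_0000u64).is_ok());
    // ...values that are only sign-extendable use `movq` with a 32-bit
    // immediate...
    assert!(u32::try_from(u64::MAX).is_err() && i32::try_from(-1i64).is_ok());
    // ...and everything else requires the full `movabsq` encoding.
    assert!(i32::try_from(0x1_2345_6789i64).is_err());
}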

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmUnaryRmRImmEvex {
                op, src, dst, imm, ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::XmmRmREvex {
                op,
                src1,
                src2,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} {src2}, {src1}, {dst}")
            }

            Inst::XmmRmREvex3 {
                op,
                src1,
                src2,
                src3,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = pretty_print_reg(src2.to_reg(), 8);
                let src3 = src3.pretty_print(8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} {src3}, {src2}, {src1}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = regs::show_ireg_sized(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = regs::show_ireg_sized(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                    j{cc} $next; \
                    mov{suffix} {consequent}, {dst}; \
                    $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = regs::show_reg(arg.preg);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = show_ireg_sized(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    let dests = info
        .exception_dests
        .iter()
        .map(|(tag, label)| format!("{tag:?}: {label:?}"))
        .collect::<Vec<_>>()
        .join(", ");
    format!("; jmp {:?}; catch [{dests}]", info.continuation)
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRImmEvex { src, dst, .. } => {
            collector.reg_def(dst);
            src.get_operands(collector);
        }
        Inst::XmmRmREvex {
            op,
            src1,
            src2,
            dst,
            ..
        } => {
            assert_ne!(*op, Avx512Opcode::Vpermi2b);
            collector.reg_use(src1);
            src2.get_operands(collector);
            collector.reg_def(dst);
        }
        Inst::XmmRmREvex3 {
            op,
            src1,
            src2,
            src3,
            dst,
            ..
        } => {
            assert_eq!(*op, Avx512Opcode::Vpermi2b);
            collector.reg_use(src1);
            collector.reg_use(src2);
            src3.get_operands(collector);
            collector.reg_reuse_def(dst, 0); // Reuse `src1`.
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
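            // The destination and temporaries are written before the sequence
            // is done reading `src`, so they are early defs: regalloc2 must
            // not assign any of them the same register as `src`.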
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(regs::ENC_RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination.  For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128-bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format, which, while usable for stores, can
            // also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero latency. Use movaps for
                // those, which may write more lanes than we need but is specified to have zero
                // latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
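        // The longest single x86 NOP encoding emitted here is 9 bytes; per the
        // `MachInst` contract, returning a NOP shorter than `preferred_size` is
        // acceptable.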
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
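                // A vector type of at most 128 bits is 2, 4, 8, or 16 bytes wide,
                // so `ilog2() - 1` selects the matching index 0..=3 below.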
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
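        // Materialize the raw bits in a GPR first, then transfer them into the
        // XMM destination with a `movq`.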
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
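        // A single x86_64 instruction is architecturally limited to 15 bytes.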
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Use 32-byte rather than 16-byte alignment for better performance; see
            // https://github.com/bytecodealliance/wasmtime/issues/8573.
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

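    // The trap opcode is `ud2`, the canonical x86 undefined-instruction trap
    // (encoded as 0F 0B).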
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}
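
// A minimal sanity-check sketch of the `gen_move`/`is_move` pair above: a
// 64-bit GPR-to-GPR move built by `gen_move` should be recognized by
// `is_move` as a register move. The particular registers are arbitrary, and
// the `regs::rsi`/`regs::rdi` helper constructors are assumed to exist
// alongside the `regs::rax`/`regs::rcx` helpers used earlier in this file.
#[test]
fn gen_move_is_classified_as_a_move() {
    let dst = Writable::from_reg(regs::rsi());
    let src = regs::rdi();
    let inst = Inst::gen_move(dst, src, types::I64);
    assert_eq!(inst.is_move(), Some((dst, src)));
}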

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location. Used for control-flow instructions which consider an offset from the start of
    /// the next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
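        // A 32-bit PC-relative offset reaches ±2 GiB, far more than any single
        // function body needs, so veneers are unnecessary on x64.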
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("veneers are not supported for JmpRel32/PCRel32 label-uses");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}
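
// A small illustrative test of the patch arithmetic above: `JmpRel32`
// displacements are measured from the end of the 4-byte payload, so the
// encoded value is `label_offset - use_offset - 4`, while `PCRel32` encodes
// the raw `label_offset - use_offset` difference.
#[test]
fn label_use_patch_offsets() {
    let mut jmp = [0u8; 4];
    LabelUse::JmpRel32.patch(&mut jmp, 0, 0x10);
    assert_eq!(u32::from_le_bytes(jmp), 0xc);

    let mut pc = [0u8; 4];
    LabelUse::PCRel32.patch(&mut pc, 0, 0x10);
    assert_eq!(u32::from_le_bytes(pc), 0x10);
}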