cranelift_codegen/isa/x64/inst/mod.rs

//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use core::slice;
use cranelift_assembler_x64 as asm;
use cranelift_entity::{Signed, Unsigned};
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch unintentional growth in the size of the `Inst`
    // enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

impl Inst {
    /// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
    /// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
    /// below); more than one `InstructionSet` in the list indicates that the instruction is
    /// present in *any* of the included ISA feature sets.
    fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
        match self {
            // These instructions are part of SSE2, which is a basic requirement in Cranelift, and
            // don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. } => smallvec![],

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => {
                smallvec![InstructionSet::CMPXCHG16b]
            }

            Inst::External { inst } => {
                use cranelift_assembler_x64::Feature::*;
                let mut features = smallvec![];
                for f in inst.features() {
                    match f {
                        _64b | compat => {}
                        sse => features.push(InstructionSet::SSE),
                        sse2 => features.push(InstructionSet::SSE2),
                        sse3 => features.push(InstructionSet::SSE3),
                        ssse3 => features.push(InstructionSet::SSSE3),
                        sse41 => features.push(InstructionSet::SSE41),
                        sse42 => features.push(InstructionSet::SSE42),
                        bmi1 => features.push(InstructionSet::BMI1),
                        bmi2 => features.push(InstructionSet::BMI2),
                        lzcnt => features.push(InstructionSet::Lzcnt),
                        popcnt => features.push(InstructionSet::Popcnt),
                        avx => features.push(InstructionSet::AVX),
                        avx2 => features.push(InstructionSet::AVX2),
                        avx512f => features.push(InstructionSet::AVX512F),
                        avx512vl => features.push(InstructionSet::AVX512VL),
                        avx512dq => features.push(InstructionSet::AVX512DQ),
                        avx512bitalg => features.push(InstructionSet::AVX512BITALG),
                        avx512vbmi => features.push(InstructionSet::AVX512VBMI),
                        cmpxchg16b => features.push(InstructionSet::CMPXCHG16b),
                        fma => features.push(InstructionSet::FMA),
                    }
                }
                features
            }
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }
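
    // Illustrative sketch (an addition, not from the original source): the two
    // constructors above pick the shortest immediate encoding that fits, so
    // for some writable GPR `dst` one would expect:
    //
    //     Inst::addq_mi(dst, 100);    // fits in an i8 -> addq_mi_sxb
    //     Inst::subq_mi(dst, 0x1234); // needs 32 bits -> subq_mi_sxl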

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` will be discarded.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` fits in a `u32`, use `movl`, which zeroes the
                // upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.signed()) {
                    // If `simm64` fits in a sign-extended `i32`, use `movq`,
                    // which sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Otherwise fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }
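
    // Illustrative sketch (an addition, not from the original source): how the
    // 64-bit arm of `Inst::imm` above selects an encoding, assuming `dst` is a
    // writable integer register:
    //
    //     Inst::imm(OperandSize::Size64, 0x0000_0000_8000_0000, dst); // fits u32 -> movl_oi
    //     Inst::imm(OperandSize::Size64, 0xffff_ffff_ffff_fff6, dst); // -10 fits i32 -> movq_mi_sxl
    //     Inst::imm(OperandSize::Size64, 0x1234_5678_9abc_def0, dst); // neither -> movabsq_oi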

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }
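
    // Illustrative note (an addition, not from the original source): the
    // `ExtMode::LQ` arm above leans on the x86-64 rule that 32-bit operations
    // implicitly zero bits 63:32 of their destination, so no dedicated
    // zero-extending opcode is needed for 32-to-64-bit widening:
    //
    //     Inst::movzx_rm_r(ExtMode::BQ, src, dst); // emits `movzbq`
    //     Inst::movzx_rm_r(ExtMode::LQ, src, dst); // emits a plain `movl`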

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against `src2`.
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }
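
    // Illustrative sketch (an addition, not from the original source): typical
    // uses of `Inst::load` above, assuming `addr` is a `SyntheticAmode` and
    // the destination registers have the matching class:
    //
    //     Inst::load(types::I8, addr, gpr_dst, ExtKind::ZeroExtend); // movzbq
    //     Inst::load(types::I64, addr, gpr_dst, ExtKind::None);      // movq
    //     Inst::load(types::F32X4, addr, xmm_dst, ExtKind::None);    // movups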

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }
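
        // Illustrative note (an addition, not from the original source):
        // `ljustify` pads a mnemonic out to a 7-character column so that
        // operands line up in the printed output, e.g.
        // ljustify("jmp".to_string()) yields "jmp    ".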

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = regs::show_ireg_sized(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = regs::show_ireg_sized(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                    j{cc} $next; \
                    mov{suffix} {consequent}, {dst}; \
                    $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = regs::show_reg(arg.preg);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = show_ireg_sized(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(regs::ENC_RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination. For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise, though, all register-to-register movement instructions
            // that move 128 bits are classified as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format, which, while usable for stores, can
            // also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }
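
    // Illustrative note (an addition, not from the original source): examples
    // of how `is_move` above classifies some register-to-register forms:
    //
    //     movq %rax, %rbx     // Some((rbx, rax)): a pure 64-bit rename
    //     movl %eax, %ebx     // None: zeroes bits 63:32, not a plain copy
    //     movaps %xmm0, %xmm1 // Some((xmm1, xmm0)): full 128-bit copy
    //     movss %xmm0, %xmm1  // None: only merges into the low lane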

    fn is_included_in_clobbers(&self) -> bool {
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't list MOVSS/MOVSD among the zero-latency moves. Use MOVAPS for those
                // types instead: it may write more lanes than we need, but it is specified to
                // have zero latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
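        // 9 bytes is the largest single NOP encoding this backend emits
        // (hence the `min`); larger padding requests are satisfied by the
        // caller emitting multiple NOPs.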
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
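                // A vector of 2^n bytes selects `types[n - 1]`: e.g. a
                // 16-byte vector has `ilog2(16) == 4` and maps to `I8X16`.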
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
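        // 15 bytes is the architectural maximum length of a single x86_64
        // instruction.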
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Use 32-byte alignment rather than 16-byte for better performance;
            // see https://github.com/bytecodealliance/wasmtime/issues/8573.
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

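    // `0x0F 0x0B` is the encoding of `ud2`, matching `is_trap` above.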
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location. Used for control-flow instructions, which take an offset from the start of
    /// the next instruction (so the 4-byte payload size is subtracted from the computed offset).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("veneers not supported for JmpRel32/PCRel32 label-uses");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}
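
// A minimal sketch exercising the `patch` arithmetic above: `JmpRel32`
// subtracts the 4-byte payload size so the displacement is relative to the
// end of the instruction, while `PCRel32` applies the raw PC-relative
// distance. The module and test names here are illustrative, not part of
// the upstream file.
#[cfg(test)]
mod label_use_patch_tests {
    use super::*;

    #[test]
    fn jmp_rel32_accounts_for_payload_size() {
        // A use at offset 0 targeting a label at offset 8: `JmpRel32`
        // encodes 8 - 4 = 4, relative to the end of the 4-byte payload.
        let mut buffer = [0u8; 4];
        LabelUse::JmpRel32.patch(&mut buffer, 0, 8);
        assert_eq!(u32::from_le_bytes(buffer), 4);

        // `PCRel32` encodes the raw distance of 8.
        let mut buffer = [0u8; 4];
        LabelUse::PCRel32.patch(&mut buffer, 0, 8);
        assert_eq!(u32::from_le_bytes(buffer), 8);
    }
}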