cranelift_codegen/isa/x64/inst/mod.rs

//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch unintentional growth of the `Inst` enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

impl Inst {
    /// Check whether the instruction (or pseudo-instruction) can be emitted
    /// for the target architecture described by `emit_info`. For
    /// non-assembler instructions, this assumes a baseline feature set
    /// (i.e., 64-bit mode with SSE2 and below).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions require at most SSE2, which is a basic
            // requirement in Cranelift, so they don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::PatchableCallKnown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::SequencePoint => true,

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),

            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }
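
    // Illustrative sketch (not part of the original backend): padding wider
    // than nine bytes can be built by chaining the fixed-length nops above.
    // This helper is hypothetical and exists only to show how `nop` composes.
    #[allow(dead_code)]
    fn nop_padding_sketch(mut len: usize) -> SmallVec<[Self; 4]> {
        let mut insts: SmallVec<[Self; 4]> = smallvec![];
        while len > 0 {
            let n = core::cmp::min(len, 9);
            insts.push(Self::nop(n as u8));
            len -= n;
        }
        insts
    }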

    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        // Prefer the sign-extended 8-bit immediate form when it fits; it is
        // three bytes shorter than the 32-bit immediate form.
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` are discarded.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` fits in 32 bits unsigned, use `movl`, which
                // zeros the upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` fits in 32 bits signed, use `movq`, which
                    // sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }
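
    // For example (illustrative): `imm(Size64, 0xffff_ffff, dst)` selects the
    // zero-extending `movl`; `imm(Size64, (-1i64) as u64, dst)` selects the
    // sign-extending `movq`; an immediate such as 0x1234_5678_9abc_def0 needs
    // the full 10-byte `movabsq` encoding.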

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
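                // E.g., `movl %eax, %eax` clears bits 63:32 of `%rax`.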
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against `src2`.
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            // The 8-bit compare takes a full-width 8-bit immediate, so there
            // is no sign-extended-byte form to select here.
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }
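
    // Usage note (illustrative): `Inst::load(types::I32, addr, dst,
    // ExtKind::ZeroExtend)` produces the zero-extending `movl` form above,
    // while an 8-byte load such as `types::I64` lowers directly to `movq`.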

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = pretty_print_reg(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = pretty_print_reg(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                    j{cc} $next; \
                    mov{suffix} {consequent}, {dst}; \
                    $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg, 8);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::PatchableCallKnown { info } => {
                let op = ljustify("patchable_call".to_string());
                format!("{op} {:?}", info.dest)
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = pretty_print_reg(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("label_address {dst}, {label:?}")
            }

            Inst::SequencePoint {} => {
                format!("sequence_point")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
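            // These fixed-register constraints mirror x86 `idiv`: the
            // dividend lives in rdx:rax and the quotient/remainder come
            // back in rax/rdx.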
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } | Inst::PatchableCallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::SequencePoint { .. } => {}

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}
1214
1215//=============================================================================
1216// Instructions: misc functions and external interface
1217
1218impl MachInst for Inst {
1219    type ABIMachineSpec = X64ABIMachineSpec;
1220
1221    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
1222        x64_get_operands(self, collector)
1223    }
1224
1225    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
1226        use asm::inst::Inst as I;
1227        match self {
1228            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
1229            // out the upper 32 bits of the destination.  For example, we could
1230            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
1231            // %reg.
1232            Self::External {
1233                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
1234            } => match rm64 {
1235                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
1236                asm::GprMem::Mem(_) => None,
1237            },
1238            Self::External {
1239                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
1240            } => match rm64 {
1241                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
1242                asm::GprMem::Mem(_) => None,
1243            },
1244
1245            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
1246            // here because they only overwrite the low bits in the destination
1247            // register, otherwise preserving the upper bits. That can be used
1248            // for lane-insertion instructions, for example, meaning it's not
1249            // classified as a register move.
1250            //
1251            // Otherwise though all register-to-register movement instructions
1252            // which move 128-bits are registered as moves.
1253            Self::External {
1254                inst:
1255                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
1256                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
1257                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
1258                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
1259                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
1260                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
1261            } => match xmm_m128 {
1262                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
1263                asm::XmmMem::Mem(_) => None,
1264            },
1265            // In addition to the "A" format of instructions above also
1266            // recognize the "B" format which while it can be used for stores it
1267            // can also be used for register moves.
1268            Self::External {
1269                inst:
1270                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
1271                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
1272                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
1273                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
1274                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
1275                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
1276            } => match xmm_m128 {
1277                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
1278                asm::XmmMem::Mem(_) => None,
1279            },
1280            _ => None,
1281        }
1282    }
1283
1284    fn is_included_in_clobbers(&self) -> bool {
1285        match self {
1286            &Inst::Args { .. } => false,
1287            _ => true,
1288        }
1289    }
1290
1291    fn is_trap(&self) -> bool {
1292        match self {
1293            Self::External {
1294                inst: asm::inst::Inst::ud2_zo(..),
1295            } => true,
1296            _ => false,
1297        }
1298    }
1299
1300    fn is_args(&self) -> bool {
1301        match self {
1302            Self::Args { .. } => true,
1303            _ => false,
1304        }
1305    }
1306
1307    fn call_type(&self) -> CallType {
1308        match self {
1309            Inst::CallKnown { .. }
1310            | Inst::CallUnknown { .. }
1311            | Inst::ElfTlsGetAddr { .. }
1312            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1313
1314            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,
1315
1316            _ => CallType::None,
1317        }
1318    }
1319
1320    fn is_term(&self) -> MachTerminator {
1321        match self {
1322            // Interesting cases.
1323            &Self::Rets { .. } => MachTerminator::Ret,
1324            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
1325                MachTerminator::RetCall
1326            }
1327            &Self::JmpKnown { .. } => MachTerminator::Branch,
1328            &Self::JmpCond { .. } => MachTerminator::Branch,
1329            &Self::JmpCondOr { .. } => MachTerminator::Branch,
1330            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
1331            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1332            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
1333                MachTerminator::Branch
1334            }
1335            // All other cases are boring.
1336            _ => MachTerminator::None,
1337        }
1338    }
1339
1340    fn is_low_level_branch(&self) -> bool {
1341        match self {
1342            &Self::WinchJmpIf { .. } => true,
1343            _ => false,
1344        }
1345    }
1346
1347    fn is_mem_access(&self) -> bool {
1348        panic!("TODO FILL ME OUT")
1349    }
1350
    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // does not list MOVSS/MOVSD among the zero-latency instructions. Use MOVAPS
                // for those moves instead; it may write more lanes than we need, but it is
                // specified to have zero latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

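    // `preferred_size` is only a hint: the longest single NOP encoding this
    // backend emits is 9 bytes, so larger requests are clamped to 9.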
    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }

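    // A single one-byte NOP (`0x90`), the smallest unit of padding.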
    fn gen_nop_unit() -> SmallVec<[u8; 8]> {
        smallvec![0x90]
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
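                // Map the vector's byte width to a canonical type of the same
                // size: 2 -> I8X2 (index 0), 4 -> I8X4, 8 -> I8X8, 16 -> I8X16;
                // hence the `ilog2() - 1` index below.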
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

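    // The widest value each register class can hold: XMM registers are
    // canonically treated as 128-bit vectors, GPRs as 64-bit integers.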
    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

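    // Materialize an f64 by loading its bit pattern into a temporary GPR, then
    // transferring it to the destination XMM register with `movq`.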
    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
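        // The x86-64 architecture caps any single instruction encoding at 15 bytes.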
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } | Inst::PatchableCallKnown { .. } => {
                true
            }
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Prefer 32-byte alignment rather than 16-byte for better performance;
            // see https://github.com/bytecodealliance/wasmtime/issues/8573.
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

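    // `ud2` (0x0F, 0x0B): raises an invalid-opcode exception, used to trap.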
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

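// Feature predicates the assembler consults when checking whether an
// instruction can be emitted for the current target; aside from the baseline
// features, these delegate to the target's ISA flags.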
impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value
    /// at that location. Used for control-flow instructions, which interpret the offset as
    /// relative to the start of the next instruction (so the size of the payload -- 4 bytes --
    /// is subtracted when patching).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value
    /// at that location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
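                // The CPU resolves a rel32 branch target relative to the end of
                // the 4-byte immediate: target = use_offset + 4 + rel32. Since
                // `pc_rel` was computed from the start of the field, subtract 4.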
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("veneers are not supported for JmpRel32/PCRel32 label-uses");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
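            // An `X86CallPCRel4` relocation with addend -4 follows the same
            // "relative to the end of the 4-byte field" convention as `JmpRel32`.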
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}