// cranelift_codegen/isa/x64/inst/mod.rs
//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use alloc::string::{String, ToString};
use alloc::vec;
use alloc::vec::Vec;
use core::fmt::{self, Write};
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
///
/// `T` is the callee target type (e.g. a known external name or a register
/// operand for indirect tail calls — see the `ReturnCallKnown` /
/// `ReturnCallUnknown` pretty-printing below).
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}
#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // Guard against unintentional growth of the `Inst` enum: if this fails,
    // either shrink the offending variant (e.g. box its payload) or bump the
    // expected size deliberately.
    let expected_size = 48;
    assert_eq!(expected_size, core::mem::size_of::<Inst>());
}
impl Inst {
    /// Check if the instruction (or pseudo-instruction) can be emitted given
    /// the current target architecture given by `emit_info`. For non-assembler
    /// instructions, this assumes a baseline feature set (i.e., 64-bit AND SSE2
    /// and below).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions are part of SSE2, which is a basic requirement
            // in Cranelift, and don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::SequencePoint => true,

            // The 128-bit atomic sequences require the optional `cmpxchg16b`
            // extension, so consult `emit_info` for it.
            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),

            // Assembler-crate instructions carry their own feature
            // requirements; delegate the availability check to them.
            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}

// Handy constructors for Insts.

impl Inst {
    /// Create a no-op instruction that occupies exactly `len` bytes.
    ///
    /// Panics (via the assert) if `len` is not in `1..=9`, the range for
    /// which a single multi-byte NOP encoding exists.
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

    /// 64-bit `add` of the sign-extended immediate `simm32` into `dst`.
    ///
    /// Prefers the shorter sign-extended-byte encoding when the immediate
    /// fits in an `i8`.
    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// 64-bit `sub` of the sign-extended immediate `simm32` from `dst`;
    /// like [`Inst::addq_mi`], picks the shorter `i8` encoding when possible.
    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64-bits then the upper bits of
    /// `simm64` will be converted to zero.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` is zero-extended use `movl` which zeros the
                // upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` is sign-extended use `movq` which
                    // sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }

    /// Zero-extending integer move: load-or-move `src` into `dst`, widening
    /// per `ext_mode` with the upper bits cleared.
    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    /// Sign-extending integer move: load-or-move `src` into `dst`, widening
    /// per `ext_mode` with the upper bits filled from the sign bit.
    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against `src2`
    ///
    /// The immediate is sign-extended to the operand `size`; at 8 bits no
    /// extension is needed, so the plain byte-compare encoding is used with
    /// the immediate reinterpreted as unsigned.
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

    /// Trap with `trap_code` if condition code `cc` holds.
    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    /// Direct call to a statically-known external name.
    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    /// Indirect call through a register or memory operand (must be an
    /// integer-class operand).
    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    /// Unconditional jump to the label `dst`.
    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing a f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}

//=============================================================================
// Instructions: printing

376impl PrettyPrint for Inst {
377    fn pretty_print(&self, _size: u8) -> String {
378        fn ljustify(s: String) -> String {
379            let w = 7;
380            if s.len() >= w {
381                s
382            } else {
383                let need = usize::min(w, w - s.len());
384                s + &format!("{nil: <width$}", nil = "", width = need)
385            }
386        }
387
388        fn ljustify2(s1: String, s2: String) -> String {
389            ljustify(s1 + &s2)
390        }
391
392        match self {
393            Inst::CheckedSRemSeq {
394                size,
395                divisor,
396                dividend_lo,
397                dividend_hi,
398                dst_quotient,
399                dst_remainder,
400            } => {
401                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
402                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
403                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
404                let dst_quotient =
405                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
406                let dst_remainder =
407                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
408                format!(
409                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
410                        {divisor}, {dst_quotient}, {dst_remainder}",
411                )
412            }
413
414            Inst::CheckedSRemSeq8 {
415                divisor,
416                dividend,
417                dst,
418            } => {
419                let divisor = pretty_print_reg(divisor.to_reg(), 1);
420                let dividend = pretty_print_reg(dividend.to_reg(), 1);
421                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
422                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
423            }
424
425            Inst::XmmMinMaxSeq {
426                lhs,
427                rhs,
428                dst,
429                is_min,
430                size,
431            } => {
432                let rhs = pretty_print_reg(rhs.to_reg(), 8);
433                let lhs = pretty_print_reg(lhs.to_reg(), 8);
434                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
435                let op = ljustify2(
436                    if *is_min {
437                        "xmm min seq ".to_string()
438                    } else {
439                        "xmm max seq ".to_string()
440                    },
441                    format!("f{}", size.to_bits()),
442                );
443                format!("{op} {lhs}, {rhs}, {dst}")
444            }
445
446            Inst::XmmUninitializedValue { dst } => {
447                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
448                let op = ljustify("uninit".into());
449                format!("{op} {dst}")
450            }
451
452            Inst::GprUninitializedValue { dst } => {
453                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
454                let op = ljustify("uninit".into());
455                format!("{op} {dst}")
456            }
457
458            Inst::CvtUint64ToFloatSeq {
459                src,
460                dst,
461                dst_size,
462                tmp_gpr1,
463                tmp_gpr2,
464                ..
465            } => {
466                let src = pretty_print_reg(src.to_reg(), 8);
467                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
468                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
469                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
470                let op = ljustify(format!(
471                    "u64_to_{}_seq",
472                    if *dst_size == OperandSize::Size64 {
473                        "f64"
474                    } else {
475                        "f32"
476                    }
477                ));
478                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
479            }
480
481            Inst::CvtFloatToSintSeq {
482                src,
483                dst,
484                src_size,
485                dst_size,
486                tmp_xmm,
487                tmp_gpr,
488                is_saturating,
489            } => {
490                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
491                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
492                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
493                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
494                let op = ljustify(format!(
495                    "cvt_float{}_to_sint{}{}_seq",
496                    src_size.to_bits(),
497                    dst_size.to_bits(),
498                    if *is_saturating { "_sat" } else { "" },
499                ));
500                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
501            }
502
503            Inst::CvtFloatToUintSeq {
504                src,
505                dst,
506                src_size,
507                dst_size,
508                tmp_gpr,
509                tmp_xmm,
510                tmp_xmm2,
511                is_saturating,
512            } => {
513                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
514                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
515                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
516                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
517                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
518                let op = ljustify(format!(
519                    "cvt_float{}_to_uint{}{}_seq",
520                    src_size.to_bits(),
521                    dst_size.to_bits(),
522                    if *is_saturating { "_sat" } else { "" },
523                ));
524                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
525            }
526
527            Inst::MovFromPReg { src, dst } => {
528                let src: Reg = (*src).into();
529                let src = pretty_print_reg(src, 8);
530                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
531                let op = ljustify("movq".to_string());
532                format!("{op} {src}, {dst}")
533            }
534
535            Inst::MovToPReg { src, dst } => {
536                let src = pretty_print_reg(src.to_reg(), 8);
537                let dst: Reg = (*dst).into();
538                let dst = pretty_print_reg(dst, 8);
539                let op = ljustify("movq".to_string());
540                format!("{op} {src}, {dst}")
541            }
542
543            Inst::XmmCmove {
544                ty,
545                cc,
546                consequent,
547                alternative,
548                dst,
549                ..
550            } => {
551                let size = u8::try_from(ty.bytes()).unwrap();
552                let alternative = pretty_print_reg(alternative.to_reg(), size);
553                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
554                let consequent = pretty_print_reg(consequent.to_reg(), size);
555                let suffix = match *ty {
556                    types::F64 => "sd",
557                    types::F32 => "ss",
558                    types::F16 => "ss",
559                    types::F32X4 => "aps",
560                    types::F64X2 => "apd",
561                    _ => "dqa",
562                };
563                let cc = cc.invert();
564                format!(
565                    "mov{suffix} {alternative}, {dst}; \
566                    j{cc} $next; \
567                    mov{suffix} {consequent}, {dst}; \
568                    $next:"
569                )
570            }
571
572            Inst::StackProbeLoop {
573                tmp,
574                frame_size,
575                guard_size,
576            } => {
577                let tmp = pretty_print_reg(tmp.to_reg(), 8);
578                let op = ljustify("stack_probe_loop".to_string());
579                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
580            }
581
582            Inst::CallKnown { info } => {
583                let op = ljustify("call".to_string());
584                let try_call = info
585                    .try_call_info
586                    .as_ref()
587                    .map(|tci| pretty_print_try_call(tci))
588                    .unwrap_or_default();
589                format!("{op} {:?}{try_call}", info.dest)
590            }
591
592            Inst::CallUnknown { info } => {
593                let dest = info.dest.pretty_print(8);
594                let op = ljustify("call".to_string());
595                let try_call = info
596                    .try_call_info
597                    .as_ref()
598                    .map(|tci| pretty_print_try_call(tci))
599                    .unwrap_or_default();
600                format!("{op} *{dest}{try_call}")
601            }
602
603            Inst::ReturnCallKnown { info } => {
604                let ReturnCallInfo {
605                    uses,
606                    new_stack_arg_size,
607                    tmp,
608                    dest,
609                } = &**info;
610                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
611                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
612                for ret in uses {
613                    let preg = pretty_print_reg(ret.preg, 8);
614                    let vreg = pretty_print_reg(ret.vreg, 8);
615                    write!(&mut s, " {vreg}={preg}").unwrap();
616                }
617                s
618            }
619
620            Inst::ReturnCallUnknown { info } => {
621                let ReturnCallInfo {
622                    uses,
623                    new_stack_arg_size,
624                    tmp,
625                    dest,
626                } = &**info;
627                let callee = pretty_print_reg(*dest, 8);
628                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
629                let mut s =
630                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
631                for ret in uses {
632                    let preg = pretty_print_reg(ret.preg, 8);
633                    let vreg = pretty_print_reg(ret.vreg, 8);
634                    write!(&mut s, " {vreg}={preg}").unwrap();
635                }
636                s
637            }
638
639            Inst::Args { args } => {
640                let mut s = "args".to_string();
641                for arg in args {
642                    let preg = pretty_print_reg(arg.preg, 8);
643                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
644                    write!(&mut s, " {def}={preg}").unwrap();
645                }
646                s
647            }
648
649            Inst::Rets { rets } => {
650                let mut s = "rets".to_string();
651                for ret in rets {
652                    let preg = pretty_print_reg(ret.preg, 8);
653                    let vreg = pretty_print_reg(ret.vreg, 8);
654                    write!(&mut s, " {vreg}={preg}").unwrap();
655                }
656                s
657            }
658
659            Inst::StackSwitchBasic {
660                store_context_ptr,
661                load_context_ptr,
662                in_payload0,
663                out_payload0,
664            } => {
665                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
666                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
667                let in_payload0 = pretty_print_reg(**in_payload0, 8);
668                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
669                format!(
670                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
671                )
672            }
673
674            Inst::JmpKnown { dst } => {
675                let op = ljustify("jmp".to_string());
676                let dst = dst.to_string();
677                format!("{op} {dst}")
678            }
679
680            Inst::WinchJmpIf { cc, taken } => {
681                let taken = taken.to_string();
682                let op = ljustify2("j".to_string(), cc.to_string());
683                format!("{op} {taken}")
684            }
685
686            Inst::JmpCondOr {
687                cc1,
688                cc2,
689                taken,
690                not_taken,
691            } => {
692                let taken = taken.to_string();
693                let not_taken = not_taken.to_string();
694                let op = ljustify(format!("j{cc1},{cc2}"));
695                format!("{op} {taken}; j {not_taken}")
696            }
697
698            Inst::JmpCond {
699                cc,
700                taken,
701                not_taken,
702            } => {
703                let taken = taken.to_string();
704                let not_taken = not_taken.to_string();
705                let op = ljustify2("j".to_string(), cc.to_string());
706                format!("{op} {taken}; j {not_taken}")
707            }
708
709            Inst::JmpTableSeq {
710                idx, tmp1, tmp2, ..
711            } => {
712                let idx = pretty_print_reg(*idx, 8);
713                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
714                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
715                let op = ljustify("br_table".into());
716                format!("{op} {idx}, {tmp1}, {tmp2}")
717            }
718
719            Inst::TrapIf { cc, trap_code, .. } => {
720                format!("j{cc} #trap={trap_code}")
721            }
722
723            Inst::TrapIfAnd {
724                cc1,
725                cc2,
726                trap_code,
727                ..
728            } => {
729                let cc1 = cc1.invert();
730                let cc2 = cc2.invert();
731                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
732            }
733
734            Inst::TrapIfOr {
735                cc1,
736                cc2,
737                trap_code,
738                ..
739            } => {
740                let cc2 = cc2.invert();
741                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
742            }
743
744            Inst::LoadExtName {
745                dst, name, offset, ..
746            } => {
747                let dst = pretty_print_reg(*dst.to_reg(), 8);
748                let name = name.display(None);
749                let op = ljustify("load_ext_name".into());
750                format!("{op} {name}+{offset}, {dst}")
751            }
752
753            Inst::AtomicRmwSeq { ty, op, .. } => {
754                let ty = ty.bits();
755                format!(
756                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
757                )
758            }
759
760            Inst::Atomic128RmwSeq { args } => {
761                let Atomic128RmwSeqArgs {
762                    op,
763                    mem_low,
764                    mem_high,
765                    operand_low,
766                    operand_high,
767                    temp_low,
768                    temp_high,
769                    dst_old_low,
770                    dst_old_high,
771                } = &**args;
772                let operand_low = pretty_print_reg(**operand_low, 8);
773                let operand_high = pretty_print_reg(**operand_high, 8);
774                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
775                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
776                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
777                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
778                let mem_low = mem_low.pretty_print(16);
779                let mem_high = mem_high.pretty_print(16);
780                format!(
781                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem_low}:{mem_high}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem_low}:{mem_high} = {temp_high}:{temp_low} }}"
782                )
783            }
784
785            Inst::Atomic128XchgSeq { args } => {
786                let Atomic128XchgSeqArgs {
787                    mem_low,
788                    mem_high,
789                    operand_low,
790                    operand_high,
791                    dst_old_low,
792                    dst_old_high,
793                } = &**args;
794                let operand_low = pretty_print_reg(**operand_low, 8);
795                let operand_high = pretty_print_reg(**operand_high, 8);
796                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
797                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
798                let mem_low = mem_low.pretty_print(16);
799                let mem_high = mem_high.pretty_print(16);
800                format!(
801                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem_low}:{mem_high}; {mem_low}:{mem_high} = {operand_high}:{operand_low} }}"
802                )
803            }
804
805            Inst::ElfTlsGetAddr { symbol, dst } => {
806                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
807                format!("{dst} = elf_tls_get_addr {symbol:?}")
808            }
809
810            Inst::MachOTlsGetAddr { symbol, dst } => {
811                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
812                format!("{dst} = macho_tls_get_addr {symbol:?}")
813            }
814
815            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
816                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
817                let tmp = tmp.to_reg().to_reg();
818
819                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
820                if tmp.is_virtual() {
821                    let tmp = pretty_print_reg(tmp, 8);
822                    write!(&mut s, ", {tmp}").unwrap();
823                };
824
825                s
826            }
827
828            Inst::Unwind { inst } => format!("unwind {inst:?}"),
829
830            Inst::DummyUse { reg } => {
831                let reg = pretty_print_reg(*reg, 8);
832                format!("dummy_use {reg}")
833            }
834
835            Inst::LabelAddress { dst, label } => {
836                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
837                format!("label_address {dst}, {label:?}")
838            }
839
840            Inst::SequencePoint {} => {
841                format!("sequence_point")
842            }
843
844            Inst::External { inst } => {
845                format!("{inst}")
846            }
847        }
848    }
849}
850
851fn pretty_print_try_call(info: &TryCallInfo) -> String {
852    format!(
853        "; jmp {:?}; catch [{}]",
854        info.continuation,
855        info.pretty_print_dests()
856    )
857}
858
859impl fmt::Debug for Inst {
860    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
861        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
862    }
863}
864
/// Report every register operand of `inst` (uses, defs, fixed-register
/// constraints, and clobber sets) to the register allocator's `collector`.
fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            // The fixed rax/rdx constraints mirror the implicit registers of
            // the hardware divide instruction this sequence emits.
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            // 8-bit variant: dividend and result both live in rax.
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        // Pseudo-instructions that only define a fresh (uninitialized) register.
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            // `src` is a physical, non-allocatable register; only `dst` is
            // visible to the allocator.
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            // Mirror image of `MovFromPReg`: the destination is the
            // non-allocatable physical register.
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            // Temporaries are early defs so they cannot alias `src`.
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            // `dst` reuses operand 0 (the alternative); `consequent` is read
            // after that def.
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq { args } => {
            let Atomic128RmwSeqArgs {
                mem_low,
                mem_high,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
                op: _,
            } = &mut **args;
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem_low.get_operands_late(collector);
            mem_high.get_operands_late(collector);
        }

        Inst::Atomic128XchgSeq { args } => {
            let Atomic128XchgSeqArgs {
                mem_low,
                mem_high,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } = &mut **args;
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem_low.get_operands_late(collector);
            mem_high.get_operands_late(collector);
        }

        Inst::Args { args } => {
            // Function-entry pseudo-inst: pin each incoming argument vreg to
            // its ABI-assigned physical register.
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::SequencePoint { .. } => {}

        Inst::External { inst } => {
            // Externally-defined (assembler-crate) instructions report their
            // own operands through the visitor adapter.
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}
1221
1222//=============================================================================
1223// Instructions: misc functions and external interface
1224
// x64 implementation of the target-independent `MachInst` interface.
impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        // All operand collection lives in the free function above.
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        // Recognize pure register-to-register moves, returned as (dst, src),
        // so the register allocator may coalesce or elide them.
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
            // out the upper 32 bits of the destination.  For example, we could
            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
            // %reg.
            Self::External {
                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
                asm::GprMem::Mem(_) => None,
            },
            Self::External {
                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
            } => match rm64 {
                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
                asm::GprMem::Mem(_) => None,
            },

            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits in the destination
            // register, otherwise preserving the upper bits. That can be used
            // for lane-insertion instructions, for example, meaning it's not
            // classified as a register move.
            //
            // Otherwise though all register-to-register movement instructions
            // which move 128-bits are registered as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above also
            // recognize the "B" format which while it can be used for stores it
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        // Everything except the `Args` pseudo-instruction participates in
        // clobber calculation.
        match self {
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        // Only the external `ud2` instruction is recognized as a trap.
        match self {
            Self::External {
                inst: asm::inst::Inst::ud2_zo(..),
            } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        // The TLS-get-addr pseudo-instructions are treated as regular calls
        // here (they clobber caller-saves; see `x64_get_operands`).
        match self {
            Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            // A call with try-call info branches to the continuation or a
            // catch destination, so it terminates its block.
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        // Not implemented; presumably not reached by current callers —
        // TODO confirm and fill out.
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        let inst = match rc_dst {
            RegClass::Int => {
                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
                    .into()
            }
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                // those, which may write more lanes that we need, but are specified to have
                // zero-latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        // Sizes are clamped to 9, the largest single nop `Inst::nop` emits.
        Inst::nop(core::cmp::min(preferred_size, 9) as u8)
    }

    fn gen_nop_units() -> Vec<Vec<u8>> {
        vec![
            // Standard 1-byte NOP.
            vec![0x90],
            // 5-byte NOP useful for patching out patchable calls.
            vec![0x0f, 0x1f, 0x44, 0x00, 0x00],
        ]
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            // I128 is split across a pair of integer registers.
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                // Stand-in vector types indexed by log2(byte size) - 1.
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        // Materialize the bit pattern in a GPR, then move it into the XMM dst.
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        // 15 bytes: the architectural maximum length of a single x86-64
        // instruction.
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16-bytes to 32-bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    // The `ud2` opcode bytes (see `is_trap` above).
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}
1490
/// Constant state used during emissions of a sequence of instructions.
pub struct EmitInfo {
    /// Shared (target-independent) compilation settings.
    pub(super) flags: settings::Flags,
    /// x64-specific ISA settings (feature flags such as SSE/AVX/BMI).
    isa_flags: x64_settings::Flags,
}
1496
1497impl EmitInfo {
1498    /// Create a constant state for emission of instructions.
1499    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
1500        Self { flags, isa_flags }
1501    }
1502}
1503
1504impl asm::AvailableFeatures for &EmitInfo {
1505    fn _64b(&self) -> bool {
1506        // Currently, this x64 backend always assumes 64-bit mode.
1507        true
1508    }
1509
1510    fn compat(&self) -> bool {
1511        // For 32-bit compatibility mode, see
1512        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
1513        false
1514    }
1515
1516    fn sse(&self) -> bool {
1517        // Currently, this x64 backend always assumes SSE.
1518        true
1519    }
1520
1521    fn sse2(&self) -> bool {
1522        // Currently, this x64 backend always assumes SSE2.
1523        true
1524    }
1525
1526    fn sse3(&self) -> bool {
1527        self.isa_flags.has_sse3()
1528    }
1529
1530    fn ssse3(&self) -> bool {
1531        self.isa_flags.has_ssse3()
1532    }
1533
1534    fn sse41(&self) -> bool {
1535        self.isa_flags.has_sse41()
1536    }
1537
1538    fn sse42(&self) -> bool {
1539        self.isa_flags.has_sse42()
1540    }
1541
1542    fn bmi1(&self) -> bool {
1543        self.isa_flags.has_bmi1()
1544    }
1545
1546    fn bmi2(&self) -> bool {
1547        self.isa_flags.has_bmi2()
1548    }
1549
1550    fn lzcnt(&self) -> bool {
1551        self.isa_flags.has_lzcnt()
1552    }
1553
1554    fn popcnt(&self) -> bool {
1555        self.isa_flags.has_popcnt()
1556    }
1557
1558    fn avx(&self) -> bool {
1559        self.isa_flags.has_avx()
1560    }
1561
1562    fn avx2(&self) -> bool {
1563        self.isa_flags.has_avx2()
1564    }
1565
1566    fn avx512f(&self) -> bool {
1567        self.isa_flags.has_avx512f()
1568    }
1569
1570    fn avx512vl(&self) -> bool {
1571        self.isa_flags.has_avx512vl()
1572    }
1573
1574    fn cmpxchg16b(&self) -> bool {
1575        self.isa_flags.has_cmpxchg16b()
1576    }
1577
1578    fn fma(&self) -> bool {
1579        self.isa_flags.has_fma()
1580    }
1581
1582    fn avx512dq(&self) -> bool {
1583        self.isa_flags.has_avx512dq()
1584    }
1585
1586    fn avx512bitalg(&self) -> bool {
1587        self.isa_flags.has_avx512bitalg()
1588    }
1589
1590    fn avx512vbmi(&self) -> bool {
1591        self.isa_flags.has_avx512vbmi()
1592    }
1593}
1594
1595impl MachInstEmit for Inst {
1596    type State = EmitState;
1597    type Info = EmitInfo;
1598
1599    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
1600        emit::emit(self, sink, info, state);
1601    }
1602
1603    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
1604        PrettyPrint::pretty_print(self, 0)
1605    }
1606}
1607
/// A label-use (internal relocation) in generated code.
///
/// Both variants patch a 32-bit little-endian field; they differ only in
/// whether the jump-encoding adjustment (see `patch` below) is applied.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location. No end-of-instruction adjustment is applied.
    PCRel32,
}
1620
1621impl MachInstLabelUse for LabelUse {
1622    const ALIGN: CodeOffset = 1;
1623
1624    fn max_pos_range(self) -> CodeOffset {
1625        match self {
1626            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
1627        }
1628    }
1629
1630    fn max_neg_range(self) -> CodeOffset {
1631        match self {
1632            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
1633        }
1634    }
1635
1636    fn patch_size(self) -> CodeOffset {
1637        match self {
1638            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
1639        }
1640    }
1641
1642    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1643        let pc_rel = (label_offset as i64) - (use_offset as i64);
1644        debug_assert!(pc_rel <= self.max_pos_range() as i64);
1645        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
1646        let pc_rel = pc_rel as u32;
1647        match self {
1648            LabelUse::JmpRel32 => {
1649                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1650                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
1651                buffer.copy_from_slice(&value.to_le_bytes()[..]);
1652            }
1653            LabelUse::PCRel32 => {
1654                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1655                let value = pc_rel.wrapping_add(addend);
1656                buffer.copy_from_slice(&value.to_le_bytes()[..]);
1657            }
1658        }
1659    }
1660
1661    fn supports_veneer(self) -> bool {
1662        match self {
1663            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
1664        }
1665    }
1666
1667    fn veneer_size(self) -> CodeOffset {
1668        match self {
1669            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
1670        }
1671    }
1672
1673    fn worst_case_veneer_size() -> CodeOffset {
1674        0
1675    }
1676
1677    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
1678        match self {
1679            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
1680                panic!("Veneer not supported for JumpRel32 label-use.");
1681            }
1682        }
1683    }
1684
1685    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
1686        match (reloc, addend) {
1687            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
1688            _ => None,
1689        }
1690    }
1691}