cranelift_codegen/isa/x64/inst/mod.rs

//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch unintentional growth in the size of the `Inst`
    // enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

impl Inst {
    /// Check whether the instruction (or pseudo-instruction) can be emitted on
    /// the target architecture described by `emit_info`. Non-assembler
    /// instructions are assumed to require only the baseline feature set
    /// (i.e., 64-bit mode plus SSE2 and earlier).
    fn is_available(&self, emit_info: &EmitInfo) -> bool {
        use asm::AvailableFeatures;

        match self {
            // These instructions are part of SSE2, which is a basic requirement
            // in Cranelift, and don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::LoadExtName { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. } => true,

            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),

            Inst::External { inst } => inst.is_available(&emit_info),
        }
    }
}

// Handy constructors for Insts.

impl Inst {
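    /// Creates a single no-op instruction that is `len` bytes long, where
    /// `len` must be between 1 and 9 inclusive.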
    pub(crate) fn nop(len: u8) -> Self {
        assert!(len > 0 && len <= 9);
        let inst = match len {
            1 => asm::inst::nop_1b::new().into(),
            2 => asm::inst::nop_2b::new().into(),
            3 => asm::inst::nop_3b::new().into(),
            4 => asm::inst::nop_4b::new().into(),
            5 => asm::inst::nop_5b::new().into(),
            6 => asm::inst::nop_6b::new().into(),
            7 => asm::inst::nop_7b::new().into(),
            8 => asm::inst::nop_8b::new().into(),
            9 => asm::inst::nop_9b::new().into(),
            _ => unreachable!("nop length must be between 1 and 9"),
        };
        Self::External { inst }
    }

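    /// Adds the 32-bit immediate `simm32` to `dst`, preferring the
    /// sign-extended 8-bit immediate encoding when the value fits.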
    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

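    /// Subtracts the 32-bit immediate `simm32` from `dst`, preferring the
    /// sign-extended 8-bit immediate encoding when the value fits.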
    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    /// Writes the `simm64` immediate into `dst`.
    ///
    /// Note that if `dst_size` is less than 64 bits then the upper bits of
    /// `simm64` are discarded.
    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        let inst = match dst_size {
            OperandSize::Size64 => match u32::try_from(simm64) {
                // If `simm64` fits when zero-extended, use `movl`, which zeros
                // the upper bits.
                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
                _ => match i32::try_from(simm64.cast_signed()) {
                    // If `simm64` fits when sign-extended, use `movq`, which
                    // sign-extends into the upper bits.
                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
                    // Fall back to embedding the entire immediate.
                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
                },
            },
            // FIXME: the input to this function is a logical `simm64` stored
            // as `u64`. That means that ideally what we would do here is cast
            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
            // that back to `u32`. That would ensure that the immediate loses
            // no meaning and has the same logical value. Currently though
            // Cranelift relies on discarding the upper bits because literals
            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
            // the input to this function should change to `i64`. In the
            // meantime this is documented as discarding the upper bits,
            // although this is an old function so that's unlikely to help
            // much.
            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
        };
        Inst::External { inst }
    }

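    /// Loads and zero-extends a value from `src` into `dst` according to
    /// `ext_mode`.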
    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

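    /// Loads and sign-extends a value from `src` into `dst` according to
    /// `ext_mode`.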
    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    /// Compares `src1` against the sign-extended 8-bit immediate `src2`.
    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
        let inst = match size {
            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
        };
        Inst::External { inst }
    }

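    /// Traps with `trap_code` if the condition code `cc` is satisfied.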
    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

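    /// Creates a direct call to the known destination described by `info`.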
    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

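    /// Creates an indirect call through the register or memory destination
    /// described by `info`.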
    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

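    /// Creates an unconditional jump to the label `dst`.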
    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        let to_addr = to_addr.into();
        let inst = match rc {
            RegClass::Int => {
                let from_reg = Gpr::unwrap_new(from_reg);
                match ty {
                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
                    _ => unreachable!(),
                }
            }
            RegClass::Float => {
                let from_reg = Xmm::new(from_reg).unwrap();
                match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                }
            }
            RegClass::Vector => unreachable!(),
        };
        Inst::External { inst }
    }
}

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        match self {
            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = pretty_print_reg(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = pretty_print_reg(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                    j{cc} $next; \
                    mov{suffix} {consequent}, {dst}; \
                    $next:"
                )
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg, 8);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg, 8);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
                operand_low,
                operand_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(**operand_low, 8);
                let operand_high = pretty_print_reg(**operand_high, 8);
                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
                )
            }

            Inst::ElfTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = elf_tls_get_addr {symbol:?}")
            }

            Inst::MachOTlsGetAddr { symbol, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("{dst} = macho_tls_get_addr {symbol:?}")
            }

            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let tmp = tmp.to_reg().to_reg();

                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
                if tmp.is_virtual() {
                    let tmp = pretty_print_reg(tmp, 8);
                    write!(&mut s, ", {tmp}").unwrap();
                };

                s
            }

            Inst::Unwind { inst } => format!("unwind {inst:?}"),

            Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(*reg, 8);
                format!("dummy_use {reg}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                format!("label_address {dst}, {label:?}")
            }

            Inst::External { inst } => {
                format!("{inst}")
            }
        }
    }
}

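/// Pretty-prints the exception-handling tail of a try-call: the jump to the
/// continuation block followed by the catch destinations.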
fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; jmp {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

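/// Reports every register operand of `inst` (uses, defs, fixed constraints,
/// and clobbers) to the register allocator via `collector`.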
fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                try_call_info,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
            if let Some(try_call_info) = try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register. But that register is not allocatable by the
            // register allocator, so we don't need to mark it as used here.

            // We use %rax to set the address
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary variable to load the _tls_index
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}
1201
1202//=============================================================================
1203// Instructions: misc functions and external interface
1204
1205impl MachInst for Inst {
1206    type ABIMachineSpec = X64ABIMachineSpec;
1207
1208    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
1209        x64_get_operands(self, collector)
1210    }
1211
1212    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
1213        use asm::inst::Inst as I;
1214        match self {
1215            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
1216            // out the upper 32 bits of the destination.  For example, we could
1217            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
1218            // %reg.
1219            Self::External {
1220                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
1221            } => match rm64 {
1222                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
1223                asm::GprMem::Mem(_) => None,
1224            },
1225            Self::External {
1226                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
1227            } => match rm64 {
1228                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
1229                asm::GprMem::Mem(_) => None,
1230            },
1231
1232            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
1233            // here because they only overwrite the low bits in the destination
1234            // register, otherwise preserving the upper bits. That can be used
1235            // for lane-insertion instructions, for example, meaning it's not
1236            // classified as a register move.
1237            //
1238            // Otherwise though all register-to-register movement instructions
1239            // which move 128-bits are registered as moves.
1240            Self::External {
1241                inst:
1242                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
1243                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
1244                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
1245                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
1246                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
1247                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
1248            } => match xmm_m128 {
1249                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
1250                asm::XmmMem::Mem(_) => None,
1251            },
1252            // In addition to the "A" format of instructions above also
1253            // recognize the "B" format which while it can be used for stores it
1254            // can also be used for register moves.
1255            Self::External {
1256                inst:
1257                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
1258                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
1259                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
1260                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
1261                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
1262                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
1263            } => match xmm_m128 {
1264                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
1265                asm::XmmMem::Mem(_) => None,
1266            },
1267            _ => None,
1268        }
1269    }
1270
1271    fn is_included_in_clobbers(&self) -> bool {
1272        match self {
1273            &Inst::Args { .. } => false,
1274            _ => true,
1275        }
1276    }
1277
1278    fn is_trap(&self) -> bool {
1279        match self {
1280            Self::External {
1281                inst: asm::inst::Inst::ud2_zo(..),
1282            } => true,
1283            _ => false,
1284        }
1285    }
1286
1287    fn is_args(&self) -> bool {
1288        match self {
1289            Self::Args { .. } => true,
1290            _ => false,
1291        }
1292    }
1293
1294    fn call_type(&self) -> CallType {
1295        match self {
1296            Inst::CallKnown { .. }
1297            | Inst::CallUnknown { .. }
1298            | Inst::ElfTlsGetAddr { .. }
1299            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1300
1301            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,
1302
1303            _ => CallType::None,
1304        }
1305    }
1306
1307    fn is_term(&self) -> MachTerminator {
1308        match self {
1309            // Interesting cases.
1310            &Self::Rets { .. } => MachTerminator::Ret,
1311            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
1312                MachTerminator::RetCall
1313            }
1314            &Self::JmpKnown { .. } => MachTerminator::Branch,
1315            &Self::JmpCond { .. } => MachTerminator::Branch,
1316            &Self::JmpCondOr { .. } => MachTerminator::Branch,
1317            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
1318            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1319            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
1320                MachTerminator::Branch
1321            }
1322            // All other cases are boring.
1323            _ => MachTerminator::None,
1324        }
1325    }
1326
1327    fn is_low_level_branch(&self) -> bool {
1328        match self {
1329            &Self::WinchJmpIf { .. } => true,
1330            _ => false,
1331        }
1332    }
1333
1334    fn is_mem_access(&self) -> bool {
1335        panic!("TODO FILL ME OUT")
1336    }
1337
1338    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
1339        trace!(
1340            "Inst::gen_move {:?} -> {:?} (type: {:?})",
1341            src_reg,
1342            dst_reg.to_reg(),
1343            ty
1344        );
1345        let rc_dst = dst_reg.to_reg().class();
1346        let rc_src = src_reg.class();
1347        // If this isn't true, we have gone way off the rails.
1348        debug_assert!(rc_dst == rc_src);
1349        let inst = match rc_dst {
1350            RegClass::Int => {
1351                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
1352                    .into()
1353            }
1354            RegClass::Float => {
1355                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
1356                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
1357                // those, which may write more lanes that we need, but are specified to have
1358                // zero-latency.
1359                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
1360                let src_reg = Xmm::new(src_reg).unwrap();
1361                match ty {
1362                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
1363                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
1364                    }
1365                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
1366                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
1367                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
1368                    }
1369                    _ => unimplemented!("unable to move type: {}", ty),
1370                }
1371            }
1372            RegClass::Vector => unreachable!(),
1373        };
1374        Inst::External { inst }
1375    }
1376
1377    fn gen_nop(preferred_size: usize) -> Inst {
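        // A single multi-byte x86-64 NOP covers at most 9 bytes, so clamp the requested
        // size; callers emit multiple NOPs when they need a larger gap.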
1378        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
1379    }
1380
1381    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1382        match ty {
1383            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
1384            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
1385            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
1386            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
1387            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
1388            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
1389            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
1390            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
1391            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
1392            _ if ty.is_vector() && ty.bits() <= 128 => {
1393                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
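                // Vectors of 2, 4, 8, or 16 bytes map to indices 0..=3 via
                // log2(bytes) - 1; all of them live in XMM (Float-class) registers.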
1394                Ok((
1395                    &[RegClass::Float],
1396                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1397                ))
1398            }
1399            _ => Err(CodegenError::Unsupported(format!(
1400                "Unexpected SSA-value type: {ty}"
1401            ))),
1402        }
1403    }
1404
1405    fn canonical_type_for_rc(rc: RegClass) -> Type {
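        // GPRs canonically hold I64; XMM (Float-class) registers hold both scalar FP
        // and vector values, so their canonical type is the full 128-bit I8X16.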
1406        match rc {
1407            RegClass::Float => types::I8X16,
1408            RegClass::Int => types::I64,
1409            RegClass::Vector => unreachable!(),
1410        }
1411    }
1412
1413    fn gen_jump(label: MachLabel) -> Inst {
1414        Inst::jmp_known(label)
1415    }
1416
1417    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
1418        Some(Inst::imm(OperandSize::Size64, value, dst))
1419    }
1420
1421    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
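        // Materialize the raw bit pattern in the temporary GPR, then transfer it into
        // the XMM destination with `movq`.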
1422        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
1423        let gpr_to_xmm = Inst::External {
1424            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
1425        };
1426        smallvec![imm_to_gpr, gpr_to_xmm]
1427    }
1428
1429    fn gen_dummy_use(reg: Reg) -> Self {
1430        Inst::DummyUse { reg }
1431    }
1432
1433    fn worst_case_size() -> CodeOffset {
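        // The architectural maximum length of a single x86-64 instruction is 15 bytes.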
1434        15
1435    }
1436
1437    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1438        RegClass::Int
1439    }
1440
1441    fn is_safepoint(&self) -> bool {
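        // Only calls need safepoint (stack-map) metadata recorded for them.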
1442        match self {
1443            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
1444            _ => false,
1445        }
1446    }
1447
1448    fn function_alignment() -> FunctionAlignment {
1449        FunctionAlignment {
1450            minimum: 1,
1451            // Use a 32-byte rather than 16-byte preferred alignment for better performance;
1452            // see https://github.com/bytecodealliance/wasmtime/issues/8573.
1453            preferred: 32,
1454        }
1455    }
1456
1457    type LabelUse = LabelUse;
1458
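    // The machine encoding of the `ud2` trap instruction.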
1459    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
1460}
1461
1462/// Constant state used during emission of a sequence of instructions.
1463pub struct EmitInfo {
1464    pub(super) flags: settings::Flags,
1465    isa_flags: x64_settings::Flags,
1466}
1467
1468impl EmitInfo {
1469    /// Create a constant state for emission of instructions.
1470    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
1471        Self { flags, isa_flags }
1472    }
1473}
1474
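// This impl bridges Cranelift's x64 ISA flags to the external assembler's feature
// queries, so instruction-availability checks can consult the target's configured
// features.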
1475impl asm::AvailableFeatures for &EmitInfo {
1476    fn _64b(&self) -> bool {
1477        // Currently, this x64 backend always assumes 64-bit mode.
1478        true
1479    }
1480
1481    fn compat(&self) -> bool {
1482        // For 32-bit compatibility mode, see
1483        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
1484        false
1485    }
1486
1487    fn sse(&self) -> bool {
1488        // Currently, this x64 backend always assumes SSE.
1489        true
1490    }
1491
1492    fn sse2(&self) -> bool {
1493        // Currently, this x64 backend always assumes SSE2.
1494        true
1495    }
1496
1497    fn sse3(&self) -> bool {
1498        self.isa_flags.has_sse3()
1499    }
1500
1501    fn ssse3(&self) -> bool {
1502        self.isa_flags.has_ssse3()
1503    }
1504
1505    fn sse41(&self) -> bool {
1506        self.isa_flags.has_sse41()
1507    }
1508
1509    fn sse42(&self) -> bool {
1510        self.isa_flags.has_sse42()
1511    }
1512
1513    fn bmi1(&self) -> bool {
1514        self.isa_flags.has_bmi1()
1515    }
1516
1517    fn bmi2(&self) -> bool {
1518        self.isa_flags.has_bmi2()
1519    }
1520
1521    fn lzcnt(&self) -> bool {
1522        self.isa_flags.has_lzcnt()
1523    }
1524
1525    fn popcnt(&self) -> bool {
1526        self.isa_flags.has_popcnt()
1527    }
1528
1529    fn avx(&self) -> bool {
1530        self.isa_flags.has_avx()
1531    }
1532
1533    fn avx2(&self) -> bool {
1534        self.isa_flags.has_avx2()
1535    }
1536
1537    fn avx512f(&self) -> bool {
1538        self.isa_flags.has_avx512f()
1539    }
1540
1541    fn avx512vl(&self) -> bool {
1542        self.isa_flags.has_avx512vl()
1543    }
1544
1545    fn cmpxchg16b(&self) -> bool {
1546        self.isa_flags.has_cmpxchg16b()
1547    }
1548
1549    fn fma(&self) -> bool {
1550        self.isa_flags.has_fma()
1551    }
1552
1553    fn avx512dq(&self) -> bool {
1554        self.isa_flags.has_avx512dq()
1555    }
1556
1557    fn avx512bitalg(&self) -> bool {
1558        self.isa_flags.has_avx512bitalg()
1559    }
1560
1561    fn avx512vbmi(&self) -> bool {
1562        self.isa_flags.has_avx512vbmi()
1563    }
1564}
1565
1566impl MachInstEmit for Inst {
1567    type State = EmitState;
1568    type Info = EmitInfo;
1569
1570    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
1571        emit::emit(self, sink, info, state);
1572    }
1573
1574    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
1575        PrettyPrint::pretty_print(self, 0)
1576    }
1577}
1578
1579/// A label-use (internal relocation) in generated code.
1580#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1581pub enum LabelUse {
1582    /// A 32-bit offset from the location of the relocation itself, added to the existing value
1583    /// at that location. Used for control-flow instructions whose offset is measured from the
1584    /// start of the next instruction (so the 4-byte size of the payload is subtracted when patching).
1585    JmpRel32,
1586
1587    /// A 32-bit offset from the location of the relocation itself, added to the existing value
1588    /// at that location.
1589    PCRel32,
1590}
1591
1592impl MachInstLabelUse for LabelUse {
1593    const ALIGN: CodeOffset = 1;
1594
1595    fn max_pos_range(self) -> CodeOffset {
1596        match self {
1597            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
1598        }
1599    }
1600
1601    fn max_neg_range(self) -> CodeOffset {
1602        match self {
1603            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
1604        }
1605    }
1606
1607    fn patch_size(self) -> CodeOffset {
1608        match self {
1609            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
1610        }
1611    }
1612
1613    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1614        let pc_rel = (label_offset as i64) - (use_offset as i64);
1615        debug_assert!(pc_rel <= self.max_pos_range() as i64);
1616        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
1617        let pc_rel = pc_rel as u32;
1618        match self {
1619            LabelUse::JmpRel32 => {
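                // Branch targets are relative to the end of the 4-byte offset field,
                // hence the extra subtraction of 4.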
1620                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1621                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
1622                buffer.copy_from_slice(&value.to_le_bytes()[..]);
1623            }
1624            LabelUse::PCRel32 => {
1625                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
1626                let value = pc_rel.wrapping_add(addend);
1627                buffer.copy_from_slice(&value.to_le_bytes()[..]);
1628            }
1629        }
1630    }
1631
1632    fn supports_veneer(self) -> bool {
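        // 32-bit pc-relative offsets already reach roughly +/-2 GiB, so veneers are
        // never needed on x64.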
1633        match self {
1634            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
1635        }
1636    }
1637
1638    fn veneer_size(self) -> CodeOffset {
1639        match self {
1640            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
1641        }
1642    }
1643
1644    fn worst_case_veneer_size() -> CodeOffset {
1645        0
1646    }
1647
1648    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
1649        match self {
1650            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
1651                panic!("Veneer not supported for JumpRel32 label-use.");
1652            }
1653        }
1654    }
1655
1656    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
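        // A pc-relative call relocation with addend -4 has the same shape as a
        // `JmpRel32` label use: a 32-bit offset measured from the end of the 4-byte field.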
1657        match (reloc, addend) {
1658            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
1659            _ => None,
1660        }
1661    }
1662}