cranelift_codegen/isa/x64/inst/mod.rs

1//! This module defines x86_64-specific machine instruction types.
2
3pub use emit_state::EmitState;
4
5use crate::binemit::{Addend, CodeOffset, Reloc};
6use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
7use crate::isa::x64::abi::X64ABIMachineSpec;
8use crate::isa::x64::inst::regs::pretty_print_reg;
9use crate::isa::x64::settings as x64_settings;
10use crate::isa::{CallConv, FunctionAlignment};
11use crate::{CodegenError, CodegenResult, settings};
12use crate::{machinst::*, trace};
13use alloc::boxed::Box;
14use core::slice;
15use cranelift_assembler_x64 as asm;
16use smallvec::{SmallVec, smallvec};
17use std::fmt::{self, Write};
18use std::string::{String, ToString};
19
20pub mod args;
21mod emit;
22mod emit_state;
23#[cfg(test)]
24mod emit_tests;
25pub mod external;
26pub mod regs;
27mod stack_switch;
28pub mod unwind;
29
30use args::*;
31
32//=============================================================================
33// Instructions (top level): definition
34
35// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
36pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
37pub use super::lower::isle::generated_code::MInst as Inst;
38
39/// Out-of-line data for return-calls, to keep the size of `Inst` down.
40#[derive(Clone, Debug)]
41pub struct ReturnCallInfo<T> {
42    /// Where this call is going.
43    pub dest: T,
44
45    /// The size of the argument area for this return-call, potentially smaller than that of the
46    /// caller, but never larger.
47    pub new_stack_arg_size: u32,
48
49    /// The in-register arguments and their constraints.
50    pub uses: CallArgList,
51
52    /// A temporary for use when moving the return address.
53    pub tmp: WritableGpr,
54}
55
56#[test]
57#[cfg(target_pointer_width = "64")]
58fn inst_size_test() {
59    // This test helps catch unintentional growth in the size of the
60    // `Inst` enum.
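    // If it fails after adding a new variant, consider boxing large payloads
    // (as `ReturnCallInfo` does) rather than bumping the expected size.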
61    assert_eq!(48, std::mem::size_of::<Inst>());
62}
63
64impl Inst {
65    /// Check whether the instruction (or pseudo-instruction) can be emitted on
66    /// the target architecture described by `emit_info`. For non-assembler
67    /// instructions, this assumes the baseline feature set (i.e., 64-bit mode
68    /// with SSE2 and below).
69    fn is_available(&self, emit_info: &EmitInfo) -> bool {
70        use asm::AvailableFeatures;
71
72        match self {
73            // These instructions are part of SSE2, which is a basic requirement
74            // in Cranelift, and don't have to be checked.
75            Inst::AtomicRmwSeq { .. }
76            | Inst::CallKnown { .. }
77            | Inst::CallUnknown { .. }
78            | Inst::ReturnCallKnown { .. }
79            | Inst::ReturnCallUnknown { .. }
80            | Inst::CheckedSRemSeq { .. }
81            | Inst::CheckedSRemSeq8 { .. }
82            | Inst::CvtFloatToSintSeq { .. }
83            | Inst::CvtFloatToUintSeq { .. }
84            | Inst::CvtUint64ToFloatSeq { .. }
85            | Inst::JmpCond { .. }
86            | Inst::JmpCondOr { .. }
87            | Inst::WinchJmpIf { .. }
88            | Inst::JmpKnown { .. }
89            | Inst::JmpTableSeq { .. }
90            | Inst::LoadExtName { .. }
91            | Inst::MovFromPReg { .. }
92            | Inst::MovToPReg { .. }
93            | Inst::StackProbeLoop { .. }
94            | Inst::Args { .. }
95            | Inst::Rets { .. }
96            | Inst::StackSwitchBasic { .. }
97            | Inst::TrapIf { .. }
98            | Inst::TrapIfAnd { .. }
99            | Inst::TrapIfOr { .. }
100            | Inst::XmmCmove { .. }
101            | Inst::XmmMinMaxSeq { .. }
102            | Inst::XmmUninitializedValue { .. }
103            | Inst::GprUninitializedValue { .. }
104            | Inst::ElfTlsGetAddr { .. }
105            | Inst::MachOTlsGetAddr { .. }
106            | Inst::CoffTlsGetAddr { .. }
107            | Inst::Unwind { .. }
108            | Inst::DummyUse { .. } => true,
109
110            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),
111
112            Inst::External { inst } => inst.is_available(emit_info),
113        }
114    }
115}
116
117// Handy constructors for Insts.
118
119impl Inst {
120    pub(crate) fn nop(len: u8) -> Self {
121        assert!(len > 0 && len <= 9);
122        let inst = match len {
123            1 => asm::inst::nop_1b::new().into(),
124            2 => asm::inst::nop_2b::new().into(),
125            3 => asm::inst::nop_3b::new().into(),
126            4 => asm::inst::nop_4b::new().into(),
127            5 => asm::inst::nop_5b::new().into(),
128            6 => asm::inst::nop_6b::new().into(),
129            7 => asm::inst::nop_7b::new().into(),
130            8 => asm::inst::nop_8b::new().into(),
131            9 => asm::inst::nop_9b::new().into(),
132            _ => unreachable!("nop length must be between 1 and 9"),
133        };
134        Self::External { inst }
135    }
136
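    // Both helpers below prefer the sign-extended 8-bit immediate form when the
    // immediate fits in an `i8`, since that encoding is shorter; otherwise they
    // fall back to the sign-extended 32-bit immediate form.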
137    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
138        let inst = if let Ok(simm8) = i8::try_from(simm32) {
139            asm::inst::addq_mi_sxb::new(dst, simm8).into()
140        } else {
141            asm::inst::addq_mi_sxl::new(dst, simm32).into()
142        };
143        Inst::External { inst }
144    }
145
146    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
147        let inst = if let Ok(simm8) = i8::try_from(simm32) {
148            asm::inst::subq_mi_sxb::new(dst, simm8).into()
149        } else {
150            asm::inst::subq_mi_sxl::new(dst, simm32).into()
151        };
152        Inst::External { inst }
153    }
154
155    /// Writes the `simm64` immediate into `dst`.
156    ///
157    /// Note that if `dst_size` is smaller than 64 bits, the upper bits of
158    /// `simm64` are discarded (the 32-bit `mov` zeroes the destination's upper bits).
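    ///
    /// As an illustration of the selection below (for `Size64`): a value that
    /// fits in 32 bits zero-extended (e.g. `1`) uses the shorter `movl` form, a
    /// value that fits in 32 bits sign-extended (e.g. `u64::MAX`, i.e. `-1`)
    /// uses `movq` with a 32-bit immediate, and anything else falls back to
    /// `movabsq` with the full 64-bit immediate.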
159    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
160        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
161        debug_assert!(dst.to_reg().class() == RegClass::Int);
162        let dst = WritableGpr::from_writable_reg(dst).unwrap();
163        let inst = match dst_size {
164            OperandSize::Size64 => match u32::try_from(simm64) {
165                // If `simm64` fits in 32 bits zero-extended, use `movl`, which
166                // zeroes the destination's upper bits.
167                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
168                _ => match i32::try_from(simm64.cast_signed()) {
169                    // If `simm64` fits in 32 bits sign-extended, use `movq`, which
170                    // sign-extends into the upper bits.
171                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
172                    // fall back to embedding the entire immediate.
173                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
174                },
175            },
176            // FIXME: the input to this function is a logical `simm64` stored
177            // as `u64`. That means that ideally what we would do here is cast
178            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
179            // that back to `u32`. That would ensure that the immediate loses
180            // no meaning and has the same logical value. Currently though
181            // Cranelift relies on discarding the upper bits because literals
182            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
183            // the input to this function should change to `i64`. In the
184            // meantime this is documented as discarding the upper bits,
185            // although this is an old function so that's unlikely to help
186            // much.
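            // (For example, `0x8000_0000_u64` is 2^31, one more than `i32::MAX`,
            // so `i32::try_from` rejects it even though the same 32-bit pattern is
            // representable as `i32::MIN`.)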
187            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
188        };
189        Inst::External { inst }
190    }
191
192    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
193        src.assert_regclass_is(RegClass::Int);
194        debug_assert!(dst.to_reg().class() == RegClass::Int);
195        let src = match src {
196            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
197            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
198        };
199        let inst = match ext_mode {
200            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
201            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
202            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
203            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
204            ExtMode::LQ => {
205                // This instruction selection may seem strange but is correct in
206                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
207                // "32-bit operands generate a 32-bit result, zero-extended to a
208                // 64-bit result in the destination general-purpose register."
209                // This is applicable beyond `mov` but we use this fact to
210                // zero-extend `src` into `dst`.
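                // For example, `movl %eax, %eax` clears the upper 32 bits of `%rax`.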
211                asm::inst::movl_rm::new(dst, src).into()
212            }
213        };
214        Inst::External { inst }
215    }
216
217    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
218        src.assert_regclass_is(RegClass::Int);
219        debug_assert!(dst.to_reg().class() == RegClass::Int);
220        let src = match src {
221            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
222            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
223        };
224        let inst = match ext_mode {
225            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
226            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
227            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
228            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
229            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
230        };
231        Inst::External { inst }
232    }
233
234    /// Compares `src1` against `src2`
235    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
236        let inst = match size {
237            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
238            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
239            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
240            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
241        };
242        Inst::External { inst }
243    }
244
245    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
246        Inst::TrapIf { cc, trap_code }
247    }
248
249    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
250        Inst::CallKnown { info }
251    }
252
253    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
254        info.dest.assert_regclass_is(RegClass::Int);
255        Inst::CallUnknown { info }
256    }
257
258    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
259        Inst::JmpKnown { dst }
260    }
261
262    /// Choose which instruction to use for loading a register value from memory. For loads smaller
263    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
264    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
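    /// For example, an `I32` load into a GPR must pick sign- or zero-extension
    /// to fill the 64-bit destination, while an `I64` load uses a plain 64-bit
    /// `movq` and ignores `ext_kind`.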
265    pub(crate) fn load(
266        ty: Type,
267        from_addr: impl Into<SyntheticAmode>,
268        to_reg: Writable<Reg>,
269        ext_kind: ExtKind,
270    ) -> Inst {
271        let rc = to_reg.to_reg().class();
272        match rc {
273            RegClass::Int => {
274                let ext_mode = match ty.bytes() {
275                    1 => Some(ExtMode::BQ),
276                    2 => Some(ExtMode::WQ),
277                    4 => Some(ExtMode::LQ),
278                    8 => None,
279                    _ => unreachable!("the type should never use a scalar load: {}", ty),
280                };
281                if let Some(ext_mode) = ext_mode {
282                    // Values smaller than 64 bits must be extended in some way.
283                    match ext_kind {
284                        ExtKind::SignExtend => {
285                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
286                        }
287                        ExtKind::ZeroExtend => {
288                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
289                        }
290                        ExtKind::None => {
291                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
292                        }
293                    }
294                } else {
295                    // 64-bit values can be moved directly.
296                    let from_addr = asm::GprMem::from(from_addr.into());
297                    Inst::External {
298                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
299                    }
300                }
301            }
302            RegClass::Float => {
303                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
304                let from_addr = from_addr.into();
305                let inst = match ty {
306                    types::F16 | types::I8X2 => {
307                        panic!("loading a f16 or i8x2 requires multiple instructions")
308                    }
309                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
310                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
311                    }
312                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
313                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
314                    }
315                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
316                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
317                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
318                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
319                    }
320                    _ => unimplemented!("unable to load type: {}", ty),
321                };
322                Inst::External { inst }
323            }
324            RegClass::Vector => unreachable!(),
325        }
326    }
327
328    /// Choose which instruction to use for storing a register value to memory.
329    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
330        let rc = from_reg.class();
331        let to_addr = to_addr.into();
332        let inst = match rc {
333            RegClass::Int => {
334                let from_reg = Gpr::unwrap_new(from_reg);
335                match ty {
336                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
337                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
338                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
339                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
340                    _ => unreachable!(),
341                }
342            }
343            RegClass::Float => {
344                let from_reg = Xmm::new(from_reg).unwrap();
345                match ty {
346                    types::F16 | types::I8X2 => {
347                        panic!("storing a f16 or i8x2 requires multiple instructions")
348                    }
349                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
350                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
351                    }
352                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
353                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
354                    }
355                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
356                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
357                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
358                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
359                    }
360                    _ => unimplemented!("unable to store type: {}", ty),
361                }
362            }
363            RegClass::Vector => unreachable!(),
364        };
365        Inst::External { inst }
366    }
367}
368
369//=============================================================================
370// Instructions: printing
371
372impl PrettyPrint for Inst {
373    fn pretty_print(&self, _size: u8) -> String {
374        fn ljustify(s: String) -> String {
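            // Pad the mnemonic out to a fixed-width column so that operands line
            // up in the printed instruction.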
375            let w = 7;
376            if s.len() >= w {
377                s
378            } else {
379                let need = usize::min(w, w - s.len());
380                s + &format!("{nil: <width$}", nil = "", width = need)
381            }
382        }
383
384        fn ljustify2(s1: String, s2: String) -> String {
385            ljustify(s1 + &s2)
386        }
387
388        match self {
389            Inst::CheckedSRemSeq {
390                size,
391                divisor,
392                dividend_lo,
393                dividend_hi,
394                dst_quotient,
395                dst_remainder,
396            } => {
397                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
398                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
399                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
400                let dst_quotient =
401                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
402                let dst_remainder =
403                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
404                format!(
405                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
406                        {divisor}, {dst_quotient}, {dst_remainder}",
407                )
408            }
409
410            Inst::CheckedSRemSeq8 {
411                divisor,
412                dividend,
413                dst,
414            } => {
415                let divisor = pretty_print_reg(divisor.to_reg(), 1);
416                let dividend = pretty_print_reg(dividend.to_reg(), 1);
417                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
418                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
419            }
420
421            Inst::XmmMinMaxSeq {
422                lhs,
423                rhs,
424                dst,
425                is_min,
426                size,
427            } => {
428                let rhs = pretty_print_reg(rhs.to_reg(), 8);
429                let lhs = pretty_print_reg(lhs.to_reg(), 8);
430                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
431                let op = ljustify2(
432                    if *is_min {
433                        "xmm min seq ".to_string()
434                    } else {
435                        "xmm max seq ".to_string()
436                    },
437                    format!("f{}", size.to_bits()),
438                );
439                format!("{op} {lhs}, {rhs}, {dst}")
440            }
441
442            Inst::XmmUninitializedValue { dst } => {
443                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
444                let op = ljustify("uninit".into());
445                format!("{op} {dst}")
446            }
447
448            Inst::GprUninitializedValue { dst } => {
449                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
450                let op = ljustify("uninit".into());
451                format!("{op} {dst}")
452            }
453
454            Inst::CvtUint64ToFloatSeq {
455                src,
456                dst,
457                dst_size,
458                tmp_gpr1,
459                tmp_gpr2,
460                ..
461            } => {
462                let src = pretty_print_reg(src.to_reg(), 8);
463                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
464                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
465                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
466                let op = ljustify(format!(
467                    "u64_to_{}_seq",
468                    if *dst_size == OperandSize::Size64 {
469                        "f64"
470                    } else {
471                        "f32"
472                    }
473                ));
474                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
475            }
476
477            Inst::CvtFloatToSintSeq {
478                src,
479                dst,
480                src_size,
481                dst_size,
482                tmp_xmm,
483                tmp_gpr,
484                is_saturating,
485            } => {
486                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
487                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
488                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
489                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
490                let op = ljustify(format!(
491                    "cvt_float{}_to_sint{}{}_seq",
492                    src_size.to_bits(),
493                    dst_size.to_bits(),
494                    if *is_saturating { "_sat" } else { "" },
495                ));
496                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
497            }
498
499            Inst::CvtFloatToUintSeq {
500                src,
501                dst,
502                src_size,
503                dst_size,
504                tmp_gpr,
505                tmp_xmm,
506                tmp_xmm2,
507                is_saturating,
508            } => {
509                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
510                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
511                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
512                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
513                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
514                let op = ljustify(format!(
515                    "cvt_float{}_to_uint{}{}_seq",
516                    src_size.to_bits(),
517                    dst_size.to_bits(),
518                    if *is_saturating { "_sat" } else { "" },
519                ));
520                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
521            }
522
523            Inst::MovFromPReg { src, dst } => {
524                let src: Reg = (*src).into();
525                let src = pretty_print_reg(src, 8);
526                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
527                let op = ljustify("movq".to_string());
528                format!("{op} {src}, {dst}")
529            }
530
531            Inst::MovToPReg { src, dst } => {
532                let src = pretty_print_reg(src.to_reg(), 8);
533                let dst: Reg = (*dst).into();
534                let dst = pretty_print_reg(dst, 8);
535                let op = ljustify("movq".to_string());
536                format!("{op} {src}, {dst}")
537            }
538
539            Inst::XmmCmove {
540                ty,
541                cc,
542                consequent,
543                alternative,
544                dst,
545                ..
546            } => {
547                let size = u8::try_from(ty.bytes()).unwrap();
548                let alternative = pretty_print_reg(alternative.to_reg(), size);
549                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
550                let consequent = pretty_print_reg(consequent.to_reg(), size);
551                let suffix = match *ty {
552                    types::F64 => "sd",
553                    types::F32 => "ss",
554                    types::F16 => "ss",
555                    types::F32X4 => "aps",
556                    types::F64X2 => "apd",
557                    _ => "dqa",
558                };
559                let cc = cc.invert();
560                format!(
561                    "mov{suffix} {alternative}, {dst}; \
562                    j{cc} $next; \
563                    mov{suffix} {consequent}, {dst}; \
564                    $next:"
565                )
566            }
567
568            Inst::StackProbeLoop {
569                tmp,
570                frame_size,
571                guard_size,
572            } => {
573                let tmp = pretty_print_reg(tmp.to_reg(), 8);
574                let op = ljustify("stack_probe_loop".to_string());
575                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
576            }
577
578            Inst::CallKnown { info } => {
579                let op = ljustify("call".to_string());
580                let try_call = info
581                    .try_call_info
582                    .as_ref()
583                    .map(|tci| pretty_print_try_call(tci))
584                    .unwrap_or_default();
585                format!("{op} {:?}{try_call}", info.dest)
586            }
587
588            Inst::CallUnknown { info } => {
589                let dest = info.dest.pretty_print(8);
590                let op = ljustify("call".to_string());
591                let try_call = info
592                    .try_call_info
593                    .as_ref()
594                    .map(|tci| pretty_print_try_call(tci))
595                    .unwrap_or_default();
596                format!("{op} *{dest}{try_call}")
597            }
598
599            Inst::ReturnCallKnown { info } => {
600                let ReturnCallInfo {
601                    uses,
602                    new_stack_arg_size,
603                    tmp,
604                    dest,
605                } = &**info;
606                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
607                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
608                for ret in uses {
609                    let preg = pretty_print_reg(ret.preg, 8);
610                    let vreg = pretty_print_reg(ret.vreg, 8);
611                    write!(&mut s, " {vreg}={preg}").unwrap();
612                }
613                s
614            }
615
616            Inst::ReturnCallUnknown { info } => {
617                let ReturnCallInfo {
618                    uses,
619                    new_stack_arg_size,
620                    tmp,
621                    dest,
622                } = &**info;
623                let callee = pretty_print_reg(*dest, 8);
624                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
625                let mut s =
626                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
627                for ret in uses {
628                    let preg = pretty_print_reg(ret.preg, 8);
629                    let vreg = pretty_print_reg(ret.vreg, 8);
630                    write!(&mut s, " {vreg}={preg}").unwrap();
631                }
632                s
633            }
634
635            Inst::Args { args } => {
636                let mut s = "args".to_string();
637                for arg in args {
638                    let preg = pretty_print_reg(arg.preg, 8);
639                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
640                    write!(&mut s, " {def}={preg}").unwrap();
641                }
642                s
643            }
644
645            Inst::Rets { rets } => {
646                let mut s = "rets".to_string();
647                for ret in rets {
648                    let preg = pretty_print_reg(ret.preg, 8);
649                    let vreg = pretty_print_reg(ret.vreg, 8);
650                    write!(&mut s, " {vreg}={preg}").unwrap();
651                }
652                s
653            }
654
655            Inst::StackSwitchBasic {
656                store_context_ptr,
657                load_context_ptr,
658                in_payload0,
659                out_payload0,
660            } => {
661                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
662                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
663                let in_payload0 = pretty_print_reg(**in_payload0, 8);
664                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
665                format!(
666                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
667                )
668            }
669
670            Inst::JmpKnown { dst } => {
671                let op = ljustify("jmp".to_string());
672                let dst = dst.to_string();
673                format!("{op} {dst}")
674            }
675
676            Inst::WinchJmpIf { cc, taken } => {
677                let taken = taken.to_string();
678                let op = ljustify2("j".to_string(), cc.to_string());
679                format!("{op} {taken}")
680            }
681
682            Inst::JmpCondOr {
683                cc1,
684                cc2,
685                taken,
686                not_taken,
687            } => {
688                let taken = taken.to_string();
689                let not_taken = not_taken.to_string();
690                let op = ljustify(format!("j{cc1},{cc2}"));
691                format!("{op} {taken}; j {not_taken}")
692            }
693
694            Inst::JmpCond {
695                cc,
696                taken,
697                not_taken,
698            } => {
699                let taken = taken.to_string();
700                let not_taken = not_taken.to_string();
701                let op = ljustify2("j".to_string(), cc.to_string());
702                format!("{op} {taken}; j {not_taken}")
703            }
704
705            Inst::JmpTableSeq {
706                idx, tmp1, tmp2, ..
707            } => {
708                let idx = pretty_print_reg(*idx, 8);
709                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
710                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
711                let op = ljustify("br_table".into());
712                format!("{op} {idx}, {tmp1}, {tmp2}")
713            }
714
715            Inst::TrapIf { cc, trap_code, .. } => {
716                format!("j{cc} #trap={trap_code}")
717            }
718
719            Inst::TrapIfAnd {
720                cc1,
721                cc2,
722                trap_code,
723                ..
724            } => {
725                let cc1 = cc1.invert();
726                let cc2 = cc2.invert();
727                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
728            }
729
730            Inst::TrapIfOr {
731                cc1,
732                cc2,
733                trap_code,
734                ..
735            } => {
736                let cc2 = cc2.invert();
737                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
738            }
739
740            Inst::LoadExtName {
741                dst, name, offset, ..
742            } => {
743                let dst = pretty_print_reg(*dst.to_reg(), 8);
744                let name = name.display(None);
745                let op = ljustify("load_ext_name".into());
746                format!("{op} {name}+{offset}, {dst}")
747            }
748
749            Inst::AtomicRmwSeq { ty, op, .. } => {
750                let ty = ty.bits();
751                format!(
752                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
753                )
754            }
755
756            Inst::Atomic128RmwSeq {
757                op,
758                mem,
759                operand_low,
760                operand_high,
761                temp_low,
762                temp_high,
763                dst_old_low,
764                dst_old_high,
765            } => {
766                let operand_low = pretty_print_reg(**operand_low, 8);
767                let operand_high = pretty_print_reg(**operand_high, 8);
768                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
769                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
770                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
771                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
772                let mem = mem.pretty_print(16);
773                format!(
774                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
775                )
776            }
777
778            Inst::Atomic128XchgSeq {
779                mem,
780                operand_low,
781                operand_high,
782                dst_old_low,
783                dst_old_high,
784            } => {
785                let operand_low = pretty_print_reg(**operand_low, 8);
786                let operand_high = pretty_print_reg(**operand_high, 8);
787                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
788                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
789                let mem = mem.pretty_print(16);
790                format!(
791                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
792                )
793            }
794
795            Inst::ElfTlsGetAddr { symbol, dst } => {
796                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
797                format!("{dst} = elf_tls_get_addr {symbol:?}")
798            }
799
800            Inst::MachOTlsGetAddr { symbol, dst } => {
801                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
802                format!("{dst} = macho_tls_get_addr {symbol:?}")
803            }
804
805            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
806                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
807                let tmp = tmp.to_reg().to_reg();
808
809                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
810                if tmp.is_virtual() {
811                    let tmp = pretty_print_reg(tmp, 8);
812                    write!(&mut s, ", {tmp}").unwrap();
813                };
814
815                s
816            }
817
818            Inst::Unwind { inst } => format!("unwind {inst:?}"),
819
820            Inst::DummyUse { reg } => {
821                let reg = pretty_print_reg(*reg, 8);
822                format!("dummy_use {reg}")
823            }
824
825            Inst::External { inst } => {
826                format!("{inst}")
827            }
828        }
829    }
830}
831
832fn pretty_print_try_call(info: &TryCallInfo) -> String {
833    format!(
834        "; jmp {:?}; catch [{}]",
835        info.continuation,
836        info.pretty_print_dests()
837    )
838}
839
840impl fmt::Debug for Inst {
841    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
842        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
843    }
844}
845
846fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
847    // Note: because we need to statically know the indices of each
848    // reg in the operands list in order to fetch its allocation
849    // later, we put the variable-operand-count bits (the RegMem,
850    // RegMemImm, etc args) last. regalloc2 doesn't care what order
851    // the operands come in; they can be freely reordered.
852
853    // N.B.: we MUST keep the below in careful sync with (i) emission,
854    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
855    // method above.
856    match inst {
857        Inst::CheckedSRemSeq {
858            divisor,
859            dividend_lo,
860            dividend_hi,
861            dst_quotient,
862            dst_remainder,
863            ..
864        } => {
865            collector.reg_use(divisor);
866            collector.reg_fixed_use(dividend_lo, regs::rax());
867            collector.reg_fixed_use(dividend_hi, regs::rdx());
868            collector.reg_fixed_def(dst_quotient, regs::rax());
869            collector.reg_fixed_def(dst_remainder, regs::rdx());
870        }
871        Inst::CheckedSRemSeq8 {
872            divisor,
873            dividend,
874            dst,
875            ..
876        } => {
877            collector.reg_use(divisor);
878            collector.reg_fixed_use(dividend, regs::rax());
879            collector.reg_fixed_def(dst, regs::rax());
880        }
881        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
882        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
883        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
884            collector.reg_use(rhs);
885            collector.reg_use(lhs);
886            collector.reg_reuse_def(dst, 0); // Reuse RHS.
887        }
888        Inst::MovFromPReg { dst, src } => {
889            debug_assert!(dst.to_reg().to_reg().is_virtual());
890            collector.reg_fixed_nonallocatable(*src);
891            collector.reg_def(dst);
892        }
893        Inst::MovToPReg { dst, src } => {
894            debug_assert!(src.to_reg().is_virtual());
895            collector.reg_use(src);
896            collector.reg_fixed_nonallocatable(*dst);
897        }
898        Inst::CvtUint64ToFloatSeq {
899            src,
900            dst,
901            tmp_gpr1,
902            tmp_gpr2,
903            ..
904        } => {
905            collector.reg_use(src);
906            collector.reg_early_def(dst);
907            collector.reg_early_def(tmp_gpr1);
908            collector.reg_early_def(tmp_gpr2);
909        }
910        Inst::CvtFloatToSintSeq {
911            src,
912            dst,
913            tmp_xmm,
914            tmp_gpr,
915            ..
916        } => {
917            collector.reg_use(src);
918            collector.reg_early_def(dst);
919            collector.reg_early_def(tmp_gpr);
920            collector.reg_early_def(tmp_xmm);
921        }
922        Inst::CvtFloatToUintSeq {
923            src,
924            dst,
925            tmp_gpr,
926            tmp_xmm,
927            tmp_xmm2,
928            ..
929        } => {
930            collector.reg_use(src);
931            collector.reg_early_def(dst);
932            collector.reg_early_def(tmp_gpr);
933            collector.reg_early_def(tmp_xmm);
934            collector.reg_early_def(tmp_xmm2);
935        }
936
937        Inst::XmmCmove {
938            consequent,
939            alternative,
940            dst,
941            ..
942        } => {
943            collector.reg_use(alternative);
944            collector.reg_reuse_def(dst, 0);
945            collector.reg_use(consequent);
946        }
947        Inst::StackProbeLoop { tmp, .. } => {
948            collector.reg_early_def(tmp);
949        }
950
951        Inst::CallKnown { info } => {
952            // Probestack is special and is only inserted after
953            // regalloc, so we do not need to represent its ABI to the
954            // register allocator. Assert that we don't alter that
955            // arrangement.
956            let CallInfo {
957                uses,
958                defs,
959                clobbers,
960                dest,
961                try_call_info,
962                ..
963            } = &mut **info;
964            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
965            for CallArgPair { vreg, preg } in uses {
966                collector.reg_fixed_use(vreg, *preg);
967            }
968            for CallRetPair { vreg, location } in defs {
969                match location {
970                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
971                    RetLocation::Stack(..) => collector.any_def(vreg),
972                }
973            }
974            collector.reg_clobbers(*clobbers);
975            if let Some(try_call_info) = try_call_info {
976                try_call_info.collect_operands(collector);
977            }
978        }
979
980        Inst::CallUnknown { info } => {
981            let CallInfo {
982                uses,
983                defs,
984                clobbers,
985                callee_conv,
986                dest,
987                try_call_info,
988                ..
989            } = &mut **info;
990            match dest {
991                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
992                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
993                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
994                    // should be safe to use.
995                    collector.reg_fixed_use(reg, regs::r10());
996                }
997                _ => dest.get_operands(collector),
998            }
999            for CallArgPair { vreg, preg } in uses {
1000                collector.reg_fixed_use(vreg, *preg);
1001            }
1002            for CallRetPair { vreg, location } in defs {
1003                match location {
1004                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
1005                    RetLocation::Stack(..) => collector.any_def(vreg),
1006                }
1007            }
1008            collector.reg_clobbers(*clobbers);
1009            if let Some(try_call_info) = try_call_info {
1010                try_call_info.collect_operands(collector);
1011            }
1012        }
1013        Inst::StackSwitchBasic {
1014            store_context_ptr,
1015            load_context_ptr,
1016            in_payload0,
1017            out_payload0,
1018        } => {
1019            collector.reg_use(load_context_ptr);
1020            collector.reg_use(store_context_ptr);
1021            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
1022            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());
1023
1024            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
1025            // The return/payload reg must not be included in the clobber set
1026            clobbers.remove(
1027                stack_switch::payload_register()
1028                    .to_real_reg()
1029                    .unwrap()
1030                    .into(),
1031            );
1032            collector.reg_clobbers(clobbers);
1033        }
1034
1035        Inst::ReturnCallKnown { info } => {
1036            let ReturnCallInfo {
1037                dest, uses, tmp, ..
1038            } = &mut **info;
1039            collector.reg_fixed_def(tmp, regs::r11());
1040            // Same as in the `Inst::CallKnown` branch.
1041            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
1042            for CallArgPair { vreg, preg } in uses {
1043                collector.reg_fixed_use(vreg, *preg);
1044            }
1045        }
1046
1047        Inst::ReturnCallUnknown { info } => {
1048            let ReturnCallInfo {
1049                dest, uses, tmp, ..
1050            } = &mut **info;
1051
1052            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1053            // This shouldn't be a fixed register constraint, but it's not clear how to
1054            // pick a register that won't be clobbered by the callee-save restore code
1055            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
1056            // safe to use.
1057            collector.reg_fixed_use(dest, regs::r10());
1058
1059            collector.reg_fixed_def(tmp, regs::r11());
1060            for CallArgPair { vreg, preg } in uses {
1061                collector.reg_fixed_use(vreg, *preg);
1062            }
1063        }
1064
1065        Inst::JmpTableSeq {
1066            idx, tmp1, tmp2, ..
1067        } => {
1068            collector.reg_use(idx);
1069            collector.reg_early_def(tmp1);
1070            // In the sequence emitted for this pseudoinstruction in emit.rs,
1071            // tmp2 is only written after idx is read, so it doesn't need to be
1072            // an early def.
1073            collector.reg_def(tmp2);
1074        }
1075
1076        Inst::LoadExtName { dst, .. } => {
1077            collector.reg_def(dst);
1078        }
1079
1080        Inst::AtomicRmwSeq {
1081            operand,
1082            temp,
1083            dst_old,
1084            mem,
1085            ..
1086        } => {
1087            collector.reg_late_use(operand);
1088            collector.reg_early_def(temp);
1089            // This `fixed_def` is needed because `CMPXCHG` always uses this
1090            // register implicitly.
1091            collector.reg_fixed_def(dst_old, regs::rax());
1092            mem.get_operands_late(collector)
1093        }
1094
1095        Inst::Atomic128RmwSeq {
1096            operand_low,
1097            operand_high,
1098            temp_low,
1099            temp_high,
1100            dst_old_low,
1101            dst_old_high,
1102            mem,
1103            ..
1104        } => {
1105            // All registers are collected in the `Late` position so that they don't overlap.
1106            collector.reg_late_use(operand_low);
1107            collector.reg_late_use(operand_high);
1108            collector.reg_fixed_def(temp_low, regs::rbx());
1109            collector.reg_fixed_def(temp_high, regs::rcx());
1110            collector.reg_fixed_def(dst_old_low, regs::rax());
1111            collector.reg_fixed_def(dst_old_high, regs::rdx());
1112            mem.get_operands_late(collector)
1113        }
1114
1115        Inst::Atomic128XchgSeq {
1116            operand_low,
1117            operand_high,
1118            dst_old_low,
1119            dst_old_high,
1120            mem,
1121            ..
1122        } => {
1123            // All registers are collected in the `Late` position so that they don't overlap.
1124            collector.reg_fixed_late_use(operand_low, regs::rbx());
1125            collector.reg_fixed_late_use(operand_high, regs::rcx());
1126            collector.reg_fixed_def(dst_old_low, regs::rax());
1127            collector.reg_fixed_def(dst_old_high, regs::rdx());
1128            mem.get_operands_late(collector)
1129        }
1130
1131        Inst::Args { args } => {
1132            for ArgPair { vreg, preg } in args {
1133                collector.reg_fixed_def(vreg, *preg);
1134            }
1135        }
1136
1137        Inst::Rets { rets } => {
1138            // The return value(s) are live-out; we represent this
1139            // with register uses on the return instruction.
1140            for RetPair { vreg, preg } in rets {
1141                collector.reg_fixed_use(vreg, *preg);
1142            }
1143        }
1144
1145        Inst::JmpKnown { .. }
1146        | Inst::WinchJmpIf { .. }
1147        | Inst::JmpCond { .. }
1148        | Inst::JmpCondOr { .. }
1149        | Inst::TrapIf { .. }
1150        | Inst::TrapIfAnd { .. }
1151        | Inst::TrapIfOr { .. } => {
1152            // No registers are used.
1153        }
1154
1155        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
1156            collector.reg_fixed_def(dst, regs::rax());
1157            // All caller-saves are clobbered.
1158            //
1159            // We use the SysV calling convention here because the
1160            // pseudoinstruction (and relocation that it emits) is specific to
1161            // ELF systems; other x86-64 targets with other conventions (i.e.,
1162            // Windows) use different TLS strategies.
1163            let mut clobbers =
1164                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
1165            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
1166            collector.reg_clobbers(clobbers);
1167        }
1168
1169        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
1170            // We also use the gs register. But that register is not allocatable by the
1171            // register allocator, so we don't need to mark it as used here.
1172
1173            // We use %rax to set the address
1174            collector.reg_fixed_def(dst, regs::rax());
1175
1176            // We use %rcx as a temporary variable to load the _tls_index
1177            collector.reg_fixed_def(tmp, regs::rcx());
1178        }
1179
1180        Inst::Unwind { .. } => {}
1181
1182        Inst::DummyUse { reg } => {
1183            collector.reg_use(reg);
1184        }
1185
1186        Inst::External { inst } => {
1187            inst.visit(&mut external::RegallocVisitor { collector });
1188        }
1189    }
1190}
1191
1192//=============================================================================
1193// Instructions: misc functions and external interface
1194
1195impl MachInst for Inst {
1196    type ABIMachineSpec = X64ABIMachineSpec;
1197
1198    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
1199        x64_get_operands(self, collector)
1200    }
1201
1202    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
1203        use asm::inst::Inst as I;
1204        match self {
1205            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
1206            // out the upper 32 bits of the destination.  For example, we could
1207            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
1208            // %reg.
1209            Self::External {
1210                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
1211            } => match rm64 {
1212                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
1213                asm::GprMem::Mem(_) => None,
1214            },
1215            Self::External {
1216                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
1217            } => match rm64 {
1218                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
1219                asm::GprMem::Mem(_) => None,
1220            },
1221
1222            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
1223            // here because they only overwrite the low bits in the destination
1224            // register, otherwise preserving the upper bits. That can be used
1225            // for lane-insertion instructions, for example, meaning it's not
1226            // classified as a register move.
1227            //
1228            // Otherwise though all register-to-register movement instructions
1229            // which move 128-bits are registered as moves.
1230            Self::External {
1231                inst:
1232                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
1233                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
1234                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
1235                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
1236                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
1237                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
1238            } => match xmm_m128 {
1239                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
1240                asm::XmmMem::Mem(_) => None,
1241            },
1242            // In addition to the "A" format of instructions above, also recognize
1243            // the "B" format, which, while usable for stores, can also encode
1244            // register-to-register moves.
1245            Self::External {
1246                inst:
1247                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
1248                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
1249                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
1250                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
1251                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
1252                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
1253            } => match xmm_m128 {
1254                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
1255                asm::XmmMem::Mem(_) => None,
1256            },
1257            _ => None,
1258        }
1259    }
1260
1261    fn is_included_in_clobbers(&self) -> bool {
1262        match self {
1263            &Inst::Args { .. } => false,
1264            _ => true,
1265        }
1266    }
1267
1268    fn is_trap(&self) -> bool {
1269        match self {
1270            Self::External {
1271                inst: asm::inst::Inst::ud2_zo(..),
1272            } => true,
1273            _ => false,
1274        }
1275    }
1276
1277    fn is_args(&self) -> bool {
1278        match self {
1279            Self::Args { .. } => true,
1280            _ => false,
1281        }
1282    }
1283
1284    fn is_term(&self) -> MachTerminator {
1285        match self {
1286            // Interesting cases.
1287            &Self::Rets { .. } => MachTerminator::Ret,
1288            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
1289                MachTerminator::RetCall
1290            }
1291            &Self::JmpKnown { .. } => MachTerminator::Branch,
1292            &Self::JmpCond { .. } => MachTerminator::Branch,
1293            &Self::JmpCondOr { .. } => MachTerminator::Branch,
1294            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
1295            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1296            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
1297                MachTerminator::Branch
1298            }
1299            // All other cases are boring.
1300            _ => MachTerminator::None,
1301        }
1302    }
1303
1304    fn is_low_level_branch(&self) -> bool {
1305        match self {
1306            &Self::WinchJmpIf { .. } => true,
1307            _ => false,
1308        }
1309    }
1310
1311    fn is_mem_access(&self) -> bool {
1312        panic!("TODO FILL ME OUT")
1313    }
1314
1315    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
1316        trace!(
1317            "Inst::gen_move {:?} -> {:?} (type: {:?})",
1318            src_reg,
1319            dst_reg.to_reg(),
1320            ty
1321        );
1322        let rc_dst = dst_reg.to_reg().class();
1323        let rc_src = src_reg.class();
1324        // If this isn't true, we have gone way off the rails.
1325        debug_assert!(rc_dst == rc_src);
1326        let inst = match rc_dst {
1327            RegClass::Int => {
1328                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
1329                    .into()
1330            }
1331            RegClass::Float => {
1332                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
1333                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
1334                // those, which may write more lanes than we need, but are specified to have
1335                // zero-latency.
1336                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
1337                let src_reg = Xmm::new(src_reg).unwrap();
1338                match ty {
1339                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
1340                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
1341                    }
1342                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
1343                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
1344                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
1345                    }
1346                    _ => unimplemented!("unable to move type: {}", ty),
1347                }
1348            }
1349            RegClass::Vector => unreachable!(),
1350        };
1351        Inst::External { inst }
1352    }

    fn gen_nop(preferred_size: usize) -> Inst {
        // Clamp to 9 bytes, the longest recommended multi-byte NOP encoding on x86-64.
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }
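    // Worked example: `rc_for_type(types::I128)` yields two Int registers carrying two
    // I64 halves, while a 16-byte vector such as I32X4 yields a single Float register
    // whose in-register type is I8X16 (16 bytes -> `16.ilog2() - 1 == 3`, the last
    // entry of the lookup table above).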

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }
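    // Example: `gen_imm_f64(1.0, tmp, dst)` first materializes 0x3ff0_0000_0000_0000
    // (the bit pattern of 1.0f64) into `tmp` as a 64-bit immediate, then transfers it
    // to the XMM destination with `movq`.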

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        // The architectural upper bound on the length of a single x86-64 instruction.
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Align functions to 32 bytes rather than 16 for better performance;
            // see https://github.com/bytecodealliance/wasmtime/issues/8573.
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    // The opcode bytes of `ud2`, used to encode traps.
    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

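// This impl lets the external assembler (`cranelift_assembler_x64`) query which ISA
// extensions are enabled for the current target flags; `Inst::is_available` above
// consults these predicates for externally-defined instructions.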
impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location. Used for control-flow instructions whose offset is relative to the start of
    /// the next instruction, so the 4-byte size of the payload is subtracted when patching.
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location.
    PCRel32,
}
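
// A small illustrative test (sketch) of the `JmpRel32` patching arithmetic and the
// `from_reloc` mapping implemented below.
#[test]
fn label_use_patch_example() {
    // A jump at offset 0x10 targeting a label at offset 0x20: the 4-byte payload is
    // patched to 0x20 - 0x10 - 4 = 0xC, i.e. the offset is taken from the start of
    // the next instruction.
    let mut buf = [0u8; 4];
    LabelUse::JmpRel32.patch(&mut buf, 0x10, 0x20);
    assert_eq!(u32::from_le_bytes(buf), 0xC);

    // An `X86CallPCRel4` relocation with addend -4 can be resolved in-function as a
    // `JmpRel32` label use.
    assert_eq!(
        LabelUse::from_reloc(Reloc::X86CallPCRel4, -4),
        Some(LabelUse::JmpRel32)
    );
}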

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("veneer not supported for JmpRel32/PCRel32 label-uses");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
1639}