cranelift_codegen/isa/x64/inst/mod.rs

1//! This module defines x86_64-specific machine instruction types.
2
3pub use emit_state::EmitState;
4
5use crate::binemit::{Addend, CodeOffset, Reloc};
6use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
7use crate::isa::x64::abi::X64ABIMachineSpec;
8use crate::isa::x64::inst::regs::pretty_print_reg;
9use crate::isa::x64::settings as x64_settings;
10use crate::isa::{CallConv, FunctionAlignment};
11use crate::{CodegenError, CodegenResult, settings};
12use crate::{machinst::*, trace};
13use alloc::boxed::Box;
14use core::slice;
15use cranelift_assembler_x64 as asm;
16use smallvec::{SmallVec, smallvec};
17use std::fmt::{self, Write};
18use std::string::{String, ToString};
19
20pub mod args;
21mod emit;
22mod emit_state;
23#[cfg(test)]
24mod emit_tests;
25pub mod external;
26pub mod regs;
27mod stack_switch;
28pub mod unwind;
29
30use args::*;
31
32//=============================================================================
33// Instructions (top level): definition
34
35// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
36pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
37pub use super::lower::isle::generated_code::MInst as Inst;
38
39/// Out-of-line data for return-calls, to keep the size of `Inst` down.
40#[derive(Clone, Debug)]
41pub struct ReturnCallInfo<T> {
42    /// Where this call is going.
43    pub dest: T,
44
45    /// The size of the argument area for this return-call, potentially smaller than that of the
46    /// caller, but never larger.
47    pub new_stack_arg_size: u32,
48
49    /// The in-register arguments and their constraints.
50    pub uses: CallArgList,
51
52    /// A temporary for use when moving the return address.
53    pub tmp: WritableGpr,
54}
55
56#[test]
57#[cfg(target_pointer_width = "64")]
58fn inst_size_test() {
59    // This test helps catch unintentional growth in the size
60    // of the `Inst` enum.
61    assert_eq!(48, std::mem::size_of::<Inst>());
62}
63
64impl Inst {
65    /// Check if the instruction (or pseudo-instruction) can be emitted for
66    /// the target architecture described by `emit_info`. For non-assembler
67    /// instructions, this assumes a baseline feature set (i.e., 64-bit mode
68    /// with SSE2 and below).
69    fn is_available(&self, emit_info: &EmitInfo) -> bool {
70        use asm::AvailableFeatures;
71
72        match self {
73            // These instructions are part of SSE2, which is a basic requirement
74            // in Cranelift, and don't have to be checked.
75            Inst::AtomicRmwSeq { .. }
76            | Inst::CallKnown { .. }
77            | Inst::CallUnknown { .. }
78            | Inst::ReturnCallKnown { .. }
79            | Inst::ReturnCallUnknown { .. }
80            | Inst::CheckedSRemSeq { .. }
81            | Inst::CheckedSRemSeq8 { .. }
82            | Inst::CvtFloatToSintSeq { .. }
83            | Inst::CvtFloatToUintSeq { .. }
84            | Inst::CvtUint64ToFloatSeq { .. }
85            | Inst::JmpCond { .. }
86            | Inst::JmpCondOr { .. }
87            | Inst::WinchJmpIf { .. }
88            | Inst::JmpKnown { .. }
89            | Inst::JmpTableSeq { .. }
90            | Inst::LoadExtName { .. }
91            | Inst::MovFromPReg { .. }
92            | Inst::MovToPReg { .. }
93            | Inst::StackProbeLoop { .. }
94            | Inst::Args { .. }
95            | Inst::Rets { .. }
96            | Inst::StackSwitchBasic { .. }
97            | Inst::TrapIf { .. }
98            | Inst::TrapIfAnd { .. }
99            | Inst::TrapIfOr { .. }
100            | Inst::XmmCmove { .. }
101            | Inst::XmmMinMaxSeq { .. }
102            | Inst::XmmUninitializedValue { .. }
103            | Inst::GprUninitializedValue { .. }
104            | Inst::ElfTlsGetAddr { .. }
105            | Inst::MachOTlsGetAddr { .. }
106            | Inst::CoffTlsGetAddr { .. }
107            | Inst::Unwind { .. }
108            | Inst::DummyUse { .. }
109            | Inst::LabelAddress { .. }
110            | Inst::SequencePoint => true,
111
112            Inst::Atomic128RmwSeq { .. } | Inst::Atomic128XchgSeq { .. } => emit_info.cmpxchg16b(),
113
114            Inst::External { inst } => inst.is_available(&emit_info),
115        }
116    }
117}
118
119// Handy constructors for Insts.
120
121impl Inst {
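    /// Creates a single no-op instruction of exactly `len` bytes; `len` must
    /// be between 1 and 9.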
122    pub(crate) fn nop(len: u8) -> Self {
123        assert!(len > 0 && len <= 9);
124        let inst = match len {
125            1 => asm::inst::nop_1b::new().into(),
126            2 => asm::inst::nop_2b::new().into(),
127            3 => asm::inst::nop_3b::new().into(),
128            4 => asm::inst::nop_4b::new().into(),
129            5 => asm::inst::nop_5b::new().into(),
130            6 => asm::inst::nop_6b::new().into(),
131            7 => asm::inst::nop_7b::new().into(),
132            8 => asm::inst::nop_8b::new().into(),
133            9 => asm::inst::nop_9b::new().into(),
134            _ => unreachable!("nop length must be between 1 and 9"),
135        };
136        Self::External { inst }
137    }
138
139    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
140        let inst = if let Ok(simm8) = i8::try_from(simm32) {
141            asm::inst::addq_mi_sxb::new(dst, simm8).into()
142        } else {
143            asm::inst::addq_mi_sxl::new(dst, simm32).into()
144        };
145        Inst::External { inst }
146    }
147
148    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
149        let inst = if let Ok(simm8) = i8::try_from(simm32) {
150            asm::inst::subq_mi_sxb::new(dst, simm8).into()
151        } else {
152            asm::inst::subq_mi_sxl::new(dst, simm32).into()
153        };
154        Inst::External { inst }
155    }
156
157    /// Writes the `simm64` immediate into `dst`.
158    ///
159    /// Note that if `dst_size` is less than 64 bits then the upper bits of
160    /// `simm64` are discarded.
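    ///
    /// Illustrative examples of the selection below: `0x7fff_ffff` fits in 32
    /// bits zero-extended and is emitted as `movl`; `0xffff_ffff_8000_0000`
    /// fits in 32 bits sign-extended and is emitted as `movq` with a
    /// sign-extended immediate; `0x1234_5678_9abc_def0` falls back to
    /// `movabsq`.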
161    pub fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
162        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
163        debug_assert!(dst.to_reg().class() == RegClass::Int);
164        let dst = WritableGpr::from_writable_reg(dst).unwrap();
165        let inst = match dst_size {
166            OperandSize::Size64 => match u32::try_from(simm64) {
167                // If `simm64` fits in 32 bits when zero-extended, use `movl`,
168                // which zeroes the upper bits.
169                Ok(imm32) => asm::inst::movl_oi::new(dst, imm32).into(),
170                _ => match i32::try_from(simm64.cast_signed()) {
171                    // If `simm64` fits in 32 bits when sign-extended, use `movq`,
172                    // which sign-extends into the upper bits.
173                    Ok(simm32) => asm::inst::movq_mi_sxl::new(dst, simm32).into(),
174                    // Otherwise fall back to embedding the entire immediate.
175                    _ => asm::inst::movabsq_oi::new(dst, simm64).into(),
176                },
177            },
178            // FIXME: the input to this function is a logical `simm64` stored
179            // as `u64`. That means that ideally what we would do here is cast
180            // the `simm64` to an `i64`, perform a `i32::try_from()`, then cast
181            // that back to `u32`. That would ensure that the immediate loses
182            // no meaning and has the same logical value. Currently though
183            // Cranelift relies on discarding the upper bits because literals
184            // like `0x8000_0000_u64` fail to convert to an `i32`. In theory
185            // the input to this function should change to `i64`. In the
186            // meantime this is documented as discarding the upper bits,
187            // although this is an old function so that's unlikely to help
188            // much.
189            _ => asm::inst::movl_oi::new(dst, simm64 as u32).into(),
190        };
191        Inst::External { inst }
192    }
193
194    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
195        src.assert_regclass_is(RegClass::Int);
196        debug_assert!(dst.to_reg().class() == RegClass::Int);
197        let src = match src {
198            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
199            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
200        };
201        let inst = match ext_mode {
202            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
203            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
204            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
205            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
206            ExtMode::LQ => {
207                // This instruction selection may seem strange but is correct in
208                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
209                // "32-bit operands generate a 32-bit result, zero-extended to a
210                // 64-bit result in the destination general-purpose register."
211                // This is applicable beyond `mov` but we use this fact to
212                // zero-extend `src` into `dst`.
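                // For example, `movl %eax, %eax` clears bits 63:32 of `%rax`.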
213                asm::inst::movl_rm::new(dst, src).into()
214            }
215        };
216        Inst::External { inst }
217    }
218
219    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
220        src.assert_regclass_is(RegClass::Int);
221        debug_assert!(dst.to_reg().class() == RegClass::Int);
222        let src = match src {
223            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
224            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
225        };
226        let inst = match ext_mode {
227            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
228            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
229            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
230            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
231            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
232        };
233        Inst::External { inst }
234    }
235
236    /// Compares `src1` against `src2`.
237    pub(crate) fn cmp_mi_sxb(size: OperandSize, src1: Gpr, src2: i8) -> Inst {
238        let inst = match size {
239            OperandSize::Size8 => asm::inst::cmpb_mi::new(src1, src2.cast_unsigned()).into(),
240            OperandSize::Size16 => asm::inst::cmpw_mi_sxb::new(src1, src2).into(),
241            OperandSize::Size32 => asm::inst::cmpl_mi_sxb::new(src1, src2).into(),
242            OperandSize::Size64 => asm::inst::cmpq_mi_sxb::new(src1, src2).into(),
243        };
244        Inst::External { inst }
245    }
246
247    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
248        Inst::TrapIf { cc, trap_code }
249    }
250
251    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
252        Inst::CallKnown { info }
253    }
254
255    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
256        info.dest.assert_regclass_is(RegClass::Int);
257        Inst::CallUnknown { info }
258    }
259
260    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
261        Inst::JmpKnown { dst }
262    }
263
264    /// Choose which instruction to use for loading a register value from memory. For loads smaller
265    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
266    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
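    ///
    /// For example, loading an `I8` with [ExtKind::ZeroExtend] selects
    /// `movzbq`, while a plain 64-bit load uses `movq`.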
267    pub(crate) fn load(
268        ty: Type,
269        from_addr: impl Into<SyntheticAmode>,
270        to_reg: Writable<Reg>,
271        ext_kind: ExtKind,
272    ) -> Inst {
273        let rc = to_reg.to_reg().class();
274        match rc {
275            RegClass::Int => {
276                let ext_mode = match ty.bytes() {
277                    1 => Some(ExtMode::BQ),
278                    2 => Some(ExtMode::WQ),
279                    4 => Some(ExtMode::LQ),
280                    8 => None,
281                    _ => unreachable!("the type should never use a scalar load: {}", ty),
282                };
283                if let Some(ext_mode) = ext_mode {
284                    // Values smaller than 64 bits must be extended in some way.
285                    match ext_kind {
286                        ExtKind::SignExtend => {
287                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
288                        }
289                        ExtKind::ZeroExtend => {
290                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
291                        }
292                        ExtKind::None => {
293                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
294                        }
295                    }
296                } else {
297                    // 64-bit values can be moved directly.
298                    let from_addr = asm::GprMem::from(from_addr.into());
299                    Inst::External {
300                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
301                    }
302                }
303            }
304            RegClass::Float => {
305                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
306                let from_addr = from_addr.into();
307                let inst = match ty {
308                    types::F16 | types::I8X2 => {
309                        panic!("loading a f16 or i8x2 requires multiple instructions")
310                    }
311                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
312                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
313                    }
314                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
315                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
316                    }
317                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
318                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
319                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
320                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
321                    }
322                    _ => unimplemented!("unable to load type: {}", ty),
323                };
324                Inst::External { inst }
325            }
326            RegClass::Vector => unreachable!(),
327        }
328    }
329
330    /// Choose which instruction to use for storing a register value to memory.
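    ///
    /// For example, an `I32` store is emitted as `movl`, while an `F32X4`
    /// store uses `movups`.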
331    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
332        let rc = from_reg.class();
333        let to_addr = to_addr.into();
334        let inst = match rc {
335            RegClass::Int => {
336                let from_reg = Gpr::unwrap_new(from_reg);
337                match ty {
338                    types::I8 => asm::inst::movb_mr::new(to_addr, from_reg).into(),
339                    types::I16 => asm::inst::movw_mr::new(to_addr, from_reg).into(),
340                    types::I32 => asm::inst::movl_mr::new(to_addr, from_reg).into(),
341                    types::I64 => asm::inst::movq_mr::new(to_addr, from_reg).into(),
342                    _ => unreachable!(),
343                }
344            }
345            RegClass::Float => {
346                let from_reg = Xmm::new(from_reg).unwrap();
347                match ty {
348                    types::F16 | types::I8X2 => {
349                        panic!("storing a f16 or i8x2 requires multiple instructions")
350                    }
351                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
352                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
353                    }
354                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
355                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
356                    }
357                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
358                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
359                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
360                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
361                    }
362                    _ => unimplemented!("unable to store type: {}", ty),
363                }
364            }
365            RegClass::Vector => unreachable!(),
366        };
367        Inst::External { inst }
368    }
369}
370
371//=============================================================================
372// Instructions: printing
373
374impl PrettyPrint for Inst {
375    fn pretty_print(&self, _size: u8) -> String {
376        fn ljustify(s: String) -> String {
377            let w = 7;
378            if s.len() >= w {
379                s
380            } else {
381                let need = usize::min(w, w - s.len());
382                s + &format!("{nil: <width$}", nil = "", width = need)
383            }
384        }
385
386        fn ljustify2(s1: String, s2: String) -> String {
387            ljustify(s1 + &s2)
388        }
389
390        match self {
391            Inst::CheckedSRemSeq {
392                size,
393                divisor,
394                dividend_lo,
395                dividend_hi,
396                dst_quotient,
397                dst_remainder,
398            } => {
399                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
400                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
401                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
402                let dst_quotient =
403                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
404                let dst_remainder =
405                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
406                format!(
407                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
408                        {divisor}, {dst_quotient}, {dst_remainder}",
409                )
410            }
411
412            Inst::CheckedSRemSeq8 {
413                divisor,
414                dividend,
415                dst,
416            } => {
417                let divisor = pretty_print_reg(divisor.to_reg(), 1);
418                let dividend = pretty_print_reg(dividend.to_reg(), 1);
419                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
420                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
421            }
422
423            Inst::XmmMinMaxSeq {
424                lhs,
425                rhs,
426                dst,
427                is_min,
428                size,
429            } => {
430                let rhs = pretty_print_reg(rhs.to_reg(), 8);
431                let lhs = pretty_print_reg(lhs.to_reg(), 8);
432                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
433                let op = ljustify2(
434                    if *is_min {
435                        "xmm min seq ".to_string()
436                    } else {
437                        "xmm max seq ".to_string()
438                    },
439                    format!("f{}", size.to_bits()),
440                );
441                format!("{op} {lhs}, {rhs}, {dst}")
442            }
443
444            Inst::XmmUninitializedValue { dst } => {
445                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
446                let op = ljustify("uninit".into());
447                format!("{op} {dst}")
448            }
449
450            Inst::GprUninitializedValue { dst } => {
451                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
452                let op = ljustify("uninit".into());
453                format!("{op} {dst}")
454            }
455
456            Inst::CvtUint64ToFloatSeq {
457                src,
458                dst,
459                dst_size,
460                tmp_gpr1,
461                tmp_gpr2,
462                ..
463            } => {
464                let src = pretty_print_reg(src.to_reg(), 8);
465                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
466                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
467                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
468                let op = ljustify(format!(
469                    "u64_to_{}_seq",
470                    if *dst_size == OperandSize::Size64 {
471                        "f64"
472                    } else {
473                        "f32"
474                    }
475                ));
476                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
477            }
478
479            Inst::CvtFloatToSintSeq {
480                src,
481                dst,
482                src_size,
483                dst_size,
484                tmp_xmm,
485                tmp_gpr,
486                is_saturating,
487            } => {
488                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
489                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
490                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
491                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
492                let op = ljustify(format!(
493                    "cvt_float{}_to_sint{}{}_seq",
494                    src_size.to_bits(),
495                    dst_size.to_bits(),
496                    if *is_saturating { "_sat" } else { "" },
497                ));
498                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
499            }
500
501            Inst::CvtFloatToUintSeq {
502                src,
503                dst,
504                src_size,
505                dst_size,
506                tmp_gpr,
507                tmp_xmm,
508                tmp_xmm2,
509                is_saturating,
510            } => {
511                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
512                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
513                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
514                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
515                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
516                let op = ljustify(format!(
517                    "cvt_float{}_to_uint{}{}_seq",
518                    src_size.to_bits(),
519                    dst_size.to_bits(),
520                    if *is_saturating { "_sat" } else { "" },
521                ));
522                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
523            }
524
525            Inst::MovFromPReg { src, dst } => {
526                let src: Reg = (*src).into();
527                let src = pretty_print_reg(src, 8);
528                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
529                let op = ljustify("movq".to_string());
530                format!("{op} {src}, {dst}")
531            }
532
533            Inst::MovToPReg { src, dst } => {
534                let src = pretty_print_reg(src.to_reg(), 8);
535                let dst: Reg = (*dst).into();
536                let dst = pretty_print_reg(dst, 8);
537                let op = ljustify("movq".to_string());
538                format!("{op} {src}, {dst}")
539            }
540
541            Inst::XmmCmove {
542                ty,
543                cc,
544                consequent,
545                alternative,
546                dst,
547                ..
548            } => {
549                let size = u8::try_from(ty.bytes()).unwrap();
550                let alternative = pretty_print_reg(alternative.to_reg(), size);
551                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
552                let consequent = pretty_print_reg(consequent.to_reg(), size);
553                let suffix = match *ty {
554                    types::F64 => "sd",
555                    types::F32 => "ss",
556                    types::F16 => "ss",
557                    types::F32X4 => "aps",
558                    types::F64X2 => "apd",
559                    _ => "dqa",
560                };
561                let cc = cc.invert();
562                format!(
563                    "mov{suffix} {alternative}, {dst}; \
564                    j{cc} $next; \
565                    mov{suffix} {consequent}, {dst}; \
566                    $next:"
567                )
568            }
569
570            Inst::StackProbeLoop {
571                tmp,
572                frame_size,
573                guard_size,
574            } => {
575                let tmp = pretty_print_reg(tmp.to_reg(), 8);
576                let op = ljustify("stack_probe_loop".to_string());
577                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
578            }
579
580            Inst::CallKnown { info } => {
581                let op = ljustify("call".to_string());
582                let try_call = info
583                    .try_call_info
584                    .as_ref()
585                    .map(|tci| pretty_print_try_call(tci))
586                    .unwrap_or_default();
587                format!("{op} {:?}{try_call}", info.dest)
588            }
589
590            Inst::CallUnknown { info } => {
591                let dest = info.dest.pretty_print(8);
592                let op = ljustify("call".to_string());
593                let try_call = info
594                    .try_call_info
595                    .as_ref()
596                    .map(|tci| pretty_print_try_call(tci))
597                    .unwrap_or_default();
598                format!("{op} *{dest}{try_call}")
599            }
600
601            Inst::ReturnCallKnown { info } => {
602                let ReturnCallInfo {
603                    uses,
604                    new_stack_arg_size,
605                    tmp,
606                    dest,
607                } = &**info;
608                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
609                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
610                for ret in uses {
611                    let preg = pretty_print_reg(ret.preg, 8);
612                    let vreg = pretty_print_reg(ret.vreg, 8);
613                    write!(&mut s, " {vreg}={preg}").unwrap();
614                }
615                s
616            }
617
618            Inst::ReturnCallUnknown { info } => {
619                let ReturnCallInfo {
620                    uses,
621                    new_stack_arg_size,
622                    tmp,
623                    dest,
624                } = &**info;
625                let callee = pretty_print_reg(*dest, 8);
626                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
627                let mut s =
628                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
629                for ret in uses {
630                    let preg = pretty_print_reg(ret.preg, 8);
631                    let vreg = pretty_print_reg(ret.vreg, 8);
632                    write!(&mut s, " {vreg}={preg}").unwrap();
633                }
634                s
635            }
636
637            Inst::Args { args } => {
638                let mut s = "args".to_string();
639                for arg in args {
640                    let preg = pretty_print_reg(arg.preg, 8);
641                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
642                    write!(&mut s, " {def}={preg}").unwrap();
643                }
644                s
645            }
646
647            Inst::Rets { rets } => {
648                let mut s = "rets".to_string();
649                for ret in rets {
650                    let preg = pretty_print_reg(ret.preg, 8);
651                    let vreg = pretty_print_reg(ret.vreg, 8);
652                    write!(&mut s, " {vreg}={preg}").unwrap();
653                }
654                s
655            }
656
657            Inst::StackSwitchBasic {
658                store_context_ptr,
659                load_context_ptr,
660                in_payload0,
661                out_payload0,
662            } => {
663                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
664                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
665                let in_payload0 = pretty_print_reg(**in_payload0, 8);
666                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
667                format!(
668                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
669                )
670            }
671
672            Inst::JmpKnown { dst } => {
673                let op = ljustify("jmp".to_string());
674                let dst = dst.to_string();
675                format!("{op} {dst}")
676            }
677
678            Inst::WinchJmpIf { cc, taken } => {
679                let taken = taken.to_string();
680                let op = ljustify2("j".to_string(), cc.to_string());
681                format!("{op} {taken}")
682            }
683
684            Inst::JmpCondOr {
685                cc1,
686                cc2,
687                taken,
688                not_taken,
689            } => {
690                let taken = taken.to_string();
691                let not_taken = not_taken.to_string();
692                let op = ljustify(format!("j{cc1},{cc2}"));
693                format!("{op} {taken}; j {not_taken}")
694            }
695
696            Inst::JmpCond {
697                cc,
698                taken,
699                not_taken,
700            } => {
701                let taken = taken.to_string();
702                let not_taken = not_taken.to_string();
703                let op = ljustify2("j".to_string(), cc.to_string());
704                format!("{op} {taken}; j {not_taken}")
705            }
706
707            Inst::JmpTableSeq {
708                idx, tmp1, tmp2, ..
709            } => {
710                let idx = pretty_print_reg(*idx, 8);
711                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
712                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
713                let op = ljustify("br_table".into());
714                format!("{op} {idx}, {tmp1}, {tmp2}")
715            }
716
717            Inst::TrapIf { cc, trap_code, .. } => {
718                format!("j{cc} #trap={trap_code}")
719            }
720
721            Inst::TrapIfAnd {
722                cc1,
723                cc2,
724                trap_code,
725                ..
726            } => {
727                let cc1 = cc1.invert();
728                let cc2 = cc2.invert();
729                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
730            }
731
732            Inst::TrapIfOr {
733                cc1,
734                cc2,
735                trap_code,
736                ..
737            } => {
738                let cc2 = cc2.invert();
739                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
740            }
741
742            Inst::LoadExtName {
743                dst, name, offset, ..
744            } => {
745                let dst = pretty_print_reg(*dst.to_reg(), 8);
746                let name = name.display(None);
747                let op = ljustify("load_ext_name".into());
748                format!("{op} {name}+{offset}, {dst}")
749            }
750
751            Inst::AtomicRmwSeq { ty, op, .. } => {
752                let ty = ty.bits();
753                format!(
754                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
755                )
756            }
757
758            Inst::Atomic128RmwSeq {
759                op,
760                mem,
761                operand_low,
762                operand_high,
763                temp_low,
764                temp_high,
765                dst_old_low,
766                dst_old_high,
767            } => {
768                let operand_low = pretty_print_reg(**operand_low, 8);
769                let operand_high = pretty_print_reg(**operand_high, 8);
770                let temp_low = pretty_print_reg(*temp_low.to_reg(), 8);
771                let temp_high = pretty_print_reg(*temp_high.to_reg(), 8);
772                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
773                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
774                let mem = mem.pretty_print(16);
775                format!(
776                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
777                )
778            }
779
780            Inst::Atomic128XchgSeq {
781                mem,
782                operand_low,
783                operand_high,
784                dst_old_low,
785                dst_old_high,
786            } => {
787                let operand_low = pretty_print_reg(**operand_low, 8);
788                let operand_high = pretty_print_reg(**operand_high, 8);
789                let dst_old_low = pretty_print_reg(*dst_old_low.to_reg(), 8);
790                let dst_old_high = pretty_print_reg(*dst_old_high.to_reg(), 8);
791                let mem = mem.pretty_print(16);
792                format!(
793                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
794                )
795            }
796
797            Inst::ElfTlsGetAddr { symbol, dst } => {
798                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
799                format!("{dst} = elf_tls_get_addr {symbol:?}")
800            }
801
802            Inst::MachOTlsGetAddr { symbol, dst } => {
803                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
804                format!("{dst} = macho_tls_get_addr {symbol:?}")
805            }
806
807            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
808                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
809                let tmp = tmp.to_reg().to_reg();
810
811                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
812                if tmp.is_virtual() {
813                    let tmp = pretty_print_reg(tmp, 8);
814                    write!(&mut s, ", {tmp}").unwrap();
815                };
816
817                s
818            }
819
820            Inst::Unwind { inst } => format!("unwind {inst:?}"),
821
822            Inst::DummyUse { reg } => {
823                let reg = pretty_print_reg(*reg, 8);
824                format!("dummy_use {reg}")
825            }
826
827            Inst::LabelAddress { dst, label } => {
828                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
829                format!("label_address {dst}, {label:?}")
830            }
831
832            Inst::SequencePoint {} => {
833                format!("sequence_point")
834            }
835
836            Inst::External { inst } => {
837                format!("{inst}")
838            }
839        }
840    }
841}
842
843fn pretty_print_try_call(info: &TryCallInfo) -> String {
844    format!(
845        "; jmp {:?}; catch [{}]",
846        info.continuation,
847        info.pretty_print_dests()
848    )
849}
850
851impl fmt::Debug for Inst {
852    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
853        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
854    }
855}
856
857fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
858    // Note: because we need to statically know the indices of each
859    // reg in the operands list in order to fetch its allocation
860    // later, we put the variable-operand-count bits (the RegMem,
861    // RegMemImm, etc args) last. regalloc2 doesn't care what order
862    // the operands come in; they can be freely reordered.
863
864    // N.B.: we MUST keep the below in careful sync with (i) emission,
865    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
866    // method above.
867    match inst {
868        Inst::CheckedSRemSeq {
869            divisor,
870            dividend_lo,
871            dividend_hi,
872            dst_quotient,
873            dst_remainder,
874            ..
875        } => {
876            collector.reg_use(divisor);
877            collector.reg_fixed_use(dividend_lo, regs::rax());
878            collector.reg_fixed_use(dividend_hi, regs::rdx());
879            collector.reg_fixed_def(dst_quotient, regs::rax());
880            collector.reg_fixed_def(dst_remainder, regs::rdx());
881        }
882        Inst::CheckedSRemSeq8 {
883            divisor,
884            dividend,
885            dst,
886            ..
887        } => {
888            collector.reg_use(divisor);
889            collector.reg_fixed_use(dividend, regs::rax());
890            collector.reg_fixed_def(dst, regs::rax());
891        }
892        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
893        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
894        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
895            collector.reg_use(rhs);
896            collector.reg_use(lhs);
897            collector.reg_reuse_def(dst, 0); // Reuse RHS.
898        }
899        Inst::MovFromPReg { dst, src } => {
900            debug_assert!(dst.to_reg().to_reg().is_virtual());
901            collector.reg_fixed_nonallocatable(*src);
902            collector.reg_def(dst);
903        }
904        Inst::MovToPReg { dst, src } => {
905            debug_assert!(src.to_reg().is_virtual());
906            collector.reg_use(src);
907            collector.reg_fixed_nonallocatable(*dst);
908        }
909        Inst::CvtUint64ToFloatSeq {
910            src,
911            dst,
912            tmp_gpr1,
913            tmp_gpr2,
914            ..
915        } => {
916            collector.reg_use(src);
917            collector.reg_early_def(dst);
918            collector.reg_early_def(tmp_gpr1);
919            collector.reg_early_def(tmp_gpr2);
920        }
921        Inst::CvtFloatToSintSeq {
922            src,
923            dst,
924            tmp_xmm,
925            tmp_gpr,
926            ..
927        } => {
928            collector.reg_use(src);
929            collector.reg_early_def(dst);
930            collector.reg_early_def(tmp_gpr);
931            collector.reg_early_def(tmp_xmm);
932        }
933        Inst::CvtFloatToUintSeq {
934            src,
935            dst,
936            tmp_gpr,
937            tmp_xmm,
938            tmp_xmm2,
939            ..
940        } => {
941            collector.reg_use(src);
942            collector.reg_early_def(dst);
943            collector.reg_early_def(tmp_gpr);
944            collector.reg_early_def(tmp_xmm);
945            collector.reg_early_def(tmp_xmm2);
946        }
947
948        Inst::XmmCmove {
949            consequent,
950            alternative,
951            dst,
952            ..
953        } => {
954            collector.reg_use(alternative);
955            collector.reg_reuse_def(dst, 0);
956            collector.reg_use(consequent);
957        }
958        Inst::StackProbeLoop { tmp, .. } => {
959            collector.reg_early_def(tmp);
960        }
961
962        Inst::CallKnown { info } => {
963            // Probestack is special and is only inserted after
964            // regalloc, so we do not need to represent its ABI to the
965            // register allocator. Assert that we don't alter that
966            // arrangement.
967            let CallInfo {
968                uses,
969                defs,
970                clobbers,
971                dest,
972                try_call_info,
973                ..
974            } = &mut **info;
975            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
976            for CallArgPair { vreg, preg } in uses {
977                collector.reg_fixed_use(vreg, *preg);
978            }
979            for CallRetPair { vreg, location } in defs {
980                match location {
981                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
982                    RetLocation::Stack(..) => collector.any_def(vreg),
983                }
984            }
985            collector.reg_clobbers(*clobbers);
986            if let Some(try_call_info) = try_call_info {
987                try_call_info.collect_operands(collector);
988            }
989        }
990
991        Inst::CallUnknown { info } => {
992            let CallInfo {
993                uses,
994                defs,
995                clobbers,
996                callee_conv,
997                dest,
998                try_call_info,
999                ..
1000            } = &mut **info;
1001            match dest {
1002                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
1003                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1004                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
1005                    // should be safe to use.
1006                    collector.reg_fixed_use(reg, regs::r10());
1007                }
1008                _ => dest.get_operands(collector),
1009            }
1010            for CallArgPair { vreg, preg } in uses {
1011                collector.reg_fixed_use(vreg, *preg);
1012            }
1013            for CallRetPair { vreg, location } in defs {
1014                match location {
1015                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
1016                    RetLocation::Stack(..) => collector.any_def(vreg),
1017                }
1018            }
1019            collector.reg_clobbers(*clobbers);
1020            if let Some(try_call_info) = try_call_info {
1021                try_call_info.collect_operands(collector);
1022            }
1023        }
1024        Inst::StackSwitchBasic {
1025            store_context_ptr,
1026            load_context_ptr,
1027            in_payload0,
1028            out_payload0,
1029        } => {
1030            collector.reg_use(load_context_ptr);
1031            collector.reg_use(store_context_ptr);
1032            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
1033            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());
1034
1035            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
1036            // The return/payload reg must not be included in the clobber set
1037            clobbers.remove(
1038                stack_switch::payload_register()
1039                    .to_real_reg()
1040                    .unwrap()
1041                    .into(),
1042            );
1043            collector.reg_clobbers(clobbers);
1044        }
1045
1046        Inst::ReturnCallKnown { info } => {
1047            let ReturnCallInfo {
1048                dest, uses, tmp, ..
1049            } = &mut **info;
1050            collector.reg_fixed_def(tmp, regs::r11());
1051            // Same as in the `Inst::CallKnown` branch.
1052            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
1053            for CallArgPair { vreg, preg } in uses {
1054                collector.reg_fixed_use(vreg, *preg);
1055            }
1056        }
1057
1058        Inst::ReturnCallUnknown { info } => {
1059            let ReturnCallInfo {
1060                dest, uses, tmp, ..
1061            } = &mut **info;
1062
1063            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
1064            // This shouldn't be a fixed register constraint, but it's not clear how to
1065            // pick a register that won't be clobbered by the callee-save restore code
1066            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
1067            // safe to use.
1068            collector.reg_fixed_use(dest, regs::r10());
1069
1070            collector.reg_fixed_def(tmp, regs::r11());
1071            for CallArgPair { vreg, preg } in uses {
1072                collector.reg_fixed_use(vreg, *preg);
1073            }
1074        }
1075
1076        Inst::JmpTableSeq {
1077            idx, tmp1, tmp2, ..
1078        } => {
1079            collector.reg_use(idx);
1080            collector.reg_early_def(tmp1);
1081            // In the sequence emitted for this pseudoinstruction in emit.rs,
1082            // tmp2 is only written after idx is read, so it doesn't need to be
1083            // an early def.
1084            collector.reg_def(tmp2);
1085        }
1086
1087        Inst::LoadExtName { dst, .. } => {
1088            collector.reg_def(dst);
1089        }
1090
1091        Inst::AtomicRmwSeq {
1092            operand,
1093            temp,
1094            dst_old,
1095            mem,
1096            ..
1097        } => {
1098            collector.reg_late_use(operand);
1099            collector.reg_early_def(temp);
1100            // This `fixed_def` is needed because `CMPXCHG` always uses this
1101            // register implicitly.
1102            collector.reg_fixed_def(dst_old, regs::rax());
1103            mem.get_operands_late(collector)
1104        }
1105
1106        Inst::Atomic128RmwSeq {
1107            operand_low,
1108            operand_high,
1109            temp_low,
1110            temp_high,
1111            dst_old_low,
1112            dst_old_high,
1113            mem,
1114            ..
1115        } => {
1116            // All registers are collected in the `Late` position so that they don't overlap.
1117            collector.reg_late_use(operand_low);
1118            collector.reg_late_use(operand_high);
1119            collector.reg_fixed_def(temp_low, regs::rbx());
1120            collector.reg_fixed_def(temp_high, regs::rcx());
1121            collector.reg_fixed_def(dst_old_low, regs::rax());
1122            collector.reg_fixed_def(dst_old_high, regs::rdx());
1123            mem.get_operands_late(collector)
1124        }
1125
1126        Inst::Atomic128XchgSeq {
1127            operand_low,
1128            operand_high,
1129            dst_old_low,
1130            dst_old_high,
1131            mem,
1132            ..
1133        } => {
1134            // All registers are collected in the `Late` position so that they don't overlap.
1135            collector.reg_fixed_late_use(operand_low, regs::rbx());
1136            collector.reg_fixed_late_use(operand_high, regs::rcx());
1137            collector.reg_fixed_def(dst_old_low, regs::rax());
1138            collector.reg_fixed_def(dst_old_high, regs::rdx());
1139            mem.get_operands_late(collector)
1140        }
1141
1142        Inst::Args { args } => {
1143            for ArgPair { vreg, preg } in args {
1144                collector.reg_fixed_def(vreg, *preg);
1145            }
1146        }
1147
1148        Inst::Rets { rets } => {
1149            // The return value(s) are live-out; we represent this
1150            // with register uses on the return instruction.
1151            for RetPair { vreg, preg } in rets {
1152                collector.reg_fixed_use(vreg, *preg);
1153            }
1154        }
1155
1156        Inst::JmpKnown { .. }
1157        | Inst::WinchJmpIf { .. }
1158        | Inst::JmpCond { .. }
1159        | Inst::JmpCondOr { .. }
1160        | Inst::TrapIf { .. }
1161        | Inst::TrapIfAnd { .. }
1162        | Inst::TrapIfOr { .. } => {
1163            // No registers are used.
1164        }
1165
1166        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
1167            collector.reg_fixed_def(dst, regs::rax());
1168            // All caller-saves are clobbered.
1169            //
1170            // We use the SysV calling convention here because the
1171            // pseudoinstruction (and relocation that it emits) is specific to
1172            // ELF systems; other x86-64 targets with other conventions (i.e.,
1173            // Windows) use different TLS strategies.
1174            let mut clobbers =
1175                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
1176            clobbers.remove(regs::gpr_preg(asm::gpr::enc::RAX));
1177            collector.reg_clobbers(clobbers);
1178        }
1179
1180        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
1181            // We also use the gs register. But that register is not allocatable by the
1182            // register allocator, so we don't need to mark it as used here.
1183
1184            // We use %rax to set the address
1185            collector.reg_fixed_def(dst, regs::rax());
1186
1187            // We use %rcx as a temporary variable to load the _tls_index
1188            collector.reg_fixed_def(tmp, regs::rcx());
1189        }
1190
1191        Inst::Unwind { .. } => {}
1192
1193        Inst::DummyUse { reg } => {
1194            collector.reg_use(reg);
1195        }
1196
1197        Inst::LabelAddress { dst, .. } => {
1198            collector.reg_def(dst);
1199        }
1200
1201        Inst::SequencePoint { .. } => {}
1202
1203        Inst::External { inst } => {
1204            inst.visit(&mut external::RegallocVisitor { collector });
1205        }
1206    }
1207}
1208
1209//=============================================================================
1210// Instructions: misc functions and external interface
1211
1212impl MachInst for Inst {
1213    type ABIMachineSpec = X64ABIMachineSpec;
1214
1215    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
1216        x64_get_operands(self, collector)
1217    }
1218
1219    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
1220        use asm::inst::Inst as I;
1221        match self {
1222            // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
1223            // out the upper 32 bits of the destination.  For example, we could
1224            // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
1225            // %reg.
1226            Self::External {
1227                inst: I::movq_mr(asm::inst::movq_mr { rm64, r64 }),
1228            } => match rm64 {
1229                asm::GprMem::Gpr(reg) => Some((reg.map(|r| r.to_reg()), r64.as_ref().to_reg())),
1230                asm::GprMem::Mem(_) => None,
1231            },
1232            Self::External {
1233                inst: I::movq_rm(asm::inst::movq_rm { r64, rm64 }),
1234            } => match rm64 {
1235                asm::GprMem::Gpr(reg) => Some((r64.as_ref().map(|r| r.to_reg()), reg.to_reg())),
1236                asm::GprMem::Mem(_) => None,
1237            },
1238
1239            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
1240            // here because they only overwrite the low bits in the destination
1241            // register, otherwise preserving the upper bits. That can be used
1242            // for lane-insertion instructions, for example, meaning it's not
1243            // classified as a register move.
1244            //
1245            // Otherwise, all register-to-register movement instructions
1246            // that move 128 bits are recognized as moves.
1247            Self::External {
1248                inst:
1249                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
1250                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
1251                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
1252                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
1253                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
1254                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
1255            } => match xmm_m128 {
1256                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
1257                asm::XmmMem::Mem(_) => None,
1258            },
1259            // In addition to the "A" format of instructions above, also
1260            // recognize the "B" format which, while it can be used for stores,
1261            // can also encode register-to-register moves.
1262            Self::External {
1263                inst:
1264                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
1265                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
1266                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
1267                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
1268                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
1269                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
1270            } => match xmm_m128 {
1271                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
1272                asm::XmmMem::Mem(_) => None,
1273            },
1274            _ => None,
1275        }
1276    }
1277
1278    fn is_included_in_clobbers(&self) -> bool {
1279        match self {
1280            &Inst::Args { .. } => false,
1281            _ => true,
1282        }
1283    }
1284
1285    fn is_trap(&self) -> bool {
1286        match self {
1287            Self::External {
1288                inst: asm::inst::Inst::ud2_zo(..),
1289            } => true,
1290            _ => false,
1291        }
1292    }
1293
1294    fn is_args(&self) -> bool {
1295        match self {
1296            Self::Args { .. } => true,
1297            _ => false,
1298        }
1299    }
1300
1301    fn call_type(&self) -> CallType {
1302        match self {
1303            Inst::CallKnown { .. }
1304            | Inst::CallUnknown { .. }
1305            | Inst::ElfTlsGetAddr { .. }
1306            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1307
1308            Inst::ReturnCallKnown { .. } | Inst::ReturnCallUnknown { .. } => CallType::TailCall,
1309
1310            _ => CallType::None,
1311        }
1312    }
1313
1314    fn is_term(&self) -> MachTerminator {
1315        match self {
1316            // Interesting cases.
1317            &Self::Rets { .. } => MachTerminator::Ret,
1318            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
1319                MachTerminator::RetCall
1320            }
1321            &Self::JmpKnown { .. } => MachTerminator::Branch,
1322            &Self::JmpCond { .. } => MachTerminator::Branch,
1323            &Self::JmpCondOr { .. } => MachTerminator::Branch,
1324            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
1325            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1326            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
1327                MachTerminator::Branch
1328            }
1329            // All other cases are boring.
1330            _ => MachTerminator::None,
1331        }
1332    }
1333
1334    fn is_low_level_branch(&self) -> bool {
1335        match self {
1336            &Self::WinchJmpIf { .. } => true,
1337            _ => false,
1338        }
1339    }
1340
1341    fn is_mem_access(&self) -> bool {
1342        panic!("TODO FILL ME OUT")
1343    }
1344
1345    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
1346        trace!(
1347            "Inst::gen_move {:?} -> {:?} (type: {:?})",
1348            src_reg,
1349            dst_reg.to_reg(),
1350            ty
1351        );
1352        let rc_dst = dst_reg.to_reg().class();
1353        let rc_src = src_reg.class();
1354        // If this isn't true, we have gone way off the rails.
1355        debug_assert!(rc_dst == rc_src);
1356        let inst = match rc_dst {
1357            RegClass::Int => {
1358                asm::inst::movq_mr::new(dst_reg.map(Gpr::unwrap_new), Gpr::unwrap_new(src_reg))
1359                    .into()
1360            }
1361            RegClass::Float => {
1362                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
1363                // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
1364                // those, which may write more lanes than we need, but are specified to have
1365                // zero-latency.
1366                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
1367                let src_reg = Xmm::new(src_reg).unwrap();
1368                match ty {
1369                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
1370                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
1371                    }
1372                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
1373                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
1374                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
1375                    }
1376                    _ => unimplemented!("unable to move type: {}", ty),
1377                }
1378            }
1379            RegClass::Vector => unreachable!(),
1380        };
1381        Inst::External { inst }
1382    }
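    // Illustrative only (not from the original source): `gen_move` with
    // `types::I64` produces a plain 64-bit `movq` between GPRs, while
    // `types::F32X4` produces an XMM-to-XMM `movaps`.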

    fn gen_nop(preferred_size: usize) -> Inst {
        Inst::nop(std::cmp::min(preferred_size, 9) as u8)
    }
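    // The clamp to 9 bytes matches the longest single multi-byte NOP encoding
    // this backend emits (9 bytes is the longest form recommended by the
    // Intel manuals); callers wanting more padding presumably emit several
    // NOPs in sequence.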

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }
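    // Worked example for the vector arm above: a 16-byte vector such as
    // `I32X4` has `ty.bytes() == 16`, so `ilog2() == 4` and index 3 selects
    // `I8X16`; a 2-byte vector has `ilog2() == 1` and index 0 selects `I8X2`.
    // Every vector of 128 bits or fewer fits in a single `RegClass::Float`
    // (i.e. XMM) register.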

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }
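    // For example (illustrative only): `gen_imm_f64(1.0, tmp, dst)` first
    // materializes the bit pattern 0x3ff0_0000_0000_0000 in `tmp`, then moves
    // it into the XMM destination with a GPR-to-XMM `movq`.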

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
        15
    }
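    // (The longest possible x86-64 instruction is 15 bytes, hence the value
    // above.)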

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Prefer 32-byte over 16-byte alignment for better performance;
            // see https://github.com/bytecodealliance/wasmtime/issues/8573.
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
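    // (`0x0f, 0x0b` encodes `ud2`, the trap instruction this backend emits;
    // see `is_trap` above.)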
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl asm::AvailableFeatures for &EmitInfo {
    fn _64b(&self) -> bool {
        // Currently, this x64 backend always assumes 64-bit mode.
        true
    }

    fn compat(&self) -> bool {
        // For 32-bit compatibility mode, see
        // https://github.com/bytecodealliance/wasmtime/issues/1980 (TODO).
        false
    }

    fn sse(&self) -> bool {
        // Currently, this x64 backend always assumes SSE.
        true
    }

    fn sse2(&self) -> bool {
        // Currently, this x64 backend always assumes SSE2.
        true
    }

    fn sse3(&self) -> bool {
        self.isa_flags.has_sse3()
    }

    fn ssse3(&self) -> bool {
        self.isa_flags.has_ssse3()
    }

    fn sse41(&self) -> bool {
        self.isa_flags.has_sse41()
    }

    fn sse42(&self) -> bool {
        self.isa_flags.has_sse42()
    }

    fn bmi1(&self) -> bool {
        self.isa_flags.has_bmi1()
    }

    fn bmi2(&self) -> bool {
        self.isa_flags.has_bmi2()
    }

    fn lzcnt(&self) -> bool {
        self.isa_flags.has_lzcnt()
    }

    fn popcnt(&self) -> bool {
        self.isa_flags.has_popcnt()
    }

    fn avx(&self) -> bool {
        self.isa_flags.has_avx()
    }

    fn avx2(&self) -> bool {
        self.isa_flags.has_avx2()
    }

    fn avx512f(&self) -> bool {
        self.isa_flags.has_avx512f()
    }

    fn avx512vl(&self) -> bool {
        self.isa_flags.has_avx512vl()
    }

    fn cmpxchg16b(&self) -> bool {
        self.isa_flags.has_cmpxchg16b()
    }

    fn fma(&self) -> bool {
        self.isa_flags.has_fma()
    }

    fn avx512dq(&self) -> bool {
        self.isa_flags.has_avx512dq()
    }

    fn avx512bitalg(&self) -> bool {
        self.isa_flags.has_avx512bitalg()
    }

    fn avx512vbmi(&self) -> bool {
        self.isa_flags.has_avx512vbmi()
    }
}
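// Note on the receiver type: `AvailableFeatures` is implemented for
// `&EmitInfo` so the assembler can query target features through a shared
// borrow of the emission state. A construction sketch (the builder calls
// below are assumptions, not taken from this file):
//
//     let shared = settings::Flags::new(settings::builder());
//     let isa = x64_settings::Flags::new(&shared, &x64_settings::builder());
//     let info = EmitInfo::new(shared, isa);
//     let can_use_avx = (&info).avx();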

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location. Used for control-flow instructions, which interpret the offset as relative
    /// to the start of the next instruction (so the size of the payload -- 4 bytes -- is
    /// subtracted from the value written into the payload).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }
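    // Worked example (illustrative, not from the original source): a `jmp`
    // encoded at offset 0x10 places its 4-byte displacement at offset 0x11,
    // so the instruction ends at 0x15. With a label at 0x40 and a zero
    // addend, `pc_rel = 0x40 - 0x11 = 0x2f`, and `JmpRel32` stores
    // `0x2f - 4 = 0x2b` -- exactly the distance from the end of the
    // instruction (0x15) to the target, which is what the CPU adds to the
    // next-instruction address.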

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }
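    // Veneers are unnecessary here: the +/- 2 GiB reach of a 32-bit
    // PC-relative offset (see `max_pos_range`/`max_neg_range` above) already
    // covers any code region this backend produces, so both label-use kinds
    // decline.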

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("veneers are not supported for JmpRel32/PCRel32 label-uses");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}