winch_codegen/isa/x64/asm.rs

1//! Assembler library implementation for x64.
2
3use crate::{
4    constant_pool::ConstantPool,
5    isa::{CallingConvention, reg::Reg},
6    masm::{
7        DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
8        RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
9    },
10    reg::writable,
11};
12use cranelift_codegen::{
13    CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
14    MachLabel, PatchRegion, Writable,
15    ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
16    isa::{
17        unwind::UnwindInst,
18        x64::{
19            AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
20            args::{
21                self, Amode, Avx512Opcode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm,
22                RegMem, RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem,
23                XmmMemImm,
24            },
25            external::{PairedGpr, PairedXmm},
26            settings as x64_settings,
27        },
28    },
29    settings,
30};
31
32use crate::reg::WritableReg;
33use cranelift_assembler_x64 as asm;
34use wasmtime_environ::Unsigned;
35
36use super::address::Address;
37use smallvec::SmallVec;
38
39// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.
40
41impl From<Reg> for RegMemImm {
42    fn from(reg: Reg) -> Self {
43        RegMemImm::reg(reg.into())
44    }
45}
46
47impl From<Reg> for RegMem {
48    fn from(value: Reg) -> Self {
49        RegMem::Reg { reg: value.into() }
50    }
51}
52
53impl From<Reg> for WritableGpr {
54    fn from(reg: Reg) -> Self {
55        let writable = Writable::from_reg(reg.into());
56        WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
57    }
58}
59
60impl From<Reg> for WritableXmm {
61    fn from(reg: Reg) -> Self {
62        let writable = Writable::from_reg(reg.into());
63        WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
64    }
65}
66
67/// Convert a writable GPR register to the read-write pair expected by
68/// `cranelift-codegen`.
69fn pair_gpr(reg: WritableReg) -> PairedGpr {
70    assert!(reg.to_reg().is_int());
71    let read = Gpr::unwrap_new(reg.to_reg().into());
72    let write = WritableGpr::from_reg(reg.to_reg().into());
73    PairedGpr { read, write }
74}
75
76impl From<Reg> for asm::Gpr<Gpr> {
77    fn from(reg: Reg) -> Self {
78        asm::Gpr::new(reg.into())
79    }
80}
81
82impl From<Reg> for asm::GprMem<Gpr, Gpr> {
83    fn from(reg: Reg) -> Self {
84        asm::GprMem::Gpr(reg.into())
85    }
86}
87
88/// Convert a writable XMM register to the read-write pair expected by
89/// `cranelift-codegen`.
90fn pair_xmm(reg: WritableReg) -> PairedXmm {
91    assert!(reg.to_reg().is_float());
92    let read = Xmm::unwrap_new(reg.to_reg().into());
93    let write = WritableXmm::from_reg(reg.to_reg().into());
94    PairedXmm { read, write }
95}
96
97impl From<Reg> for asm::Xmm<Xmm> {
98    fn from(reg: Reg) -> Self {
99        asm::Xmm::new(reg.into())
100    }
101}
102
103impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
104    fn from(reg: Reg) -> Self {
105        asm::XmmMem::Xmm(reg.into())
106    }
107}
108
109impl From<Reg> for Gpr {
110    fn from(reg: Reg) -> Self {
111        Gpr::unwrap_new(reg.into())
112    }
113}
114
115impl From<Reg> for GprMem {
116    fn from(value: Reg) -> Self {
117        GprMem::unwrap_new(value.into())
118    }
119}
120
121impl From<Reg> for GprMemImm {
122    fn from(reg: Reg) -> Self {
123        GprMemImm::unwrap_new(reg.into())
124    }
125}
126
127impl From<Reg> for Xmm {
128    fn from(reg: Reg) -> Self {
129        Xmm::unwrap_new(reg.into())
130    }
131}
132
133impl From<Reg> for XmmMem {
134    fn from(value: Reg) -> Self {
135        XmmMem::unwrap_new(value.into())
136    }
137}
138
139impl From<Reg> for XmmMemImm {
140    fn from(value: Reg) -> Self {
141        XmmMemImm::unwrap_new(value.into())
142    }
143}
144
impl From<OperandSize> for args::OperandSize {
    /// Translates a winch operand size into the cranelift x64 equivalent.
    ///
    /// # Panics
    ///
    /// Panics on sizes with no scalar GPR equivalent (e.g. `S128`).
    fn from(size: OperandSize) -> Self {
        match size {
            OperandSize::S8 => Self::Size8,
            OperandSize::S16 => Self::Size16,
            OperandSize::S32 => Self::Size32,
            OperandSize::S64 => Self::Size64,
            s => panic!("Invalid operand size {s:?}"),
        }
    }
}
156
impl From<IntCmpKind> for CC {
    /// Maps an integer comparison to the x64 condition code that tests it:
    /// signed comparisons use the L/LE/NL/NLE family, unsigned comparisons
    /// the B/BE/NB/NBE family.
    fn from(value: IntCmpKind) -> Self {
        match value {
            IntCmpKind::Eq => CC::Z,
            IntCmpKind::Ne => CC::NZ,
            IntCmpKind::LtS => CC::L,
            IntCmpKind::LtU => CC::B,
            IntCmpKind::GtS => CC::NLE,
            IntCmpKind::GtU => CC::NBE,
            IntCmpKind::LeS => CC::LE,
            IntCmpKind::LeU => CC::BE,
            IntCmpKind::GeS => CC::NL,
            IntCmpKind::GeU => CC::NB,
        }
    }
}
173
impl<T: ExtendType> From<Extend<T>> for ExtMode {
    /// Maps a winch extension descriptor to a cranelift extension mode
    /// (source width → destination width, e.g. `BL` = byte → long).
    fn from(value: Extend<T>) -> Self {
        match value {
            Extend::I32Extend8 => ExtMode::BL,
            Extend::I32Extend16 => ExtMode::WL,
            Extend::I64Extend8 => ExtMode::BQ,
            Extend::I64Extend16 => ExtMode::WQ,
            Extend::I64Extend32 => ExtMode::LQ,
            // `__Kind` is a phantom variant carrying the signedness type
            // parameter; it is never constructed.
            Extend::__Kind(_) => unreachable!(),
        }
    }
}
186
187impl From<ExtendKind> for ExtMode {
188    fn from(value: ExtendKind) -> Self {
189        match value {
190            ExtendKind::Signed(s) => s.into(),
191            ExtendKind::Unsigned(u) => u.into(),
192        }
193    }
194}
195
/// Kinds of extends supported by `vpmov`.
///
/// Each variant selects a `vpmovsx*`/`vpmovzx*` instruction widening the
/// low half of the source vector.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}
211
impl From<V128LoadExtendKind> for VpmovKind {
    /// One-to-one mapping: each load-extend kind has a direct `vpmov` form.
    fn from(value: V128LoadExtendKind) -> Self {
        match value {
            V128LoadExtendKind::E8x8S => Self::E8x8S,
            V128LoadExtendKind::E8x8U => Self::E8x8U,
            V128LoadExtendKind::E16x4S => Self::E16x4S,
            V128LoadExtendKind::E16x4U => Self::E16x4U,
            V128LoadExtendKind::E32x2S => Self::E32x2S,
            V128LoadExtendKind::E32x2U => Self::E32x2U,
        }
    }
}
224
impl From<V128ExtendKind> for VpmovKind {
    /// Maps a vector extend kind to its `vpmov` form. Low and high signed
    /// extends share a kind because the caller positions the source lanes
    /// before emitting the instruction.
    fn from(value: V128ExtendKind) -> Self {
        match value {
            V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
            V128ExtendKind::LowI8x16U => Self::E8x8U,
            V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
            V128ExtendKind::LowI16x8U => Self::E16x4U,
            V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
            V128ExtendKind::LowI32x4U => Self::E32x2U,
            // NOTE(review): the remaining (high unsigned) variants appear to
            // be lowered elsewhere — confirm before relying on this path.
            _ => unimplemented!(),
        }
    }
}
238
/// Kinds of comparisons supported by `vcmp`.
///
/// These select the comparison predicate used when emitting a `vcmp`
/// instruction.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}
253
/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to 32-bit floats.
    I32ToF32,
    /// Converts doubleword integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    /// Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}
269
/// Modes supported by `vround`.
///
/// Selects the rounding behavior encoded in the `vround` immediate.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero.
    TowardZero,
}
281
/// Low level assembler implementation for x64.
///
/// Wraps a cranelift `MachBuffer` and emits fully-lowered `Inst`s into it;
/// `finalize` produces the finished machine code buffer.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information (shared and ISA flags used at emit time).
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// x64 flags.
    isa_flags: x64_settings::Flags,
    /// Constant pool.
    pool: ConstantPool,
}
295
296impl Assembler {
297    /// Create a new x64 assembler.
298    pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
299        Self {
300            buffer: MachBuffer::<Inst>::new(),
301            emit_state: Default::default(),
302            emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
303            pool: ConstantPool::new(),
304            isa_flags,
305        }
306    }
307
    /// Get a mutable reference to underlying
    /// machine buffer, e.g. to bind labels or register traps.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }
313
    /// Get a reference to the underlying machine buffer (read-only access,
    /// e.g. for inspecting the current offset).
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }
318
319    /// Adds a constant to the constant pool and returns its address.
320    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
321        let handle = self.pool.register(constant, &mut self.buffer);
322        Address::constant(handle)
323    }
324
325    /// Load a floating point constant, using the constant pool.
326    pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
327        let addr = self.add_constant(constant);
328        self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
329    }
330
331    /// Return the emitted code.
332    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
333        let stencil = self
334            .buffer
335            .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
336        stencil.apply_base_srcloc(loc.unwrap_or_default())
337    }
338
    /// Emit a single lowered instruction into the machine buffer.
    fn emit(&mut self, inst: Inst) {
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
    }
342
343    fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
344        match *addr {
345            Address::Offset { base, offset } => {
346                let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
347                SyntheticAmode::real(amode)
348            }
349            Address::Const(c) => SyntheticAmode::ConstantOffset(c),
350            Address::ImmRegRegShift {
351                simm32,
352                base,
353                index,
354                shift,
355            } => SyntheticAmode::Real(Amode::ImmRegRegShift {
356                simm32,
357                base: base.into(),
358                index: index.into(),
359                shift,
360                flags: memflags,
361            }),
362        }
363    }
364
    /// Emit an unwind instruction (pseudo-instruction recorded for unwind
    /// info generation; produces no machine code by itself).
    pub fn unwind_inst(&mut self, inst: UnwindInst) {
        self.emit(Inst::Unwind { inst })
    }
369
370    /// Push register.
371    pub fn push_r(&mut self, reg: Reg) {
372        let inst = asm::inst::pushq_o::new(reg).into();
373        self.emit(Inst::External { inst });
374    }
375
376    /// Pop to register.
377    pub fn pop_r(&mut self, dst: WritableReg) {
378        let writable: WritableGpr = dst.map(Into::into);
379        let inst = asm::inst::popq_o::new(writable).into();
380        self.emit(Inst::External { inst });
381    }
382
383    /// Return instruction.
384    pub fn ret(&mut self) {
385        let inst = asm::inst::retq_zo::new().into();
386        self.emit(Inst::External { inst });
387    }
388
389    /// Register-to-register move.
390    pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
391        let dst: WritableGpr = dst.map(|r| r.into());
392        let inst = match size {
393            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
394            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
395            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
396            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
397            _ => unreachable!(),
398        };
399        self.emit(Inst::External { inst });
400    }
401
402    /// Register-to-memory move.
403    pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
404        assert!(addr.is_offset());
405        let dst = Self::to_synthetic_amode(addr, flags);
406        let inst = match size {
407            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
408            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
409            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
410            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
411            _ => unreachable!(),
412        };
413        self.emit(Inst::External { inst });
414    }
415
416    /// Immediate-to-memory move.
417    pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
418        assert!(addr.is_offset());
419        let dst = Self::to_synthetic_amode(addr, flags);
420        let inst = match size {
421            OperandSize::S8 => {
422                let src = i8::try_from(src).unwrap();
423                asm::inst::movb_mi::new(dst, src.unsigned()).into()
424            }
425            OperandSize::S16 => {
426                let src = i16::try_from(src).unwrap();
427                asm::inst::movw_mi::new(dst, src.unsigned()).into()
428            }
429            OperandSize::S32 => asm::inst::movl_mi::new(dst, src.unsigned()).into(),
430            OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
431            _ => unreachable!(),
432        };
433        self.emit(Inst::External { inst });
434    }
435
    /// Immediate-to-register move.
    ///
    /// Delegates to cranelift's `Inst::imm`, which selects an appropriate
    /// encoding for the immediate.
    pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
    }
440
441    /// Zero-extend memory-to-register load.
442    pub fn movzx_mr(
443        &mut self,
444        addr: &Address,
445        dst: WritableReg,
446        ext: Option<Extend<Zero>>,
447        memflags: MemFlags,
448    ) {
449        let src = Self::to_synthetic_amode(addr, memflags);
450
451        if let Some(ext) = ext {
452            let dst = WritableGpr::from_reg(dst.to_reg().into());
453            let inst = match ext.into() {
454                ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
455                ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
456                ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
457                ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
458                ExtMode::LQ => {
459                    // This instruction selection may seem strange but is
460                    // correct in 64-bit mode: section 3.4.1.1 of the Intel
461                    // manual says that "32-bit operands generate a 32-bit
462                    // result, zero-extended to a 64-bit result in the
463                    // destination general-purpose register." This is applicable
464                    // beyond `mov` but we use this fact to zero-extend `src`
465                    // into `dst`.
466                    asm::inst::movl_rm::new(dst, src).into()
467                }
468            };
469            self.emit(Inst::External { inst });
470        } else {
471            let dst = WritableGpr::from_reg(dst.to_reg().into());
472            let inst = asm::inst::movq_rm::new(dst, src).into();
473            self.emit(Inst::External { inst });
474        }
475    }
476
    /// Sign-extend memory-to-register load.
    pub fn movsx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Extend<Signed>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        // Select the `movsx` variant matching the source/destination widths.
        let inst = match ext.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
496
    /// Register-to-register move with zero extension.
    pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        // Select the `movzx` variant matching the source/destination widths.
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        self.emit(Inst::External { inst });
    }
517
    /// Register-to-register move with sign extension.
    pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        // Select the `movsx` variant matching the source/destination widths.
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
530
    /// Integer register conditional move.
    ///
    /// Moves `src` into `dst` when condition `cc` holds; `dst` is left
    /// unchanged otherwise. 8- and 16-bit sizes use the 32-bit `cmov` forms
    /// below — NOTE(review): presumably fine because callers of the narrow
    /// sizes only observe the low bits; confirm.
    pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        use IntCmpKind::*;
        use OperandSize::*;

        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            S8 | S16 | S32 => match cc {
                Eq => asm::inst::cmovel_rm::new(dst, src).into(),
                Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
                LtS => asm::inst::cmovll_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
                GtU => asm::inst::cmoval_rm::new(dst, src).into(),
                LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
                GeU => asm::inst::cmovael_rm::new(dst, src).into(),
            },
            S64 => match cc {
                Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
                Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
                LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbq_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgq_rm::new(dst, src).into(),
                GtU => asm::inst::cmovaq_rm::new(dst, src).into(),
                LeS => asm::inst::cmovleq_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),
                GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),
            },
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
566
567    /// Single and double precision floating point
568    /// register-to-register move.
569    pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
570        let ty = match size {
571            OperandSize::S32 => types::F32,
572            OperandSize::S64 => types::F64,
573            OperandSize::S128 => types::I32X4,
574            OperandSize::S8 | OperandSize::S16 => unreachable!(),
575        };
576        self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));
577    }
578
579    /// Single and double precision floating point load.
580    pub fn xmm_mov_mr(
581        &mut self,
582        src: &Address,
583        dst: WritableReg,
584        size: OperandSize,
585        flags: MemFlags,
586    ) {
587        use OperandSize::*;
588
589        assert!(dst.to_reg().is_float());
590
591        let src = Self::to_synthetic_amode(src, flags);
592        let dst: WritableXmm = dst.map(|r| r.into());
593        let inst = match size {
594            S32 => asm::inst::movss_a_m::new(dst, src).into(),
595            S64 => asm::inst::movsd_a_m::new(dst, src).into(),
596            S128 => asm::inst::movdqu_a::new(dst, src).into(),
597            S8 | S16 => unreachable!(),
598        };
599        self.emit(Inst::External { inst });
600    }
601
    /// Vector load and extend.
    ///
    /// Loads the low half of a vector from `src` and widens its lanes into
    /// `dst` according to `kind`.
    pub fn xmm_vpmov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        kind: VpmovKind,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
623
    /// Extends vector of integers in `src` and puts results in `dst`.
    ///
    /// Register-to-register counterpart of [`Self::xmm_vpmov_mr`].
    pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
637
638    /// Vector load and broadcast.
639    pub fn xmm_vpbroadcast_mr(
640        &mut self,
641        src: &Address,
642        dst: WritableReg,
643        size: OperandSize,
644        flags: MemFlags,
645    ) {
646        assert!(dst.to_reg().is_float());
647        let src = Self::to_synthetic_amode(src, flags);
648        let dst: WritableXmm = dst.map(|r| r.into());
649        let inst = match size {
650            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
651            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
652            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
653            _ => unimplemented!(),
654        };
655        self.emit(Inst::External { inst });
656    }
657
658    /// Value in `src` is broadcast into lanes of `size` in `dst`.
659    pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
660        assert!(src.is_float() && dst.to_reg().is_float());
661        let dst: WritableXmm = dst.map(|r| r.into());
662        let inst = match size {
663            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
664            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
665            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
666            _ => unimplemented!(),
667        };
668        self.emit(Inst::External { inst });
669    }
670
671    /// Memory to register shuffle of bytes in vector.
672    pub fn xmm_vpshuf_mr(
673        &mut self,
674        src: &Address,
675        dst: WritableReg,
676        mask: u8,
677        size: OperandSize,
678        flags: MemFlags,
679    ) {
680        let dst: WritableXmm = dst.map(|r| r.into());
681        let src = Self::to_synthetic_amode(src, flags);
682        let inst = match size {
683            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
684            _ => unimplemented!(),
685        };
686        self.emit(Inst::External { inst });
687    }
688
689    /// Register to register shuffle of bytes in vector.
690    pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
691        let dst: WritableXmm = dst.map(|r| r.into());
692
693        let inst = match size {
694            OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),
695            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
696            _ => unimplemented!(),
697        };
698
699        self.emit(Inst::External { inst });
700    }
701
702    /// Single and double precision floating point store.
703    pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
704        use OperandSize::*;
705
706        assert!(src.is_float());
707
708        let dst = Self::to_synthetic_amode(dst, flags);
709        let src: Xmm = src.into();
710        let inst = match size {
711            S32 => asm::inst::movss_c_m::new(dst, src).into(),
712            S64 => asm::inst::movsd_c_m::new(dst, src).into(),
713            S128 => asm::inst::movdqu_b::new(dst, src).into(),
714            S16 | S8 => unreachable!(),
715        };
716        self.emit(Inst::External { inst })
717    }
718
719    /// Floating point register conditional move.
720    pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
721        let dst: WritableXmm = dst.map(Into::into);
722        let ty = match size {
723            OperandSize::S32 => types::F32,
724            OperandSize::S64 => types::F64,
725            // Move the entire 128 bits via movdqa.
726            OperandSize::S128 => types::I32X4,
727            OperandSize::S8 | OperandSize::S16 => unreachable!(),
728        };
729
730        self.emit(Inst::XmmCmove {
731            ty,
732            cc: cc.into(),
733            consequent: Xmm::unwrap_new(src.into()),
734            alternative: dst.to_reg(),
735            dst,
736        })
737    }
738
739    /// Subtract register and register
740    pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
741        let dst = pair_gpr(dst);
742        let inst = match size {
743            OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),
744            OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),
745            OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),
746            OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),
747            OperandSize::S128 => unimplemented!(),
748        };
749        self.emit(Inst::External { inst });
750    }
751
752    /// Subtract immediate register.
753    pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
754        let dst = pair_gpr(dst);
755        let inst = match size {
756            OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
757            OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
758            OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),
759            OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, imm).into(),
760            OperandSize::S128 => unimplemented!(),
761        };
762        self.emit(Inst::External { inst });
763    }
764
765    /// "and" two registers.
766    pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
767        let dst = pair_gpr(dst);
768        let inst = match size {
769            OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),
770            OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),
771            OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),
772            OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),
773            OperandSize::S128 => unimplemented!(),
774        };
775        self.emit(Inst::External { inst });
776    }
777
    /// "and" an immediate and a register.
    ///
    /// # Panics
    ///
    /// Panics if `imm` does not fit in the 8- or 16-bit operand sizes.
    pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),
            // The 64-bit form sign-extends the 32-bit immediate.
            OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
789
790    /// "and" two float registers.
791    pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
792        let dst = pair_xmm(dst);
793        let inst = match size {
794            OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),
795            OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),
796            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
797        };
798        self.emit(Inst::External { inst });
799    }
800
801    /// "and not" two float registers.
802    pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
803        let dst = pair_xmm(dst);
804        let inst = match size {
805            OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),
806            OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),
807            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
808        };
809        self.emit(Inst::External { inst });
810    }
811
812    pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
813        let dst: WritableXmm = dst.map(|r| r.into());
814        let inst = match size {
815            OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),
816            OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),
817            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
818        };
819
820        self.emit(Inst::External { inst });
821    }
822
823    pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
824        let dst: WritableGpr = dst.map(Into::into);
825        let src: Xmm = src.into();
826        let inst = match size {
827            OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),
828            OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),
829            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
830        };
831
832        self.emit(Inst::External { inst })
833    }
834
835    /// Convert float to signed int.
836    pub fn cvt_float_to_sint_seq(
837        &mut self,
838        src: Reg,
839        dst: WritableReg,
840        tmp_gpr: Reg,
841        tmp_xmm: Reg,
842        src_size: OperandSize,
843        dst_size: OperandSize,
844        saturating: bool,
845    ) {
846        self.emit(Inst::CvtFloatToSintSeq {
847            dst_size: dst_size.into(),
848            src_size: src_size.into(),
849            is_saturating: saturating,
850            src: src.into(),
851            dst: dst.map(Into::into),
852            tmp_gpr: tmp_gpr.into(),
853            tmp_xmm: tmp_xmm.into(),
854        });
855    }
856
857    /// Convert float to unsigned int.
858    pub fn cvt_float_to_uint_seq(
859        &mut self,
860        src: Reg,
861        dst: WritableReg,
862        tmp_gpr: Reg,
863        tmp_xmm: Reg,
864        tmp_xmm2: Reg,
865        src_size: OperandSize,
866        dst_size: OperandSize,
867        saturating: bool,
868    ) {
869        self.emit(Inst::CvtFloatToUintSeq {
870            dst_size: dst_size.into(),
871            src_size: src_size.into(),
872            is_saturating: saturating,
873            src: src.into(),
874            dst: dst.map(Into::into),
875            tmp_gpr: tmp_gpr.into(),
876            tmp_xmm: tmp_xmm.into(),
877            tmp_xmm2: tmp_xmm2.into(),
878        });
879    }
880
881    /// Convert signed int to float.
882    pub fn cvt_sint_to_float(
883        &mut self,
884        src: Reg,
885        dst: WritableReg,
886        src_size: OperandSize,
887        dst_size: OperandSize,
888    ) {
889        use OperandSize::*;
890        let dst = pair_xmm(dst);
891        let inst = match (src_size, dst_size) {
892            (S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),
893            (S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),
894            (S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),
895            (S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),
896            _ => unreachable!(),
897        };
898        self.emit(Inst::External { inst });
899    }
900
901    /// Convert unsigned 64-bit int to float.
902    pub fn cvt_uint64_to_float_seq(
903        &mut self,
904        src: Reg,
905        dst: WritableReg,
906        tmp_gpr1: Reg,
907        tmp_gpr2: Reg,
908        dst_size: OperandSize,
909    ) {
910        self.emit(Inst::CvtUint64ToFloatSeq {
911            dst_size: dst_size.into(),
912            src: src.into(),
913            dst: dst.map(Into::into),
914            tmp_gpr1: tmp_gpr1.into(),
915            tmp_gpr2: tmp_gpr2.into(),
916        });
917    }
918
919    /// Change precision of float.
920    pub fn cvt_float_to_float(
921        &mut self,
922        src: Reg,
923        dst: WritableReg,
924        src_size: OperandSize,
925        dst_size: OperandSize,
926    ) {
927        use OperandSize::*;
928        let dst = pair_xmm(dst);
929        let inst = match (src_size, dst_size) {
930            (S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),
931            (S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),
932            _ => unimplemented!(),
933        };
934        self.emit(Inst::External { inst });
935    }
936
937    pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
938        let dst = pair_gpr(dst);
939        let inst = match size {
940            OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),
941            OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),
942            OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),
943            OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),
944            OperandSize::S128 => unimplemented!(),
945        };
946        self.emit(Inst::External { inst });
947    }
948
949    pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
950        let dst = pair_gpr(dst);
951        let inst = match size {
952            OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
953            OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
954            OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),
955            OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),
956            OperandSize::S128 => unimplemented!(),
957        };
958        self.emit(Inst::External { inst });
959    }
960
961    pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
962        let dst = pair_xmm(dst);
963        let inst = match size {
964            OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),
965            OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),
966            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
967        };
968        self.emit(Inst::External { inst });
969    }
970
971    /// Logical exclusive or with registers.
972    pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
973        let dst = pair_gpr(dst);
974        let inst = match size {
975            OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),
976            OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),
977            OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),
978            OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),
979            OperandSize::S128 => unimplemented!(),
980        };
981        self.emit(Inst::External { inst });
982    }
983
984    pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
985        let dst = pair_gpr(dst);
986        let inst = match size {
987            OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
988            OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
989            OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),
990            OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),
991            OperandSize::S128 => unimplemented!(),
992        };
993        self.emit(Inst::External { inst });
994    }
995
996    /// Logical exclusive or with float registers.
997    pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
998        let dst = pair_xmm(dst);
999        let inst = match size {
1000            OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),
1001            OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),
1002            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1003        };
1004        self.emit(Inst::External { inst });
1005    }
1006
1007    /// Shift with register and register.
1008    pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1009        let dst = pair_gpr(dst);
1010        let src: Gpr = src.into();
1011        let inst = match (kind, size) {
1012            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),
1013            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),
1014            (ShiftKind::Shl, _) => todo!(),
1015            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),
1016            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),
1017            (ShiftKind::ShrS, _) => todo!(),
1018            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),
1019            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),
1020            (ShiftKind::ShrU, _) => todo!(),
1021            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),
1022            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),
1023            (ShiftKind::Rotl, _) => todo!(),
1024            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),
1025            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),
1026            (ShiftKind::Rotr, _) => todo!(),
1027        };
1028        self.emit(Inst::External { inst });
1029    }
1030
1031    /// Shift with immediate and register.
1032    pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1033        let dst = pair_gpr(dst);
1034        let inst = match (kind, size) {
1035            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),
1036            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),
1037            (ShiftKind::Shl, _) => todo!(),
1038            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),
1039            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),
1040            (ShiftKind::ShrS, _) => todo!(),
1041            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),
1042            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),
1043            (ShiftKind::ShrU, _) => todo!(),
1044            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),
1045            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),
1046            (ShiftKind::Rotl, _) => todo!(),
1047            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),
1048            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),
1049            (ShiftKind::Rotr, _) => todo!(),
1050        };
1051        self.emit(Inst::External { inst });
1052    }
1053
    /// Signed/unsigned division.
    ///
    /// Emits a sequence of instructions to ensure the correctness of
    /// the division invariants.  This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the quotient to be stored in `rax`.
    ///
    /// `dst` is the `(lo, hi)` dividend register pair; both halves are
    /// read and written by the divide instruction itself.
    pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
        let trap = match kind {
            // Signed division has two trapping conditions, integer overflow and
            // divide-by-zero. Check for divide-by-zero explicitly and let the
            // hardware detect overflow.
            //
            // Note: the explicit compare-and-trap must precede the
            // sign-extension below, which also reads `dst.0`.
            DivKind::Signed => {
                self.cmp_ir(divisor, 0, size);
                self.emit(Inst::TrapIf {
                    cc: CC::Z,
                    trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
                });

                // Sign-extend the dividend with tailor-made instructions for
                // just this operation.
                let ext_dst: WritableGpr = dst.1.into();
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                // Any trap raised by the divide itself is then an overflow.
                TrapCode::INTEGER_OVERFLOW
            }

            // Unsigned division only traps in one case, on divide-by-zero, so
            // defer that to the trap opcode.
            //
            // The divisor_hi reg is initialized with zero through an
            // xor-against-itself op.
            DivKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                TrapCode::INTEGER_DIVISION_BY_ZERO
            }
        };
        let dst0 = pair_gpr(writable!(dst.0));
        let dst1 = pair_gpr(writable!(dst.1));
        // Emit the hardware divide, tagged with the trap code chosen above.
        let inst = match (kind, size) {
            (DivKind::Signed, OperandSize::S32) => {
                asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S32) => {
                asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Signed, OperandSize::S64) => {
                asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S64) => {
                asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
            }
            _ => todo!(),
        };
        self.emit(Inst::External { inst });
    }
1114
    /// Signed/unsigned remainder.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants and ultimately calculate the remainder.
    /// This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the remainder to be stored in `rdx`.
    ///
    /// `dst` is the `(lo, hi)` dividend register pair.
    pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
        match kind {
            // Signed remainder goes through a pseudo-instruction which has
            // some internal branching. The `dividend_hi`, or `rdx`, is
            // initialized here with a `SignExtendData` instruction.
            RemKind::Signed => {
                let ext_dst: WritableGpr = dst.1.into();

                // Initialize `dividend_hi`, or `rdx`, with a tailor-made
                // instruction for this operation.
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                // The pseudo-instruction handles the signed-remainder edge
                // cases internally (including its own branching), leaving
                // the remainder in `dst.1`.
                self.emit(Inst::CheckedSRemSeq {
                    size: size.into(),
                    divisor: divisor.into(),
                    dividend_lo: dst.0.into(),
                    dividend_hi: dst.1.into(),
                    dst_quotient: dst.0.into(),
                    dst_remainder: dst.1.into(),
                });
            }

            // Unsigned remainder initializes `dividend_hi` with zero and
            // then executes a normal `div` instruction.
            RemKind::Unsigned => {
                // Zeroing must happen before the divide, which reads both
                // halves of the dividend pair.
                self.xor_rr(dst.1, writable!(dst.1), size);
                let dst0 = pair_gpr(writable!(dst.0));
                let dst1 = pair_gpr(writable!(dst.1));
                let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;
                let inst = match size {
                    OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),
                    OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),
                    _ => todo!(),
                };
                self.emit(Inst::External { inst });
            }
        }
    }
1165
1166    /// Multiply immediate and register.
1167    pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1168        use OperandSize::*;
1169        let src = dst.to_reg();
1170        let dst: WritableGpr = dst.to_reg().into();
1171        let inst = match size {
1172            S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),
1173            S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),
1174            S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),
1175            S8 | S128 => unimplemented!(),
1176        };
1177        self.emit(Inst::External { inst });
1178    }
1179
1180    /// Multiply register and register.
1181    pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1182        use OperandSize::*;
1183        let dst = pair_gpr(dst);
1184        let inst = match size {
1185            S16 => asm::inst::imulw_rm::new(dst, src).into(),
1186            S32 => asm::inst::imull_rm::new(dst, src).into(),
1187            S64 => asm::inst::imulq_rm::new(dst, src).into(),
1188            S8 | S128 => unimplemented!(),
1189        };
1190        self.emit(Inst::External { inst });
1191    }
1192
1193    /// Add immediate and register.
1194    pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1195        let dst = pair_gpr(dst);
1196        let inst = match size {
1197            OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
1198            OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
1199            OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),
1200            OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),
1201            OperandSize::S128 => unimplemented!(),
1202        };
1203        self.emit(Inst::External { inst });
1204    }
1205
1206    /// Add register and register.
1207    pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1208        let dst = pair_gpr(dst);
1209        let inst = match size {
1210            OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),
1211            OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),
1212            OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),
1213            OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),
1214            OperandSize::S128 => unimplemented!(),
1215        };
1216        self.emit(Inst::External { inst });
1217    }
1218
1219    pub fn lock_xadd(
1220        &mut self,
1221        addr: Address,
1222        dst: WritableReg,
1223        size: OperandSize,
1224        flags: MemFlags,
1225    ) {
1226        assert!(addr.is_offset());
1227        let mem = Self::to_synthetic_amode(&addr, flags);
1228        let dst = pair_gpr(dst);
1229        let inst = match size {
1230            OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),
1231            OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),
1232            OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),
1233            OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),
1234            OperandSize::S128 => unimplemented!(),
1235        };
1236
1237        self.emit(Inst::External { inst });
1238    }
1239
1240    pub fn atomic_rmw_seq(
1241        &mut self,
1242        addr: Address,
1243        operand: Reg,
1244        dst: WritableReg,
1245        temp: WritableReg,
1246        size: OperandSize,
1247        flags: MemFlags,
1248        op: AtomicRmwSeqOp,
1249    ) {
1250        assert!(addr.is_offset());
1251        let mem = Self::to_synthetic_amode(&addr, flags);
1252        self.emit(Inst::AtomicRmwSeq {
1253            ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
1254            mem,
1255            operand: operand.into(),
1256            temp: temp.map(Into::into),
1257            dst_old: dst.map(Into::into),
1258            op,
1259        });
1260    }
1261
1262    pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {
1263        assert!(addr.is_offset());
1264        let mem = Self::to_synthetic_amode(&addr, flags);
1265        let dst = pair_gpr(dst);
1266        let inst = match size {
1267            OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),
1268            OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),
1269            OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),
1270            OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),
1271            OperandSize::S128 => unimplemented!(),
1272        };
1273
1274        self.emit(Inst::External { inst });
1275    }
1276    pub fn cmpxchg(
1277        &mut self,
1278        addr: Address,
1279        replacement: Reg,
1280        dst: WritableReg,
1281        size: OperandSize,
1282        flags: MemFlags,
1283    ) {
1284        assert!(addr.is_offset());
1285        let mem = Self::to_synthetic_amode(&addr, flags);
1286        let dst = pair_gpr(dst);
1287        let inst = match size {
1288            OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),
1289            OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),
1290            OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),
1291            OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),
1292            OperandSize::S128 => unimplemented!(),
1293        };
1294
1295        self.emit(Inst::External { inst });
1296    }
1297
1298    pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
1299        let inst = match size {
1300            OperandSize::S8 => {
1301                let imm = i8::try_from(imm).unwrap();
1302                asm::inst::cmpb_mi::new(src1, imm.unsigned()).into()
1303            }
1304            OperandSize::S16 => match i8::try_from(imm) {
1305                Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),
1306                Err(_) => {
1307                    asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().unsigned()).into()
1308                }
1309            },
1310            OperandSize::S32 => match i8::try_from(imm) {
1311                Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),
1312                Err(_) => asm::inst::cmpl_mi::new(src1, imm.unsigned()).into(),
1313            },
1314            OperandSize::S64 => match i8::try_from(imm) {
1315                Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),
1316                Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),
1317            },
1318            OperandSize::S128 => unimplemented!(),
1319        };
1320
1321        self.emit(Inst::External { inst });
1322    }
1323
1324    pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1325        let inst = match size {
1326            OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),
1327            OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),
1328            OperandSize::S32 => asm::inst::cmpl_rm::new(src1, src2).into(),
1329            OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),
1330            OperandSize::S128 => unimplemented!(),
1331        };
1332
1333        self.emit(Inst::External { inst });
1334    }
1335
1336    /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
1337    /// register.
1338    pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1339        let inst = match size {
1340            OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),
1341            OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),
1342            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1343        };
1344        self.emit(Inst::External { inst });
1345    }
1346
1347    pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1348        assert!(
1349            self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
1350            "Requires has_popcnt and has_sse42 flags"
1351        );
1352        let dst = WritableGpr::from_reg(dst.to_reg().into());
1353        let inst = match size {
1354            OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),
1355            OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),
1356            OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),
1357            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1358        };
1359        self.emit(Inst::External { inst });
1360    }
1361
1362    /// Emit a test instruction with two register operands.
1363    pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1364        let inst = match size {
1365            OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),
1366            OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),
1367            OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),
1368            OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),
1369            OperandSize::S128 => unimplemented!(),
1370        };
1371
1372        self.emit(Inst::External { inst });
1373    }
1374
1375    /// Set value in dst to `0` or `1` based on flags in status register and
1376    /// [`CmpKind`].
1377    pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
1378        self.setcc_impl(kind.into(), dst);
1379    }
1380
1381    /// Set value in dst to `1` if parity flag in status register is set, `0`
1382    /// otherwise.
1383    pub fn setp(&mut self, dst: WritableReg) {
1384        self.setcc_impl(CC::P, dst);
1385    }
1386
1387    /// Set value in dst to `1` if parity flag in status register is not set,
1388    /// `0` otherwise.
1389    pub fn setnp(&mut self, dst: WritableReg) {
1390        self.setcc_impl(CC::NP, dst);
1391    }
1392
    /// Shared implementation for `setcc`/`setp`/`setnp`: zeroes `dst`
    /// without disturbing the status register, then emits the matching
    /// `set<cc>` to copy the selected flag into `dst`'s low byte.
    fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
        // Clear the dst register or bits 1 to 31 may be incorrectly set.
        // Don't use xor since it updates the status register.
        // This `mov $0` must come *before* the `set<cc>` below — the order
        // matters because `set<cc>` reads the very flags a clear-after
        // would need preserved, and only writes the low byte.
        let dst: WritableGpr = dst.map(Into::into);
        let inst = asm::inst::movl_oi::new(dst, 0).into();
        self.emit(Inst::External { inst });

        // Copy correct bit from status register into dst register.
        //
        // Note that some of these mnemonics don't match exactly and that's
        // intentional as there are multiple mnemonics for the same encoding in
        // some cases and the assembler picked ones that match Capstone rather
        // than Cranelift.
        let inst = match cc {
            CC::O => asm::inst::seto_m::new(dst).into(),
            CC::NO => asm::inst::setno_m::new(dst).into(),
            CC::B => asm::inst::setb_m::new(dst).into(),
            CC::NB => asm::inst::setae_m::new(dst).into(), //  nb == ae
            CC::Z => asm::inst::sete_m::new(dst).into(),   //   z ==  e
            CC::NZ => asm::inst::setne_m::new(dst).into(), //  nz == ne
            CC::BE => asm::inst::setbe_m::new(dst).into(),
            CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe ==  a
            CC::S => asm::inst::sets_m::new(dst).into(),
            CC::NS => asm::inst::setns_m::new(dst).into(),
            CC::L => asm::inst::setl_m::new(dst).into(),
            CC::NL => asm::inst::setge_m::new(dst).into(), //  nl == ge
            CC::LE => asm::inst::setle_m::new(dst).into(),
            CC::NLE => asm::inst::setg_m::new(dst).into(), // nle ==  g
            CC::P => asm::inst::setp_m::new(dst).into(),
            CC::NP => asm::inst::setnp_m::new(dst).into(),
        };
        self.emit(Inst::External { inst });
    }
1426
1427    /// Store the count of leading zeroes in src in dst.
1428    /// Requires `has_lzcnt` flag.
1429    pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1430        assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
1431        let dst = WritableGpr::from_reg(dst.to_reg().into());
1432        let inst = match size {
1433            OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),
1434            OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),
1435            OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),
1436            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1437        };
1438        self.emit(Inst::External { inst });
1439    }
1440
1441    /// Store the count of trailing zeroes in src in dst.
1442    /// Requires `has_bmi1` flag.
1443    pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1444        assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
1445        let dst = WritableGpr::from_reg(dst.to_reg().into());
1446        let inst = match size {
1447            OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),
1448            OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),
1449            OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),
1450            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1451        };
1452        self.emit(Inst::External { inst });
1453    }
1454
1455    /// Stores position of the most significant bit set in src in dst.
1456    /// Zero flag is set if src is equal to 0.
1457    pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1458        let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1459        let inst = match size {
1460            OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),
1461            OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),
1462            OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),
1463            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1464        };
1465        self.emit(Inst::External { inst });
1466    }
1467
1468    /// Performs integer negation on `src` and places result in `dst`.
1469    pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {
1470        let gpr = PairedGpr {
1471            read: read.into(),
1472            write: WritableGpr::from_reg(write.to_reg().into()),
1473        };
1474        let inst = match size {
1475            OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),
1476            OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),
1477            OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),
1478            OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),
1479            OperandSize::S128 => unreachable!(),
1480        };
1481        self.emit(Inst::External { inst });
1482    }
1483
1484    /// Stores position of the least significant bit set in src in dst.
1485    /// Zero flag is set if src is equal to 0.
1486    pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1487        let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1488        let inst = match size {
1489            OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),
1490            OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),
1491            OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),
1492            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1493        };
1494        self.emit(Inst::External { inst });
1495    }
1496
1497    /// Performs float addition on src and dst and places result in dst.
1498    pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1499        let dst = pair_xmm(dst);
1500        let inst = match size {
1501            OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),
1502            OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),
1503            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1504        };
1505        self.emit(Inst::External { inst });
1506    }
1507
1508    /// Performs float subtraction on src and dst and places result in dst.
1509    pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1510        let dst = pair_xmm(dst);
1511        let inst = match size {
1512            OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),
1513            OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),
1514            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1515        };
1516        self.emit(Inst::External { inst });
1517    }
1518
1519    /// Performs float multiplication on src and dst and places result in dst.
1520    pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1521        use OperandSize::*;
1522        let dst = pair_xmm(dst);
1523        let inst = match size {
1524            S32 => asm::inst::mulss_a::new(dst, src).into(),
1525            S64 => asm::inst::mulsd_a::new(dst, src).into(),
1526            S8 | S16 | S128 => unreachable!(),
1527        };
1528        self.emit(Inst::External { inst });
1529    }
1530
1531    /// Performs float division on src and dst and places result in dst.
1532    pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1533        let dst = pair_xmm(dst);
1534        let inst = match size {
1535            OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),
1536            OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),
1537            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1538        };
1539        self.emit(Inst::External { inst });
1540    }
1541
1542    /// Minimum for src and dst XMM registers with results put in dst.
1543    pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1544        self.emit(Inst::XmmMinMaxSeq {
1545            size: size.into(),
1546            is_min: true,
1547            lhs: src.into(),
1548            rhs: dst.to_reg().into(),
1549            dst: dst.map(Into::into),
1550        });
1551    }
1552
1553    /// Maximum for src and dst XMM registers with results put in dst.
1554    pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1555        self.emit(Inst::XmmMinMaxSeq {
1556            size: size.into(),
1557            is_min: false,
1558            lhs: src.into(),
1559            rhs: dst.to_reg().into(),
1560            dst: dst.map(Into::into),
1561        });
1562    }
1563
1564    /// Perform rounding operation on float register src and place results in
1565    /// float register dst.
1566    pub fn xmm_rounds_rr(
1567        &mut self,
1568        src: Reg,
1569        dst: WritableReg,
1570        mode: RoundingMode,
1571        size: OperandSize,
1572    ) {
1573        let dst = dst.map(|r| r.into());
1574
1575        let imm: u8 = match mode {
1576            RoundingMode::Nearest => 0x00,
1577            RoundingMode::Down => 0x01,
1578            RoundingMode::Up => 0x02,
1579            RoundingMode::Zero => 0x03,
1580        };
1581
1582        let inst = match size {
1583            OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),
1584            OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),
1585            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1586        };
1587
1588        self.emit(Inst::External { inst });
1589    }
1590
1591    pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1592        use OperandSize::*;
1593        let dst = pair_xmm(dst);
1594        let inst = match size {
1595            S32 => asm::inst::sqrtss_a::new(dst, src).into(),
1596            S64 => asm::inst::sqrtsd_a::new(dst, src).into(),
1597            S8 | S16 | S128 => unimplemented!(),
1598        };
1599        self.emit(Inst::External { inst });
1600    }
1601
1602    /// Emit a call to an unknown location through a register.
1603    pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
1604        self.emit(Inst::CallUnknown {
1605            info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
1606        });
1607    }
1608
1609    /// Emit a call to a locally defined function through an index.
1610    pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
1611        self.emit(Inst::CallKnown {
1612            info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
1613        });
1614    }
1615
1616    /// Emits a conditional jump to the given label.
1617    pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
1618        self.emit(Inst::WinchJmpIf {
1619            cc: cc.into(),
1620            taken,
1621        });
1622    }
1623
1624    /// Performs an unconditional jump to the given label.
1625    pub fn jmp(&mut self, target: MachLabel) {
1626        self.emit(Inst::JmpKnown { dst: target });
1627    }
1628
1629    /// Emits a jump table sequence.
1630    pub fn jmp_table(
1631        &mut self,
1632        targets: SmallVec<[MachLabel; 4]>,
1633        default: MachLabel,
1634        index: Reg,
1635        tmp1: Reg,
1636        tmp2: Reg,
1637    ) {
1638        self.emit(Inst::JmpTableSeq {
1639            idx: index.into(),
1640            tmp1: Writable::from_reg(tmp1.into()),
1641            tmp2: Writable::from_reg(tmp2.into()),
1642            default_target: default,
1643            targets: Box::new(targets.to_vec()),
1644        })
1645    }
1646
1647    /// Emit a trap instruction.
1648    pub fn trap(&mut self, code: TrapCode) {
1649        let inst = asm::inst::ud2_zo::new(code).into();
1650        self.emit(Inst::External { inst });
1651    }
1652
1653    /// Conditional trap.
1654    pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
1655        self.emit(Inst::TrapIf {
1656            cc: cc.into(),
1657            trap_code,
1658        });
1659    }
1660
1661    /// Load effective address.
1662    pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
1663        let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
1664        let dst: WritableGpr = dst.map(Into::into);
1665        let inst = match size {
1666            OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
1667            OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
1668            OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
1669            OperandSize::S8 | OperandSize::S128 => unimplemented!(),
1670        };
1671        self.emit(Inst::External { inst });
1672    }
1673
1674    pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1675        let dst = pair_gpr(dst);
1676        let inst = match size {
1677            OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
1678            OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
1679            OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
1680            OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
1681            OperandSize::S128 => unimplemented!(),
1682        };
1683        self.emit(Inst::External { inst });
1684    }
1685
1686    pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1687        let dst = pair_gpr(dst);
1688        let inst = match size {
1689            OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
1690            OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
1691            OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
1692            OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
1693            OperandSize::S128 => unimplemented!(),
1694        };
1695        self.emit(Inst::External { inst });
1696    }
1697
    /// Widening multiply: `dst_hi:dst_lo <- lhs * rhs`, signed or unsigned
    /// per `kind`.
    ///
    /// x64 `mul`/`imul` (one-operand forms) implicitly use RAX as the
    /// multiplicand and write the low half to RAX and the high half to
    /// RDX; the `Fixed`/`PairedGpr` wrappers below express that implicit
    /// register contract to regalloc.
    pub fn mul_wide(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: MulWideKind,
        size: OperandSize,
    ) {
        use MulWideKind::*;
        use OperandSize::*;
        // RAX is both read (lhs) and written (low half of the result).
        let rax = asm::Fixed(PairedGpr {
            read: lhs.into(),
            write: WritableGpr::from_reg(dst_lo.to_reg().into()),
        });
        // RDX receives the high half of the result (except in the S8 case,
        // see below).
        let rdx = asm::Fixed(dst_hi.to_reg().into());
        if size == S8 {
            // For `mulb` and `imulb`, both the high and low bits are written to
            // RAX.
            assert_eq!(dst_lo, dst_hi);
        }
        let inst = match (size, kind) {
            (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
            (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
            (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
            (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
            (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
            (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
            (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
            (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
            (S128, _) => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
1732
1733    /// Shuffles bytes in `src` according to contents of `mask` and puts
1734    /// result in `dst`.
1735    pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
1736        let dst: WritableXmm = dst.map(|r| r.into());
1737        let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
1738        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1739        self.emit(Inst::External { inst });
1740    }
1741
1742    /// Shuffles bytes in `src` according to contents of `mask` and puts
1743    /// result in `dst`.
1744    pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
1745        let dst: WritableXmm = dst.map(|r| r.into());
1746        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1747        self.emit(Inst::External { inst });
1748    }
1749
1750    /// Add unsigned integers with unsigned saturation.
1751    ///
1752    /// Adds the src operands but when an individual byte result is larger than
1753    /// an unsigned byte integer, 0xFF is written instead.
1754    pub fn xmm_vpaddus_rrm(
1755        &mut self,
1756        dst: WritableReg,
1757        src1: Reg,
1758        src2: &Address,
1759        size: OperandSize,
1760    ) {
1761        let dst: WritableXmm = dst.map(|r| r.into());
1762        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1763        let inst = match size {
1764            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1765            OperandSize::S32 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1766            _ => unimplemented!(),
1767        };
1768        self.emit(Inst::External { inst });
1769    }
1770
1771    /// Add unsigned integers with unsigned saturation.
1772    ///
1773    /// Adds the src operands but when an individual byte result is larger than
1774    /// an unsigned byte integer, 0xFF is written instead.
1775    pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1776        let dst: WritableXmm = dst.map(|r| r.into());
1777        let inst = match size {
1778            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1779            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1780            _ => unimplemented!(),
1781        };
1782        self.emit(Inst::External { inst });
1783    }
1784
1785    /// Add signed integers.
1786    pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1787        let dst: WritableXmm = dst.map(|r| r.into());
1788        let inst = match size {
1789            OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
1790            OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
1791            _ => unimplemented!(),
1792        };
1793        self.emit(Inst::External { inst });
1794    }
1795
1796    pub fn xmm_vpadd_rmr(
1797        &mut self,
1798        src1: Reg,
1799        src2: &Address,
1800        dst: WritableReg,
1801        size: OperandSize,
1802    ) {
1803        let dst: WritableXmm = dst.map(|r| r.into());
1804        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
1805        let inst = match size {
1806            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
1807            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
1808            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
1809            _ => unimplemented!(),
1810        };
1811        self.emit(Inst::External { inst });
1812    }
1813
1814    /// Adds vectors of integers in `src1` and `src2` and puts the results in
1815    /// `dst`.
1816    pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
1817        let dst: WritableXmm = dst.map(|r| r.into());
1818        let inst = match size {
1819            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
1820            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
1821            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
1822            OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
1823            _ => unimplemented!(),
1824        };
1825        self.emit(Inst::External { inst });
1826    }
1827
1828    pub fn mfence(&mut self) {
1829        self.emit(Inst::External {
1830            inst: asm::inst::mfence_zo::new().into(),
1831        });
1832    }
1833
1834    /// Extract a value from `src` into `addr` determined by `lane`.
1835    pub(crate) fn xmm_vpextr_rm(
1836        &mut self,
1837        addr: &Address,
1838        src: Reg,
1839        lane: u8,
1840        size: OperandSize,
1841        flags: MemFlags,
1842    ) {
1843        assert!(addr.is_offset());
1844        let dst = Self::to_synthetic_amode(addr, flags);
1845        let inst = match size {
1846            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1847            OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
1848            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1849            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1850            _ => unimplemented!(),
1851        };
1852        self.emit(Inst::External { inst });
1853    }
1854
1855    /// Extract a value from `src` into `dst` (zero extended) determined by `lane`.
1856    pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
1857        let dst: WritableGpr = dst.map(|r| r.into());
1858        let inst = match size {
1859            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1860            OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
1861            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1862            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1863            _ => unimplemented!(),
1864        };
1865        self.emit(Inst::External { inst });
1866    }
1867
1868    /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1869    /// the location specified in `count`.
1870    pub fn xmm_vpinsr_rrm(
1871        &mut self,
1872        dst: WritableReg,
1873        src1: Reg,
1874        src2: &Address,
1875        count: u8,
1876        size: OperandSize,
1877    ) {
1878        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1879        let dst: WritableXmm = dst.map(|r| r.into());
1880
1881        let inst = match size {
1882            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1883            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1884            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1885            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1886            OperandSize::S128 => unreachable!(),
1887        };
1888        self.emit(Inst::External { inst });
1889    }
1890
1891    /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1892    /// the location specified in `count`.
1893    pub fn xmm_vpinsr_rrr(
1894        &mut self,
1895        dst: WritableReg,
1896        src1: Reg,
1897        src2: Reg,
1898        count: u8,
1899        size: OperandSize,
1900    ) {
1901        let dst: WritableXmm = dst.map(|r| r.into());
1902        let inst = match size {
1903            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1904            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1905            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1906            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1907            OperandSize::S128 => unreachable!(),
1908        };
1909        self.emit(Inst::External { inst });
1910    }
1911
1912    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1913    pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
1914        let dst: WritableXmm = dst.map(|r| r.into());
1915        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
1916        let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
1917        self.emit(Inst::External { inst });
1918    }
1919
1920    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1921    pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
1922        let dst: WritableXmm = dst.map(|r| r.into());
1923        let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
1924        self.emit(Inst::External { inst });
1925    }
1926
1927    /// Moves lower 64-bit float from `src2` into lower 64-bits of `dst` and the
1928    /// upper 64-bits in `src1` into the upper 64-bits of `dst`.
1929    pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1930        let dst: WritableXmm = dst.map(|r| r.into());
1931        let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
1932        self.emit(Inst::External { inst });
1933    }
1934
1935    /// Moves 64-bit float from `src` into lower 64-bits of `dst`.
1936    /// Zeroes out the upper 64 bits of `dst`.
1937    pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
1938        let src = Self::to_synthetic_amode(src, MemFlags::trusted());
1939        let dst: WritableXmm = dst.map(|r| r.into());
1940        let inst = asm::inst::vmovsd_d::new(dst, src).into();
1941        self.emit(Inst::External { inst });
1942    }
1943
1944    /// Moves two 32-bit floats from `src2` to the upper 64-bits of `dst`.
1945    /// Copies two 32-bit floats from the lower 64-bits of `src1` to lower
1946    /// 64-bits of `dst`.
1947    pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
1948        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1949        let dst: WritableXmm = dst.map(|r| r.into());
1950        let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
1951        self.emit(Inst::External { inst });
1952    }
1953
1954    /// Moves two 32-bit floats from the lower 64-bits of `src2` to the upper
1955    /// 64-bits of `dst`. Copies two 32-bit floats from the lower 64-bits of
1956    /// `src1` to lower 64-bits of `dst`.
1957    pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1958        let dst: WritableXmm = dst.map(|r| r.into());
1959        let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
1960        self.emit(Inst::External { inst });
1961    }
1962
1963    /// Move unaligned packed integer values from address `src` to `dst`.
1964    pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
1965        let src = Self::to_synthetic_amode(src, flags);
1966        let dst: WritableXmm = dst.map(|r| r.into());
1967        let inst = asm::inst::vmovdqu_a::new(dst, src).into();
1968        self.emit(Inst::External { inst });
1969    }
1970
1971    /// Move integer from `src` to xmm register `dst` using an AVX instruction.
1972    pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1973        let dst: WritableXmm = dst.map(|r| r.into());
1974        let inst = match size {
1975            OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
1976            OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
1977            _ => unreachable!(),
1978        };
1979
1980        self.emit(Inst::External { inst });
1981    }
1982
1983    pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
1984        let inst = asm::inst::vptest_rm::new(src1, src2).into();
1985        self.emit(Inst::External { inst });
1986    }
1987
1988    /// Converts vector of integers into vector of floating values.
1989    pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
1990        let dst: WritableXmm = dst.map(|x| x.into());
1991        let inst = match kind {
1992            VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
1993            VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
1994            VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
1995            VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
1996            VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
1997            VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
1998        };
1999        self.emit(Inst::External { inst });
2000    }
2001
2002    /// Subtract floats in vector `src1` to floats in vector `src2`.
2003    pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2004        let dst: WritableXmm = dst.map(|r| r.into());
2005        let inst = match size {
2006            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2007            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2008            _ => unimplemented!(),
2009        };
2010        self.emit(Inst::External { inst });
2011    }
2012
2013    /// Subtract integers in vector `src1` from integers in vector `src2`.
2014    pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2015        let dst: WritableXmm = dst.map(|r| r.into());
2016        let inst = match size {
2017            OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
2018            OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
2019            OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
2020            OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
2021            _ => unimplemented!(),
2022        };
2023        self.emit(Inst::External { inst });
2024    }
2025
2026    /// Substract unsigned integers with unsigned saturation.
2027    pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2028        let dst: WritableXmm = dst.map(|r| r.into());
2029        let inst = match size {
2030            OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
2031            OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
2032            _ => unimplemented!(),
2033        };
2034        self.emit(Inst::External { inst });
2035    }
2036
2037    /// Subtract signed integers with signed saturation.
2038    pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2039        let dst: WritableXmm = dst.map(|r| r.into());
2040        let inst = match size {
2041            OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
2042            OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
2043            _ => unimplemented!(),
2044        };
2045        self.emit(Inst::External { inst });
2046    }
2047
2048    /// Add floats in vector `src1` to floats in vector `src2`.
2049    pub fn xmm_vaddp_rrm(
2050        &mut self,
2051        src1: Reg,
2052        src2: &Address,
2053        dst: WritableReg,
2054        size: OperandSize,
2055    ) {
2056        let dst: WritableXmm = dst.map(|r| r.into());
2057        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2058        let inst = match size {
2059            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
2060            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
2061            _ => unimplemented!(),
2062        };
2063        self.emit(Inst::External { inst });
2064    }
2065
2066    /// Add floats in vector `src1` to floats in vector `src2`.
2067    pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2068        let dst: WritableXmm = dst.map(|r| r.into());
2069        let inst = match size {
2070            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
2071            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
2072            _ => unimplemented!(),
2073        };
2074        self.emit(Inst::External { inst });
2075    }
2076
2077    /// Compare vector register `lhs` with a vector of integers in `rhs` for
2078    /// equality between packed integers and write the resulting vector into
2079    /// `dst`.
2080    pub fn xmm_vpcmpeq_rrm(
2081        &mut self,
2082        dst: WritableReg,
2083        lhs: Reg,
2084        address: &Address,
2085        size: OperandSize,
2086    ) {
2087        let dst: WritableXmm = dst.map(|r| r.into());
2088        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2089        let inst = match size {
2090            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
2091            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
2092            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
2093            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
2094            _ => unimplemented!(),
2095        };
2096        self.emit(Inst::External { inst });
2097    }
2098
2099    /// Compare vector registers `lhs` and `rhs` for equality between packed
2100    /// integers and write the resulting vector into `dst`.
2101    pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2102        let dst: WritableXmm = dst.map(|r| r.into());
2103        let inst = match size {
2104            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
2105            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
2106            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
2107            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
2108            _ => unimplemented!(),
2109        };
2110        self.emit(Inst::External { inst });
2111    }
2112
2113    /// Performs a greater than comparison with vectors of signed integers in
2114    /// `lhs` and `rhs` and puts the results in `dst`.
2115    pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2116        let dst: WritableXmm = dst.map(|r| r.into());
2117        let inst = match size {
2118            OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
2119            OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
2120            OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
2121            OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
2122            _ => unimplemented!(),
2123        };
2124        self.emit(Inst::External { inst });
2125    }
2126
2127    /// Performs a max operation with vectors of signed integers in `lhs` and
2128    /// `rhs` and puts the results in `dst`.
2129    pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2130        let dst: WritableXmm = dst.map(|r| r.into());
2131        let inst = match size {
2132            OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
2133            OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
2134            OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
2135            _ => unimplemented!(),
2136        };
2137        self.emit(Inst::External { inst });
2138    }
2139
2140    /// Performs a max operation with vectors of unsigned integers in `lhs` and
2141    /// `rhs` and puts the results in `dst`.
2142    pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2143        let dst: WritableXmm = dst.map(|r| r.into());
2144        let inst = match size {
2145            OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
2146            OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
2147            OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
2148            _ => unimplemented!(),
2149        };
2150        self.emit(Inst::External { inst });
2151    }
2152
2153    /// Performs a min operation with vectors of signed integers in `lhs` and
2154    /// `rhs` and puts the results in `dst`.
2155    pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2156        let dst: WritableXmm = dst.map(|r| r.into());
2157        let inst = match size {
2158            OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
2159            OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
2160            OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
2161            _ => unimplemented!(),
2162        };
2163        self.emit(Inst::External { inst });
2164    }
2165
2166    /// Performs a min operation with vectors of unsigned integers in `lhs` and
2167    /// `rhs` and puts the results in `dst`.
2168    pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2169        let dst: WritableXmm = dst.map(|r| r.into());
2170        let inst = match size {
2171            OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
2172            OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
2173            OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
2174            _ => unimplemented!(),
2175        };
2176        self.emit(Inst::External { inst });
2177    }
2178
2179    /// Performs a comparison operation between vectors of floats in `lhs` and
2180    /// `rhs` and puts the results in `dst`.
2181    pub fn xmm_vcmpp_rrr(
2182        &mut self,
2183        dst: WritableReg,
2184        lhs: Reg,
2185        rhs: Reg,
2186        size: OperandSize,
2187        kind: VcmpKind,
2188    ) {
2189        let dst: WritableXmm = dst.map(|r| r.into());
2190        let imm = match kind {
2191            VcmpKind::Eq => 0,
2192            VcmpKind::Lt => 1,
2193            VcmpKind::Le => 2,
2194            VcmpKind::Unord => 3,
2195            VcmpKind::Ne => 4,
2196        };
2197        let inst = match size {
2198            OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
2199            OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
2200            _ => unimplemented!(),
2201        };
2202        self.emit(Inst::External { inst });
2203    }
2204
2205    /// Performs a subtraction on two vectors of floats and puts the results in
2206    /// `dst`.
2207    pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
2208        let dst: WritableXmm = dst.map(|r| r.into());
2209        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2210        let inst = match size {
2211            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
2212            _ => unimplemented!(),
2213        };
2214        self.emit(Inst::External { inst });
2215    }
2216
2217    /// Performs a subtraction on two vectors of floats and puts the results in
2218    /// `dst`.
2219    pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2220        let dst: WritableXmm = dst.map(|r| r.into());
2221        let inst = match size {
2222            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2223            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2224            _ => unimplemented!(),
2225        };
2226        self.emit(Inst::External { inst });
2227    }
2228
2229    /// Converts a vector of signed integers into a vector of narrower integers
2230    /// using saturation to handle overflow.
2231    pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2232        let dst: WritableXmm = dst.map(|r| r.into());
2233        let inst = match size {
2234            OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
2235            OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
2236            _ => unimplemented!(),
2237        };
2238        self.emit(Inst::External { inst });
2239    }
2240
2241    /// Converts a vector of unsigned integers into a vector of narrower
2242    /// integers using saturation to handle overflow.
2243    pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2244        let dst: WritableXmm = dst.map(|r| r.into());
2245        let inst = match size {
2246            OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
2247            OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
2248            _ => unimplemented!(),
2249        };
2250        self.emit(Inst::External { inst });
2251    }
2252
2253    /// Concatenates `src1` and `src2` and shifts right by `imm` and puts
2254    /// result in `dst`.
2255    pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
2256        let dst: WritableXmm = dst.map(|r| r.into());
2257        let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
2258        self.emit(Inst::External { inst });
2259    }
2260
2261    /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
2262    /// interleaves them in `dst`.
2263    pub fn xmm_vunpcklp_rrm(
2264        &mut self,
2265        src1: Reg,
2266        src2: &Address,
2267        dst: WritableReg,
2268        size: OperandSize,
2269    ) {
2270        let dst: WritableXmm = dst.map(|r| r.into());
2271        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2272        let inst = match size {
2273            OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
2274            _ => unimplemented!(),
2275        };
2276        self.emit(Inst::External { inst });
2277    }
2278
2279    /// Unpacks and interleaves high order data of floats in `src1` and `src2`
2280    /// and puts the results in `dst`.
2281    pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2282        let dst: WritableXmm = dst.map(|r| r.into());
2283        let inst = match size {
2284            OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
2285            _ => unimplemented!(),
2286        };
2287        self.emit(Inst::External { inst });
2288    }
2289
2290    /// Unpacks and interleaves the lower lanes of vectors of integers in `src1`
2291    /// and `src2` and puts the results in `dst`.
2292    pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2293        let dst: WritableXmm = dst.map(|r| r.into());
2294        let inst = match size {
2295            OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
2296            OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
2297            _ => unimplemented!(),
2298        };
2299        self.emit(Inst::External { inst });
2300    }
2301
2302    /// Unpacks and interleaves the higher lanes of vectors of integers in
2303    /// `src1` and `src2` and puts the results in `dst`.
2304    pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2305        let dst: WritableXmm = dst.map(|r| r.into());
2306        let inst = match size {
2307            OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
2308            OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
2309            _ => unimplemented!(),
2310        };
2311        self.emit(Inst::External { inst });
2312    }
2313
    /// Emits the three-source EVEX-encoded instruction `op` with `src1` and
    /// `src2` as inputs and `dst` as the destination.
    ///
    /// The underlying `XmmRmREvex3` form carries three sources; the first one
    /// is wired to `dst` because the destination register is reused as an
    /// input by these opcodes (see the inline note below).
    pub(crate) fn xmm_rm_rvex3(
        &mut self,
        op: Avx512Opcode,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
    ) {
        self.emit(Inst::XmmRmREvex3 {
            op,
            // `src1` reuses `dst`, and is ignored in emission
            src1: dst.to_reg().into(),
            src2: src1.into(),
            src3: src2.into(),
            dst: dst.map(Into::into),
        });
    }
2330
2331    /// Creates a mask made up of the most significant bit of each byte of
2332    /// `src` and stores the result in `dst`.
2333    pub fn xmm_vpmovmsk_rr(
2334        &mut self,
2335        src: Reg,
2336        dst: WritableReg,
2337        src_size: OperandSize,
2338        dst_size: OperandSize,
2339    ) {
2340        assert_eq!(dst_size, OperandSize::S32);
2341        let dst: WritableGpr = dst.map(|r| r.into());
2342        let inst = match src_size {
2343            OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
2344            _ => unimplemented!(),
2345        };
2346
2347        self.emit(Inst::External { inst });
2348    }
2349
    /// Creates a mask made up of the most significant bit of each float lane
    /// of `src` (`vmovmskps` for 32-bit lanes, `vmovmskpd` for 64-bit lanes)
    /// and stores the result in `dst`.
    pub fn xmm_vmovskp_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        // Only a 32-bit general-purpose destination is supported.
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }
2369
2370    /// Compute the absolute value of elements in vector `src` and put the
2371    /// results in `dst`.
2372    pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2373        let dst: WritableXmm = dst.map(|r| r.into());
2374        let inst = match size {
2375            OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
2376            OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
2377            OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
2378            _ => unimplemented!(),
2379        };
2380        self.emit(Inst::External { inst });
2381    }
2382
2383    /// Arithmetically (sign preserving) right shift on vector in `src` by
2384    /// `amount` with result written to `dst`.
2385    pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2386        let dst: WritableXmm = dst.map(|r| r.into());
2387        let inst = match size {
2388            OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
2389            OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
2390            _ => unimplemented!(),
2391        };
2392        self.emit(Inst::External { inst });
2393    }
2394
2395    /// Arithmetically (sign preserving) right shift on vector in `src` by
2396    /// `imm` with result written to `dst`.
2397    pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2398        let dst: WritableXmm = dst.map(|r| r.into());
2399        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2400        let inst = match size {
2401            OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
2402            _ => unimplemented!(),
2403        };
2404        self.emit(Inst::External { inst });
2405    }
2406
2407    /// Shift vector data left by `imm`.
2408    pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2409        let dst: WritableXmm = dst.map(|r| r.into());
2410        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2411        let inst = match size {
2412            OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
2413            OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
2414            _ => unimplemented!(),
2415        };
2416        self.emit(Inst::External { inst });
2417    }
2418
2419    /// Shift vector data left by `amount`.
2420    pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2421        let dst: WritableXmm = dst.map(|r| r.into());
2422        let inst = match size {
2423            OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
2424            OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
2425            OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
2426            _ => unimplemented!(),
2427        };
2428        self.emit(Inst::External { inst });
2429    }
2430
2431    /// Shift vector data right by `imm`.
2432    pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2433        let dst: WritableXmm = dst.map(|r| r.into());
2434        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2435        let inst = match size {
2436            OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
2437            OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
2438            OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
2439            _ => unimplemented!(),
2440        };
2441        self.emit(Inst::External { inst });
2442    }
2443
2444    /// Shift vector data right by `amount`.
2445    pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2446        let dst: WritableXmm = dst.map(|r| r.into());
2447        let inst = match size {
2448            OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
2449            OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
2450            OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
2451            _ => unimplemented!(),
2452        };
2453        self.emit(Inst::External { inst });
2454    }
2455
2456    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2457    /// and put the results in `dst`.
2458    pub fn xmm_vandp_rrm(
2459        &mut self,
2460        src1: Reg,
2461        src2: &Address,
2462        dst: WritableReg,
2463        size: OperandSize,
2464    ) {
2465        let dst: WritableXmm = dst.map(|r| r.into());
2466        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2467        let inst = match size {
2468            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
2469            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
2470            _ => unimplemented!(),
2471        };
2472        self.emit(Inst::External { inst });
2473    }
2474
2475    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2476    /// and put the results in `dst`.
2477    pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2478        let dst: WritableXmm = dst.map(|r| r.into());
2479        let inst = match size {
2480            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
2481            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
2482            _ => unimplemented!(),
2483        };
2484        self.emit(Inst::External { inst });
2485    }
2486
2487    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2488    /// and stores the results in `dst`.
2489    pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
2490        let dst: WritableXmm = dst.map(|r| r.into());
2491        let address = Self::to_synthetic_amode(&src2, MemFlags::trusted());
2492        let inst = asm::inst::vpand_b::new(dst, src1, address).into();
2493        self.emit(Inst::External { inst });
2494    }
2495
2496    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2497    /// and stores the results in `dst`.
2498    pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2499        let dst: WritableXmm = dst.map(|r| r.into());
2500        let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
2501        self.emit(Inst::External { inst });
2502    }
2503
2504    /// Perform an `and not` operation on vectors of floats in `src1` and
2505    /// `src2` and put the results in `dst`.
2506    pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2507        let dst: WritableXmm = dst.map(|r| r.into());
2508        let inst = match size {
2509            OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
2510            OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
2511            _ => unimplemented!(),
2512        };
2513        self.emit(Inst::External { inst });
2514    }
2515
2516    /// Perform an `and not` operation on vectors in `src1` and `src2` and put
2517    /// the results in `dst`.
2518    pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2519        let dst: WritableXmm = dst.map(|r| r.into());
2520        let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
2521        self.emit(Inst::External { inst });
2522    }
2523
2524    /// Perform an or operation for the vectors of floats in `src1` and `src2`
2525    /// and put the results in `dst`.
2526    pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2527        let dst: WritableXmm = dst.map(|r| r.into());
2528        let inst = match size {
2529            OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
2530            OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
2531            _ => unimplemented!(),
2532        };
2533        self.emit(Inst::External { inst });
2534    }
2535
2536    /// Bitwise OR of `src1` and `src2`.
2537    pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
2538        let dst: WritableXmm = dst.map(|r| r.into());
2539        let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
2540        self.emit(Inst::External { inst });
2541    }
2542
2543    /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
2544    /// the results in `dst`.
2545    pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2546        let dst: WritableXmm = dst.map(|r| r.into());
2547        let inst = match size {
2548            OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
2549            OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
2550            _ => unimplemented!(),
2551        };
2552        self.emit(Inst::External { inst });
2553    }
2554
    /// Perform a logical xor on the vector in `src` and the 128 bits at
    /// `address` and put the results in `dst`.
    pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpxor_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }
2563
    /// Perform a logical xor on vectors in `src1` and `src2` and put the
    /// results in `dst`.
    pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2571
2572    /// Perform a max operation across two vectors of floats and put the
2573    /// results in `dst`.
2574    pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2575        let dst: WritableXmm = dst.map(|r| r.into());
2576        let inst = match size {
2577            OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
2578            OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
2579            _ => unimplemented!(),
2580        };
2581        self.emit(Inst::External { inst });
2582    }
2583
    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`. The second operand is loaded from memory at `src2`.
    pub fn xmm_vminp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2602
    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2614
    /// Round a vector of floats in `src` according to `mode` and put the
    /// results in `dst`.
    pub fn xmm_vroundp_rri(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: VroundMode,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        // Rounding-control immediate for vroundps/vroundpd.
        let imm = match mode {
            VroundMode::TowardNearest => 0,
            VroundMode::TowardNegativeInfinity => 1,
            VroundMode::TowardPositiveInfinity => 2,
            VroundMode::TowardZero => 3,
        };

        let inst = match size {
            OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }
2639
2640    /// Shuffle of vectors of floats.
2641    pub fn xmm_vshufp_rrri(
2642        &mut self,
2643        src1: Reg,
2644        src2: Reg,
2645        dst: WritableReg,
2646        imm: u8,
2647        size: OperandSize,
2648    ) {
2649        let dst: WritableXmm = dst.map(|r| r.into());
2650        let inst = match size {
2651            OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
2652            _ => unimplemented!(),
2653        };
2654        self.emit(Inst::External { inst });
2655    }
2656
2657    /// Each lane in `src1` is multiplied by the corresponding lane in `src2`
2658    /// producing intermediate 32-bit operands. Each intermediate 32-bit
2659    /// operand is truncated to 18 most significant bits. Rounding is performed
2660    /// by adding 1 to the least significant bit of the 18-bit intermediate
2661    /// result. The 16 bits immediately to the right of the most significant
2662    /// bit of each 18-bit intermediate result is placed in each lane of `dst`.
2663    pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2664        let dst: WritableXmm = dst.map(|r| r.into());
2665        let inst = match size {
2666            OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
2667            _ => unimplemented!(),
2668        };
2669        self.emit(Inst::External { inst });
2670    }
2671
    /// Emits `vpmuldq`, multiplying the low signed doubleword of each quadword
    /// lane in `src1` and `src2` and writing the 64-bit products to `dst`.
    pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2677
    /// Emits `vpmuludq`, multiplying the low unsigned doubleword of each
    /// quadword lane in `src1` and `src2` and writing the 64-bit products to
    /// `dst`.
    pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2683
    /// Multiplies vectors of integers in `src1` and `src2` lane-wise, keeping
    /// the low half of each product, and puts the results in `dst`.
    pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2693
    /// Multiplies vectors of floats in `src1` and `src2` and puts the results
    /// in `dst`.
    pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2703
2704    /// Perform an average operation for the vector of unsigned integers in
2705    /// `src1` and `src2` and put the results in `dst`.
2706    pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2707        let dst: WritableXmm = dst.map(|r| r.into());
2708        let inst = match size {
2709            OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
2710            OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
2711            _ => unimplemented!(),
2712        };
2713        self.emit(Inst::External { inst });
2714    }
2715
2716    /// Divide the vector of floats in `src1` by the vector of floats in `src2`
2717    /// and put the results in `dst`.
2718    pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2719        let dst: WritableXmm = dst.map(|r| r.into());
2720        let inst = match size {
2721            OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
2722            OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
2723            _ => unimplemented!(),
2724        };
2725        self.emit(Inst::External { inst });
2726    }
2727
2728    /// Compute square roots of vector of floats in `src` and put the results
2729    /// in `dst`.
2730    pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2731        let dst: WritableXmm = dst.map(|r| r.into());
2732        let inst = match size {
2733            OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
2734            OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
2735            _ => unimplemented!(),
2736        };
2737        self.emit(Inst::External { inst });
2738    }
2739
2740    /// Multiply and add packed signed and unsigned bytes.
2741    pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2742        let dst: WritableXmm = dst.map(|r| r.into());
2743        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2744        let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
2745        self.emit(Inst::External { inst });
2746    }
2747
2748    /// Multiply and add packed signed and unsigned bytes.
2749    pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2750        let dst: WritableXmm = dst.map(|r| r.into());
2751        let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
2752        self.emit(Inst::External { inst });
2753    }
2754
    /// Multiply and add packed integers; the second operand is loaded from
    /// memory at `address`.
    pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }
2762
    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2769}
2770
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
/// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
/// so 8 and 16 bit operand sizes are not supported.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
    /// sequence, accepting a 32-bit immediate.
    region: PatchRegion,

    /// The byte offset into the patchable region where the patchable 32-bit constant begins.
    constant_offset: usize,
}
2782
impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
    /// add-with-immediate occurs. The [`MachBuffer`] will have an add-with-immediate instruction
    /// present in that region, though it will add `0` until the `::finalize` method is called.
    ///
    /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
    /// that 8 and 16-bit addition cannot be supported.
    pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
        let open = asm.buffer_mut().start_patchable();
        let start = asm.buffer().cur_offset();

        // Emit the opcode and register use for the add instruction.
        let reg = pair_gpr(Writable::from_reg(reg));
        let inst = match size {
            OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
            OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
            _ => {
                panic!(
                    "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
                    size.num_bits(),
                )
            }
        };
        asm.emit(Inst::External { inst });

        // The offset to the constant is the width of what was just emitted
        // minus 4, the width of the 32-bit immediate.
        let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();

        let region = asm.buffer_mut().end_patchable(open);

        Self {
            region,
            constant_offset,
        }
    }

    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
    /// value is passed in as an i32, but the instruction encoding is fixed when
    /// [`PatchableAddToReg::new`] is called.
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
        let slice = self.region.patch(buffer);
        // The patched slice must be exactly the emitted add instruction, whose
        // trailing 4 bytes are the little-endian immediate we overwrite here.
        debug_assert_eq!(slice.len(), self.constant_offset + 4);
        slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
    }
}