winch_codegen/isa/x64/
asm.rs

1//! Assembler library implementation for x64.
2
3use crate::{
4    constant_pool::ConstantPool,
5    isa::{CallingConvention, reg::Reg},
6    masm::{
7        DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
8        RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
9    },
10    reg::writable,
11};
12use cranelift_codegen::{
13    CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
14    MachLabel, PatchRegion, Writable,
15    ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
16    isa::{
17        unwind::UnwindInst,
18        x64::{
19            AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
20            args::{
21                self, Amode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem,
22                RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemImm,
23            },
24            external::{PairedGpr, PairedXmm},
25            settings as x64_settings,
26        },
27    },
28    settings,
29};
30
31use crate::reg::WritableReg;
32use cranelift_assembler_x64 as asm;
33
34use super::address::Address;
35use smallvec::SmallVec;
36
37// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.
38
39impl From<Reg> for RegMemImm {
40    fn from(reg: Reg) -> Self {
41        RegMemImm::reg(reg.into())
42    }
43}
44
45impl From<Reg> for RegMem {
46    fn from(value: Reg) -> Self {
47        RegMem::Reg { reg: value.into() }
48    }
49}
50
51impl From<Reg> for WritableGpr {
52    fn from(reg: Reg) -> Self {
53        let writable = Writable::from_reg(reg.into());
54        WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
55    }
56}
57
58impl From<Reg> for WritableXmm {
59    fn from(reg: Reg) -> Self {
60        let writable = Writable::from_reg(reg.into());
61        WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
62    }
63}
64
65/// Convert a writable GPR register to the read-write pair expected by
66/// `cranelift-codegen`.
67fn pair_gpr(reg: WritableReg) -> PairedGpr {
68    assert!(reg.to_reg().is_int());
69    let read = Gpr::unwrap_new(reg.to_reg().into());
70    let write = WritableGpr::from_reg(reg.to_reg().into());
71    PairedGpr { read, write }
72}
73
74impl From<Reg> for asm::Gpr<Gpr> {
75    fn from(reg: Reg) -> Self {
76        asm::Gpr::new(reg.into())
77    }
78}
79
80impl From<Reg> for asm::GprMem<Gpr, Gpr> {
81    fn from(reg: Reg) -> Self {
82        asm::GprMem::Gpr(reg.into())
83    }
84}
85
86/// Convert a writable XMM register to the read-write pair expected by
87/// `cranelift-codegen`.
88fn pair_xmm(reg: WritableReg) -> PairedXmm {
89    assert!(reg.to_reg().is_float());
90    let read = Xmm::unwrap_new(reg.to_reg().into());
91    let write = WritableXmm::from_reg(reg.to_reg().into());
92    PairedXmm { read, write }
93}
94
95impl From<Reg> for asm::Xmm<Xmm> {
96    fn from(reg: Reg) -> Self {
97        asm::Xmm::new(reg.into())
98    }
99}
100
101impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
102    fn from(reg: Reg) -> Self {
103        asm::XmmMem::Xmm(reg.into())
104    }
105}
106
107impl From<Reg> for Gpr {
108    fn from(reg: Reg) -> Self {
109        Gpr::unwrap_new(reg.into())
110    }
111}
112
113impl From<Reg> for GprMem {
114    fn from(value: Reg) -> Self {
115        GprMem::unwrap_new(value.into())
116    }
117}
118
119impl From<Reg> for GprMemImm {
120    fn from(reg: Reg) -> Self {
121        GprMemImm::unwrap_new(reg.into())
122    }
123}
124
125impl From<Reg> for Xmm {
126    fn from(reg: Reg) -> Self {
127        Xmm::unwrap_new(reg.into())
128    }
129}
130
131impl From<Reg> for XmmMem {
132    fn from(value: Reg) -> Self {
133        XmmMem::unwrap_new(value.into())
134    }
135}
136
137impl From<Reg> for XmmMemImm {
138    fn from(value: Reg) -> Self {
139        XmmMemImm::unwrap_new(value.into())
140    }
141}
142
// Winch's `OperandSize` includes vector widths, but `args::OperandSize` only
// models scalar widths — hence the panic on any other size (e.g. `S128`).
impl From<OperandSize> for args::OperandSize {
    fn from(size: OperandSize) -> Self {
        match size {
            OperandSize::S8 => Self::Size8,
            OperandSize::S16 => Self::Size16,
            OperandSize::S32 => Self::Size32,
            OperandSize::S64 => Self::Size64,
            s => panic!("Invalid operand size {s:?}"),
        }
    }
}
154
// Map Winch integer-comparison kinds to x64 condition codes: signed
// comparisons use the sign/overflow-based codes (L/LE/NL/NLE), unsigned ones
// the carry-based codes (B/BE/NB/NBE).
impl From<IntCmpKind> for CC {
    fn from(value: IntCmpKind) -> Self {
        match value {
            IntCmpKind::Eq => CC::Z,
            IntCmpKind::Ne => CC::NZ,
            IntCmpKind::LtS => CC::L,
            IntCmpKind::LtU => CC::B,
            IntCmpKind::GtS => CC::NLE,
            IntCmpKind::GtU => CC::NBE,
            IntCmpKind::LeS => CC::LE,
            IntCmpKind::LeU => CC::BE,
            IntCmpKind::GeS => CC::NL,
            IntCmpKind::GeU => CC::NB,
        }
    }
}
171
// Map a typed Winch extend to its source/destination width pair (e.g. `BL` =
// byte-to-long). The signedness carried by `T` does not affect the widths; it
// is honored by the instruction chosen at the use site.
impl<T: ExtendType> From<Extend<T>> for ExtMode {
    fn from(value: Extend<T>) -> Self {
        match value {
            Extend::I32Extend8 => ExtMode::BL,
            Extend::I32Extend16 => ExtMode::WL,
            Extend::I64Extend8 => ExtMode::BQ,
            Extend::I64Extend16 => ExtMode::WQ,
            Extend::I64Extend32 => ExtMode::LQ,
            // Phantom marker variant; never constructed at runtime.
            Extend::__Kind(_) => unreachable!(),
        }
    }
}
184
// Erase the signedness wrapper: both signed and unsigned extends delegate to
// the typed `Extend<T>` conversion, which only encodes widths.
impl From<ExtendKind> for ExtMode {
    fn from(value: ExtendKind) -> Self {
        match value {
            ExtendKind::Signed(s) => s.into(),
            ExtendKind::Unsigned(u) => u.into(),
        }
    }
}
193
/// Kinds of extends supported by `vpmov`.
///
/// Each variant names the source lane shape; the `S`/`U` suffix selects
/// sign or zero extension.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}
209
// Load-extend kinds map one-to-one onto `vpmov` kinds.
impl From<V128LoadExtendKind> for VpmovKind {
    fn from(value: V128LoadExtendKind) -> Self {
        match value {
            V128LoadExtendKind::E8x8S => Self::E8x8S,
            V128LoadExtendKind::E8x8U => Self::E8x8U,
            V128LoadExtendKind::E16x4S => Self::E16x4S,
            V128LoadExtendKind::E16x4U => Self::E16x4U,
            V128LoadExtendKind::E32x2S => Self::E32x2S,
            V128LoadExtendKind::E32x2U => Self::E32x2U,
        }
    }
}
222
// Low/high pairs for the same lane shape share a `vpmov` kind; the caller is
// responsible for first shifting the high half into position. The remaining
// high-unsigned variants are not yet lowered through `vpmov`.
impl From<V128ExtendKind> for VpmovKind {
    fn from(value: V128ExtendKind) -> Self {
        match value {
            V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
            V128ExtendKind::LowI8x16U => Self::E8x8U,
            V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
            V128ExtendKind::LowI16x8U => Self::E16x4U,
            V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
            V128ExtendKind::LowI32x4U => Self::E32x2U,
            _ => unimplemented!(),
        }
    }
}
236
/// Kinds of comparisons supported by `vcmp`.
///
/// Greater-than style comparisons are not listed; they are expressed by
/// swapping operands of the `Lt`/`Le` forms.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}
251
/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to 32-bit floats.
    I32ToF32,
    /// Converts doubleword integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    /// Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}
267
/// Modes supported by `vround`.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero (truncation).
    TowardZero,
}
279
/// Low level assembler implementation for x64.
///
/// Wraps a `cranelift-codegen` [`MachBuffer`] and emits fully-lowered
/// [`Inst`]s into it, alongside a constant pool for float/vector literals.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information (shared + ISA flags snapshot).
    emit_info: EmitInfo,
    /// Emission state threaded through every `Inst::emit` call.
    emit_state: EmitState,
    /// x64 flags, kept for ISA feature queries.
    isa_flags: x64_settings::Flags,
    /// Constant pool; registered constants are resolved at finalization.
    pool: ConstantPool,
}
293
294impl Assembler {
295    /// Create a new x64 assembler.
296    pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
297        Self {
298            buffer: MachBuffer::<Inst>::new(),
299            emit_state: Default::default(),
300            emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
301            pool: ConstantPool::new(),
302            isa_flags,
303        }
304    }
305
    /// Get a mutable reference to the underlying machine buffer, e.g. to
    /// bind labels or register trap metadata directly.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }
311
    /// Get a shared reference to the underlying machine buffer.
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }
316
317    /// Adds a constant to the constant pool and returns its address.
318    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
319        let handle = self.pool.register(constant, &mut self.buffer);
320        Address::constant(handle)
321    }
322
323    /// Load a floating point constant, using the constant pool.
324    pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
325        let addr = self.add_constant(constant);
326        self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
327    }
328
329    /// Return the emitted code.
330    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
331        let stencil = self
332            .buffer
333            .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
334        stencil.apply_base_srcloc(loc.unwrap_or_default())
335    }
336
    /// Emit a single lowered instruction into the machine buffer, threading
    /// through the assembler's emission info and state.
    fn emit(&mut self, inst: Inst) {
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
    }
340
341    fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
342        match *addr {
343            Address::Offset { base, offset } => {
344                let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
345                SyntheticAmode::real(amode)
346            }
347            Address::Const(c) => SyntheticAmode::ConstantOffset(c),
348            Address::ImmRegRegShift {
349                simm32,
350                base,
351                index,
352                shift,
353            } => SyntheticAmode::Real(Amode::ImmRegRegShift {
354                simm32,
355                base: base.into(),
356                index: index.into(),
357                shift,
358                flags: memflags,
359            }),
360        }
361    }
362
    /// Emit an unwind instruction.
    ///
    /// `Inst::Unwind` is a pseudo-instruction carrying unwind metadata for
    /// the current code offset.
    pub fn unwind_inst(&mut self, inst: UnwindInst) {
        self.emit(Inst::Unwind { inst })
    }
367
368    /// Push register.
369    pub fn push_r(&mut self, reg: Reg) {
370        let inst = asm::inst::pushq_o::new(reg).into();
371        self.emit(Inst::External { inst });
372    }
373
374    /// Pop to register.
375    pub fn pop_r(&mut self, dst: WritableReg) {
376        let writable: WritableGpr = dst.map(Into::into);
377        let inst = asm::inst::popq_o::new(writable).into();
378        self.emit(Inst::External { inst });
379    }
380
381    /// Return instruction.
382    pub fn ret(&mut self) {
383        let inst = asm::inst::retq_zo::new().into();
384        self.emit(Inst::External { inst });
385    }
386
    /// Register-to-register move.
    ///
    /// Narrow sizes (`S8`/`S16`) move only the corresponding low bits of the
    /// register via `movb`/`movw`. Vector sizes are unsupported here.
    pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
399
    /// Register-to-memory move (store).
    ///
    /// Only offset-style addresses are supported here; `flags` are attached
    /// to the resulting memory access.
    pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
413
    /// Immediate-to-memory move (store).
    ///
    /// For `S8`/`S16` the immediate must fit the narrower signed range
    /// (panics otherwise); for `S64` a 32-bit immediate is stored
    /// sign-extended (`movq_mi_sxl`).
    pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => {
                // Range-check to i8, then reinterpret the bits as unsigned
                // for the encoder.
                let src = i8::try_from(src).unwrap();
                asm::inst::movb_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S16 => {
                let src = i16::try_from(src).unwrap();
                asm::inst::movw_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S32 => asm::inst::movl_mi::new(dst, src.cast_unsigned()).into(),
            OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
433
    /// Immediate-to-register move.
    ///
    /// Delegates to the `Inst::imm` helper for the actual instruction
    /// selection.
    pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
    }
438
439    /// Zero-extend memory-to-register load.
440    pub fn movzx_mr(
441        &mut self,
442        addr: &Address,
443        dst: WritableReg,
444        ext: Option<Extend<Zero>>,
445        memflags: MemFlags,
446    ) {
447        let src = Self::to_synthetic_amode(addr, memflags);
448
449        if let Some(ext) = ext {
450            let dst = WritableGpr::from_reg(dst.to_reg().into());
451            let inst = match ext.into() {
452                ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
453                ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
454                ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
455                ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
456                ExtMode::LQ => {
457                    // This instruction selection may seem strange but is
458                    // correct in 64-bit mode: section 3.4.1.1 of the Intel
459                    // manual says that "32-bit operands generate a 32-bit
460                    // result, zero-extended to a 64-bit result in the
461                    // destination general-purpose register." This is applicable
462                    // beyond `mov` but we use this fact to zero-extend `src`
463                    // into `dst`.
464                    asm::inst::movl_rm::new(dst, src).into()
465                }
466            };
467            self.emit(Inst::External { inst });
468        } else {
469            let dst = WritableGpr::from_reg(dst.to_reg().into());
470            let inst = asm::inst::movq_rm::new(dst, src).into();
471            self.emit(Inst::External { inst });
472        }
473    }
474
    /// Sign-extend memory-to-register load.
    ///
    /// Loads from `addr` and sign-extends into `dst` according to the
    /// width pair described by `ext`.
    pub fn movsx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Extend<Signed>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match ext.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
494
    /// Register-to-register move with zero extension.
    ///
    /// `kind` selects the source/destination width pair.
    pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        self.emit(Inst::External { inst });
    }
515
    /// Register-to-register move with sign extension.
    ///
    /// `kind` selects the source/destination width pair.
    pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
528
    /// Integer register conditional move: `dst = src` if `cc` holds.
    ///
    /// Sizes below 32 bits reuse the 32-bit `cmov` forms; the condition
    /// flags are assumed to already be set by a prior comparison.
    pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        use IntCmpKind::*;
        use OperandSize::*;

        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            S8 | S16 | S32 => match cc {
                Eq => asm::inst::cmovel_rm::new(dst, src).into(),
                Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
                LtS => asm::inst::cmovll_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
                GtU => asm::inst::cmoval_rm::new(dst, src).into(),
                LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
                GeU => asm::inst::cmovael_rm::new(dst, src).into(),
            },
            S64 => match cc {
                Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
                Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
                LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbq_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgq_rm::new(dst, src).into(),
                GtU => asm::inst::cmovaq_rm::new(dst, src).into(),
                LeS => asm::inst::cmovleq_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),
                GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),
            },
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
564
    /// Single and double precision floating point
    /// register-to-register move.
    ///
    /// `S128` moves the whole vector register (typed as `I32X4`).
    pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let ty = match size {
            OperandSize::S32 => types::F32,
            OperandSize::S64 => types::F64,
            OperandSize::S128 => types::I32X4,
            OperandSize::S8 | OperandSize::S16 => unreachable!(),
        };
        self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));
    }
576
    /// Single and double precision floating point load.
    ///
    /// `S128` performs an unaligned full-vector load (`movdqu`). Panics if
    /// `dst` is not a float register.
    pub fn xmm_mov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        use OperandSize::*;

        assert!(dst.to_reg().is_float());

        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            S32 => asm::inst::movss_a_m::new(dst, src).into(),
            S64 => asm::inst::movsd_a_m::new(dst, src).into(),
            S128 => asm::inst::movdqu_a::new(dst, src).into(),
            S8 | S16 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
599
    /// Vector load and extend.
    ///
    /// Loads from `src` and sign/zero extends the lanes per `kind` into the
    /// float register `dst`.
    pub fn xmm_vpmov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        kind: VpmovKind,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
621
    /// Extends vector of integers in `src` and puts results in `dst`.
    ///
    /// Register-to-register counterpart of the `vpmov` load-extend above;
    /// same instruction selection, register source instead of memory.
    pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match kind {
            VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),
            VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),
            VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),
            VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),
            VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),
            VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }
635
    /// Vector load and broadcast.
    ///
    /// Loads a scalar of `size` from `src` and replicates it across all
    /// lanes of `dst`. 64-bit broadcasts are not lowered through this path.
    pub fn xmm_vpbroadcast_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
655
    /// Value in `src` is broadcast into lanes of `size` in `dst`.
    ///
    /// Both registers must be float registers; 64-bit broadcasts are not
    /// lowered through this path.
    pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(src.is_float() && dst.to_reg().is_float());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
668
    /// Memory to register shuffle of bytes in vector.
    ///
    /// `mask` is the immediate lane-selection control byte; only 32-bit
    /// lane shuffles (`vpshufd`) are lowered from memory.
    pub fn xmm_vpshuf_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        mask: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let src = Self::to_synthetic_amode(src, flags);
        let inst = match size {
            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
686
    /// Register to register shuffle of bytes in vector.
    ///
    /// `mask` is the immediate lane-selection control byte. `S16` uses
    /// `vpshuflw`, which shuffles only the low four 16-bit lanes.
    pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());

        let inst = match size {
            OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),
            OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }
699
    /// Single and double precision floating point store.
    ///
    /// `S128` performs an unaligned full-vector store (`movdqu`). Panics if
    /// `src` is not a float register.
    pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
        use OperandSize::*;

        assert!(src.is_float());

        let dst = Self::to_synthetic_amode(dst, flags);
        let src: Xmm = src.into();
        let inst = match size {
            S32 => asm::inst::movss_c_m::new(dst, src).into(),
            S64 => asm::inst::movsd_c_m::new(dst, src).into(),
            S128 => asm::inst::movdqu_b::new(dst, src).into(),
            S16 | S8 => unreachable!(),
        };
        self.emit(Inst::External { inst })
    }
716
    /// Floating point register conditional move.
    ///
    /// If `cc` holds, `src` (the consequent) is selected; otherwise `dst`
    /// keeps its current value (the alternative).
    pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        let dst: WritableXmm = dst.map(Into::into);
        let ty = match size {
            OperandSize::S32 => types::F32,
            OperandSize::S64 => types::F64,
            // Move the entire 128 bits via movdqa.
            OperandSize::S128 => types::I32X4,
            OperandSize::S8 | OperandSize::S16 => unreachable!(),
        };

        self.emit(Inst::XmmCmove {
            ty,
            cc: cc.into(),
            consequent: Xmm::unwrap_new(src.into()),
            alternative: dst.to_reg(),
            dst,
        })
    }
736
    /// Subtract register and register: `dst -= src`.
    ///
    /// The destination is both read and written (`pair_gpr`).
    pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
749
    /// Subtract immediate register: `dst -= imm`.
    ///
    /// For `S8`/`S16` the immediate must fit the narrower unsigned range
    /// (panics otherwise); for `S64` the 32-bit immediate is sign-extended
    /// (`subq_mi_sxl`).
    pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
762
    /// "and" two registers: `dst &= src`.
    ///
    /// The destination is both read and written (`pair_gpr`).
    pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
775
    /// "and" immediate and register: `dst &= imm`.
    ///
    /// For `S8`/`S16` the immediate must fit the narrower unsigned range
    /// (panics otherwise); for `S64` the 32-bit immediate is sign-extended
    /// (`andq_mi_sxl`).
    pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
            OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
            OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),
            OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
787
    /// "and" two float registers: `dst &= src` (bitwise).
    pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
798
    /// "and not" two float registers: `dst = !dst & src` (bitwise, per
    /// `andnps`/`andnpd` operand order).
    pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_xmm(dst);
        let inst = match size {
            OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
809
810    pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
811        let dst: WritableXmm = dst.map(|r| r.into());
812        let inst = match size {
813            OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),
814            OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),
815            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
816        };
817
818        self.emit(Inst::External { inst });
819    }
820
821    pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
822        let dst: WritableGpr = dst.map(Into::into);
823        let src: Xmm = src.into();
824        let inst = match size {
825            OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),
826            OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),
827            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
828        };
829
830        self.emit(Inst::External { inst })
831    }
832
833    /// Convert float to signed int.
834    pub fn cvt_float_to_sint_seq(
835        &mut self,
836        src: Reg,
837        dst: WritableReg,
838        tmp_gpr: Reg,
839        tmp_xmm: Reg,
840        src_size: OperandSize,
841        dst_size: OperandSize,
842        saturating: bool,
843    ) {
844        self.emit(Inst::CvtFloatToSintSeq {
845            dst_size: dst_size.into(),
846            src_size: src_size.into(),
847            is_saturating: saturating,
848            src: src.into(),
849            dst: dst.map(Into::into),
850            tmp_gpr: tmp_gpr.into(),
851            tmp_xmm: tmp_xmm.into(),
852        });
853    }
854
855    /// Convert float to unsigned int.
856    pub fn cvt_float_to_uint_seq(
857        &mut self,
858        src: Reg,
859        dst: WritableReg,
860        tmp_gpr: Reg,
861        tmp_xmm: Reg,
862        tmp_xmm2: Reg,
863        src_size: OperandSize,
864        dst_size: OperandSize,
865        saturating: bool,
866    ) {
867        self.emit(Inst::CvtFloatToUintSeq {
868            dst_size: dst_size.into(),
869            src_size: src_size.into(),
870            is_saturating: saturating,
871            src: src.into(),
872            dst: dst.map(Into::into),
873            tmp_gpr: tmp_gpr.into(),
874            tmp_xmm: tmp_xmm.into(),
875            tmp_xmm2: tmp_xmm2.into(),
876        });
877    }
878
879    /// Convert signed int to float.
880    pub fn cvt_sint_to_float(
881        &mut self,
882        src: Reg,
883        dst: WritableReg,
884        src_size: OperandSize,
885        dst_size: OperandSize,
886    ) {
887        use OperandSize::*;
888        let dst = pair_xmm(dst);
889        let inst = match (src_size, dst_size) {
890            (S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),
891            (S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),
892            (S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),
893            (S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),
894            _ => unreachable!(),
895        };
896        self.emit(Inst::External { inst });
897    }
898
899    /// Convert unsigned 64-bit int to float.
900    pub fn cvt_uint64_to_float_seq(
901        &mut self,
902        src: Reg,
903        dst: WritableReg,
904        tmp_gpr1: Reg,
905        tmp_gpr2: Reg,
906        dst_size: OperandSize,
907    ) {
908        self.emit(Inst::CvtUint64ToFloatSeq {
909            dst_size: dst_size.into(),
910            src: src.into(),
911            dst: dst.map(Into::into),
912            tmp_gpr1: tmp_gpr1.into(),
913            tmp_gpr2: tmp_gpr2.into(),
914        });
915    }
916
917    /// Change precision of float.
918    pub fn cvt_float_to_float(
919        &mut self,
920        src: Reg,
921        dst: WritableReg,
922        src_size: OperandSize,
923        dst_size: OperandSize,
924    ) {
925        use OperandSize::*;
926        let dst = pair_xmm(dst);
927        let inst = match (src_size, dst_size) {
928            (S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),
929            (S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),
930            _ => unimplemented!(),
931        };
932        self.emit(Inst::External { inst });
933    }
934
935    pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
936        let dst = pair_gpr(dst);
937        let inst = match size {
938            OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),
939            OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),
940            OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),
941            OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),
942            OperandSize::S128 => unimplemented!(),
943        };
944        self.emit(Inst::External { inst });
945    }
946
947    pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
948        let dst = pair_gpr(dst);
949        let inst = match size {
950            OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
951            OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
952            OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),
953            OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),
954            OperandSize::S128 => unimplemented!(),
955        };
956        self.emit(Inst::External { inst });
957    }
958
959    pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
960        let dst = pair_xmm(dst);
961        let inst = match size {
962            OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),
963            OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),
964            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
965        };
966        self.emit(Inst::External { inst });
967    }
968
969    /// Logical exclusive or with registers.
970    pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
971        let dst = pair_gpr(dst);
972        let inst = match size {
973            OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),
974            OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),
975            OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),
976            OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),
977            OperandSize::S128 => unimplemented!(),
978        };
979        self.emit(Inst::External { inst });
980    }
981
982    pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
983        let dst = pair_gpr(dst);
984        let inst = match size {
985            OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
986            OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
987            OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),
988            OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),
989            OperandSize::S128 => unimplemented!(),
990        };
991        self.emit(Inst::External { inst });
992    }
993
994    /// Logical exclusive or with float registers.
995    pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
996        let dst = pair_xmm(dst);
997        let inst = match size {
998            OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),
999            OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),
1000            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1001        };
1002        self.emit(Inst::External { inst });
1003    }
1004
1005    /// Shift with register and register.
1006    pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1007        let dst = pair_gpr(dst);
1008        let src: Gpr = src.into();
1009        let inst = match (kind, size) {
1010            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),
1011            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),
1012            (ShiftKind::Shl, _) => todo!(),
1013            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),
1014            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),
1015            (ShiftKind::ShrS, _) => todo!(),
1016            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),
1017            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),
1018            (ShiftKind::ShrU, _) => todo!(),
1019            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),
1020            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),
1021            (ShiftKind::Rotl, _) => todo!(),
1022            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),
1023            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),
1024            (ShiftKind::Rotr, _) => todo!(),
1025        };
1026        self.emit(Inst::External { inst });
1027    }
1028
1029    /// Shift with immediate and register.
1030    pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
1031        let dst = pair_gpr(dst);
1032        let inst = match (kind, size) {
1033            (ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),
1034            (ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),
1035            (ShiftKind::Shl, _) => todo!(),
1036            (ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),
1037            (ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),
1038            (ShiftKind::ShrS, _) => todo!(),
1039            (ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),
1040            (ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),
1041            (ShiftKind::ShrU, _) => todo!(),
1042            (ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),
1043            (ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),
1044            (ShiftKind::Rotl, _) => todo!(),
1045            (ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),
1046            (ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),
1047            (ShiftKind::Rotr, _) => todo!(),
1048        };
1049        self.emit(Inst::External { inst });
1050    }
1051
    /// Signed/unsigned division.
    ///
    /// Emits a sequence of instructions to ensure the correctness of
    /// the division invariants.  This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the quotient to be stored in `rax`.
    ///
    /// `dst.0` holds the low half of the dividend and `dst.1` the high
    /// half; the high half is overwritten here by the sign/zero extension
    /// before the divide is emitted.
    pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
        // Each kind prepares the dividend differently and selects the trap
        // code reported when the hardware divide itself faults.
        let trap = match kind {
            // Signed division has two trapping conditions, integer overflow and
            // divide-by-zero. Check for divide-by-zero explicitly and let the
            // hardware detect overflow.
            DivKind::Signed => {
                self.cmp_ir(divisor, 0, size);
                self.emit(Inst::TrapIf {
                    cc: CC::Z,
                    trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
                });

                // Sign-extend the dividend with tailor-made instructions
                // (cltd/cqto) for just this operation.
                let ext_dst: WritableGpr = dst.1.into();
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                TrapCode::INTEGER_OVERFLOW
            }

            // Unsigned division only traps in one case, on divide-by-zero, so
            // defer that to the trap opcode.
            //
            // The dividend_hi reg is initialized with zero through an
            // xor-against-itself op.
            DivKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                TrapCode::INTEGER_DIVISION_BY_ZERO
            }
        };
        let dst0 = pair_gpr(writable!(dst.0));
        let dst1 = pair_gpr(writable!(dst.1));
        let inst = match (kind, size) {
            (DivKind::Signed, OperandSize::S32) => {
                asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S32) => {
                asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Signed, OperandSize::S64) => {
                asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S64) => {
                asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
            }
            // 8/16/128-bit division is not used by this backend.
            _ => todo!(),
        };
        self.emit(Inst::External { inst });
    }
1112
    /// Signed/unsigned remainder.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants and ultimately calculate the remainder.
    /// This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the remainder to be stored in `rdx`.
    ///
    /// `dst.0` holds the low half of the dividend and `dst.1` the high
    /// half; `dst.1` also receives the remainder.
    pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
        match kind {
            // Signed remainder goes through a pseudo-instruction which has
            // some internal branching. The `dividend_hi`, or `rdx`, is
            // initialized here with a `SignExtendData` instruction.
            RemKind::Signed => {
                let ext_dst: WritableGpr = dst.1.into();

                // Initialize `dividend_hi`, or `rdx`, with a tailor-made
                // instruction (cltd/cqto) for this operation.
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                // The pseudo-instruction handles the INT_MIN % -1 case
                // internally, so no explicit overflow check is emitted here.
                self.emit(Inst::CheckedSRemSeq {
                    size: size.into(),
                    divisor: divisor.into(),
                    dividend_lo: dst.0.into(),
                    dividend_hi: dst.1.into(),
                    dst_quotient: dst.0.into(),
                    dst_remainder: dst.1.into(),
                });
            }

            // Unsigned remainder initializes `dividend_hi` with zero and
            // then executes a normal `div` instruction.
            RemKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                let dst0 = pair_gpr(writable!(dst.0));
                let dst1 = pair_gpr(writable!(dst.1));
                let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;
                let inst = match size {
                    OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),
                    OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),
                    _ => todo!(),
                };
                self.emit(Inst::External { inst });
            }
        }
    }
1163
1164    /// Multiply immediate and register.
1165    pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1166        use OperandSize::*;
1167        let src = dst.to_reg();
1168        let dst: WritableGpr = dst.to_reg().into();
1169        let inst = match size {
1170            S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),
1171            S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),
1172            S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),
1173            S8 | S128 => unimplemented!(),
1174        };
1175        self.emit(Inst::External { inst });
1176    }
1177
1178    /// Multiply register and register.
1179    pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1180        use OperandSize::*;
1181        let dst = pair_gpr(dst);
1182        let inst = match size {
1183            S16 => asm::inst::imulw_rm::new(dst, src).into(),
1184            S32 => asm::inst::imull_rm::new(dst, src).into(),
1185            S64 => asm::inst::imulq_rm::new(dst, src).into(),
1186            S8 | S128 => unimplemented!(),
1187        };
1188        self.emit(Inst::External { inst });
1189    }
1190
1191    /// Add immediate and register.
1192    pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
1193        let dst = pair_gpr(dst);
1194        let inst = match size {
1195            OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),
1196            OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),
1197            OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),
1198            OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),
1199            OperandSize::S128 => unimplemented!(),
1200        };
1201        self.emit(Inst::External { inst });
1202    }
1203
1204    /// Add register and register.
1205    pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1206        let dst = pair_gpr(dst);
1207        let inst = match size {
1208            OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),
1209            OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),
1210            OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),
1211            OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),
1212            OperandSize::S128 => unimplemented!(),
1213        };
1214        self.emit(Inst::External { inst });
1215    }
1216
1217    pub fn lock_xadd(
1218        &mut self,
1219        addr: Address,
1220        dst: WritableReg,
1221        size: OperandSize,
1222        flags: MemFlags,
1223    ) {
1224        assert!(addr.is_offset());
1225        let mem = Self::to_synthetic_amode(&addr, flags);
1226        let dst = pair_gpr(dst);
1227        let inst = match size {
1228            OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),
1229            OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),
1230            OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),
1231            OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),
1232            OperandSize::S128 => unimplemented!(),
1233        };
1234
1235        self.emit(Inst::External { inst });
1236    }
1237
1238    pub fn atomic_rmw_seq(
1239        &mut self,
1240        addr: Address,
1241        operand: Reg,
1242        dst: WritableReg,
1243        temp: WritableReg,
1244        size: OperandSize,
1245        flags: MemFlags,
1246        op: AtomicRmwSeqOp,
1247    ) {
1248        assert!(addr.is_offset());
1249        let mem = Self::to_synthetic_amode(&addr, flags);
1250        self.emit(Inst::AtomicRmwSeq {
1251            ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
1252            mem,
1253            operand: operand.into(),
1254            temp: temp.map(Into::into),
1255            dst_old: dst.map(Into::into),
1256            op,
1257        });
1258    }
1259
1260    pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {
1261        assert!(addr.is_offset());
1262        let mem = Self::to_synthetic_amode(&addr, flags);
1263        let dst = pair_gpr(dst);
1264        let inst = match size {
1265            OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),
1266            OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),
1267            OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),
1268            OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),
1269            OperandSize::S128 => unimplemented!(),
1270        };
1271
1272        self.emit(Inst::External { inst });
1273    }
1274    pub fn cmpxchg(
1275        &mut self,
1276        addr: Address,
1277        replacement: Reg,
1278        dst: WritableReg,
1279        size: OperandSize,
1280        flags: MemFlags,
1281    ) {
1282        assert!(addr.is_offset());
1283        let mem = Self::to_synthetic_amode(&addr, flags);
1284        let dst = pair_gpr(dst);
1285        let inst = match size {
1286            OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),
1287            OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),
1288            OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),
1289            OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),
1290            OperandSize::S128 => unimplemented!(),
1291        };
1292
1293        self.emit(Inst::External { inst });
1294    }
1295
1296    pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
1297        let inst = match size {
1298            OperandSize::S8 => {
1299                let imm = i8::try_from(imm).unwrap();
1300                asm::inst::cmpb_mi::new(src1, imm.cast_unsigned()).into()
1301            }
1302            OperandSize::S16 => match i8::try_from(imm) {
1303                Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),
1304                Err(_) => {
1305                    asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().cast_unsigned())
1306                        .into()
1307                }
1308            },
1309            OperandSize::S32 => match i8::try_from(imm) {
1310                Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),
1311                Err(_) => asm::inst::cmpl_mi::new(src1, imm.cast_unsigned()).into(),
1312            },
1313            OperandSize::S64 => match i8::try_from(imm) {
1314                Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),
1315                Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),
1316            },
1317            OperandSize::S128 => unimplemented!(),
1318        };
1319
1320        self.emit(Inst::External { inst });
1321    }
1322
1323    pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1324        let inst = match size {
1325            OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),
1326            OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),
1327            OperandSize::S32 => asm::inst::cmpl_rm::new(src1, src2).into(),
1328            OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),
1329            OperandSize::S128 => unimplemented!(),
1330        };
1331
1332        self.emit(Inst::External { inst });
1333    }
1334
1335    /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
1336    /// register.
1337    pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1338        let inst = match size {
1339            OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),
1340            OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),
1341            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1342        };
1343        self.emit(Inst::External { inst });
1344    }
1345
1346    pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1347        assert!(
1348            self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
1349            "Requires has_popcnt and has_sse42 flags"
1350        );
1351        let dst = WritableGpr::from_reg(dst.to_reg().into());
1352        let inst = match size {
1353            OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),
1354            OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),
1355            OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),
1356            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1357        };
1358        self.emit(Inst::External { inst });
1359    }
1360
1361    /// Emit a test instruction with two register operands.
1362    pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
1363        let inst = match size {
1364            OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),
1365            OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),
1366            OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),
1367            OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),
1368            OperandSize::S128 => unimplemented!(),
1369        };
1370
1371        self.emit(Inst::External { inst });
1372    }
1373
1374    /// Set value in dst to `0` or `1` based on flags in status register and
1375    /// [`CmpKind`].
1376    pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
1377        self.setcc_impl(kind.into(), dst);
1378    }
1379
1380    /// Set value in dst to `1` if parity flag in status register is set, `0`
1381    /// otherwise.
1382    pub fn setp(&mut self, dst: WritableReg) {
1383        self.setcc_impl(CC::P, dst);
1384    }
1385
1386    /// Set value in dst to `1` if parity flag in status register is not set,
1387    /// `0` otherwise.
1388    pub fn setnp(&mut self, dst: WritableReg) {
1389        self.setcc_impl(CC::NP, dst);
1390    }
1391
    /// Shared implementation of the `set*` family: zeroes `dst` without
    /// touching the status register, then emits the `setcc` variant chosen
    /// by `cc`, leaving `0` or `1` in `dst`.
    fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
        // Clear the dst register or bits 1 to 31 may be incorrectly set.
        // Don't use xor since it updates the status register.
        let dst: WritableGpr = dst.map(Into::into);
        let inst = asm::inst::movl_oi::new(dst, 0).into();
        self.emit(Inst::External { inst });

        // Copy correct bit from status register into dst register.
        //
        // Note that some of these mnemonics don't match exactly and that's
        // intentional as there are multiple mnemonics for the same encoding in
        // some cases and the assembler picked ones that match Capstone rather
        // than Cranelift.
        let inst = match cc {
            CC::O => asm::inst::seto_m::new(dst).into(),
            CC::NO => asm::inst::setno_m::new(dst).into(),
            CC::B => asm::inst::setb_m::new(dst).into(),
            CC::NB => asm::inst::setae_m::new(dst).into(), //  nb == ae
            CC::Z => asm::inst::sete_m::new(dst).into(),   //   z ==  e
            CC::NZ => asm::inst::setne_m::new(dst).into(), //  nz == ne
            CC::BE => asm::inst::setbe_m::new(dst).into(),
            CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe ==  a
            CC::S => asm::inst::sets_m::new(dst).into(),
            CC::NS => asm::inst::setns_m::new(dst).into(),
            CC::L => asm::inst::setl_m::new(dst).into(),
            CC::NL => asm::inst::setge_m::new(dst).into(), //  nl == ge
            CC::LE => asm::inst::setle_m::new(dst).into(),
            CC::NLE => asm::inst::setg_m::new(dst).into(), // nle ==  g
            CC::P => asm::inst::setp_m::new(dst).into(),
            CC::NP => asm::inst::setnp_m::new(dst).into(),
        };
        self.emit(Inst::External { inst });
    }
1425
1426    /// Store the count of leading zeroes in src in dst.
1427    /// Requires `has_lzcnt` flag.
1428    pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1429        assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
1430        let dst = WritableGpr::from_reg(dst.to_reg().into());
1431        let inst = match size {
1432            OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),
1433            OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),
1434            OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),
1435            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1436        };
1437        self.emit(Inst::External { inst });
1438    }
1439
1440    /// Store the count of trailing zeroes in src in dst.
1441    /// Requires `has_bmi1` flag.
1442    pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1443        assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
1444        let dst = WritableGpr::from_reg(dst.to_reg().into());
1445        let inst = match size {
1446            OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),
1447            OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),
1448            OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),
1449            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1450        };
1451        self.emit(Inst::External { inst });
1452    }
1453
1454    /// Stores position of the most significant bit set in src in dst.
1455    /// Zero flag is set if src is equal to 0.
1456    pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1457        let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1458        let inst = match size {
1459            OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),
1460            OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),
1461            OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),
1462            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1463        };
1464        self.emit(Inst::External { inst });
1465    }
1466
1467    /// Performs integer negation on `src` and places result in `dst`.
1468    pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {
1469        let gpr = PairedGpr {
1470            read: read.into(),
1471            write: WritableGpr::from_reg(write.to_reg().into()),
1472        };
1473        let inst = match size {
1474            OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),
1475            OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),
1476            OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),
1477            OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),
1478            OperandSize::S128 => unreachable!(),
1479        };
1480        self.emit(Inst::External { inst });
1481    }
1482
1483    /// Stores position of the least significant bit set in src in dst.
1484    /// Zero flag is set if src is equal to 0.
1485    pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1486        let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());
1487        let inst = match size {
1488            OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),
1489            OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),
1490            OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),
1491            OperandSize::S8 | OperandSize::S128 => unreachable!(),
1492        };
1493        self.emit(Inst::External { inst });
1494    }
1495
1496    /// Performs float addition on src and dst and places result in dst.
1497    pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1498        let dst = pair_xmm(dst);
1499        let inst = match size {
1500            OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),
1501            OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),
1502            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1503        };
1504        self.emit(Inst::External { inst });
1505    }
1506
1507    /// Performs float subtraction on src and dst and places result in dst.
1508    pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1509        let dst = pair_xmm(dst);
1510        let inst = match size {
1511            OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),
1512            OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),
1513            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1514        };
1515        self.emit(Inst::External { inst });
1516    }
1517
1518    /// Performs float multiplication on src and dst and places result in dst.
1519    pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1520        use OperandSize::*;
1521        let dst = pair_xmm(dst);
1522        let inst = match size {
1523            S32 => asm::inst::mulss_a::new(dst, src).into(),
1524            S64 => asm::inst::mulsd_a::new(dst, src).into(),
1525            S8 | S16 | S128 => unreachable!(),
1526        };
1527        self.emit(Inst::External { inst });
1528    }
1529
1530    /// Performs float division on src and dst and places result in dst.
1531    pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1532        let dst = pair_xmm(dst);
1533        let inst = match size {
1534            OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),
1535            OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),
1536            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1537        };
1538        self.emit(Inst::External { inst });
1539    }
1540
1541    /// Minimum for src and dst XMM registers with results put in dst.
1542    pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1543        self.emit(Inst::XmmMinMaxSeq {
1544            size: size.into(),
1545            is_min: true,
1546            lhs: src.into(),
1547            rhs: dst.to_reg().into(),
1548            dst: dst.map(Into::into),
1549        });
1550    }
1551
1552    /// Maximum for src and dst XMM registers with results put in dst.
1553    pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1554        self.emit(Inst::XmmMinMaxSeq {
1555            size: size.into(),
1556            is_min: false,
1557            lhs: src.into(),
1558            rhs: dst.to_reg().into(),
1559            dst: dst.map(Into::into),
1560        });
1561    }
1562
1563    /// Perform rounding operation on float register src and place results in
1564    /// float register dst.
1565    pub fn xmm_rounds_rr(
1566        &mut self,
1567        src: Reg,
1568        dst: WritableReg,
1569        mode: RoundingMode,
1570        size: OperandSize,
1571    ) {
1572        let dst = dst.map(|r| r.into());
1573
1574        let imm: u8 = match mode {
1575            RoundingMode::Nearest => 0x00,
1576            RoundingMode::Down => 0x01,
1577            RoundingMode::Up => 0x02,
1578            RoundingMode::Zero => 0x03,
1579        };
1580
1581        let inst = match size {
1582            OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),
1583            OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),
1584            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1585        };
1586
1587        self.emit(Inst::External { inst });
1588    }
1589
1590    pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1591        use OperandSize::*;
1592        let dst = pair_xmm(dst);
1593        let inst = match size {
1594            S32 => asm::inst::sqrtss_a::new(dst, src).into(),
1595            S64 => asm::inst::sqrtsd_a::new(dst, src).into(),
1596            S8 | S16 | S128 => unimplemented!(),
1597        };
1598        self.emit(Inst::External { inst });
1599    }
1600
1601    /// Emit a call to an unknown location through a register.
1602    pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
1603        self.emit(Inst::CallUnknown {
1604            info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
1605        });
1606    }
1607
1608    /// Emit a call to a locally defined function through an index.
1609    pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
1610        self.emit(Inst::CallKnown {
1611            info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
1612        });
1613    }
1614
1615    /// Emits a conditional jump to the given label.
1616    pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
1617        self.emit(Inst::WinchJmpIf {
1618            cc: cc.into(),
1619            taken,
1620        });
1621    }
1622
1623    /// Performs an unconditional jump to the given label.
1624    pub fn jmp(&mut self, target: MachLabel) {
1625        self.emit(Inst::JmpKnown { dst: target });
1626    }
1627
1628    /// Emits a jump table sequence.
1629    pub fn jmp_table(
1630        &mut self,
1631        targets: SmallVec<[MachLabel; 4]>,
1632        default: MachLabel,
1633        index: Reg,
1634        tmp1: Reg,
1635        tmp2: Reg,
1636    ) {
1637        self.emit(Inst::JmpTableSeq {
1638            idx: index.into(),
1639            tmp1: Writable::from_reg(tmp1.into()),
1640            tmp2: Writable::from_reg(tmp2.into()),
1641            default_target: default,
1642            targets: Box::new(targets.to_vec()),
1643        })
1644    }
1645
1646    /// Emit a trap instruction.
1647    pub fn trap(&mut self, code: TrapCode) {
1648        let inst = asm::inst::ud2_zo::new(code).into();
1649        self.emit(Inst::External { inst });
1650    }
1651
1652    /// Conditional trap.
1653    pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
1654        self.emit(Inst::TrapIf {
1655            cc: cc.into(),
1656            trap_code,
1657        });
1658    }
1659
1660    /// Load effective address.
1661    pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
1662        let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
1663        let dst: WritableGpr = dst.map(Into::into);
1664        let inst = match size {
1665            OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
1666            OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
1667            OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
1668            OperandSize::S8 | OperandSize::S128 => unimplemented!(),
1669        };
1670        self.emit(Inst::External { inst });
1671    }
1672
1673    pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1674        let dst = pair_gpr(dst);
1675        let inst = match size {
1676            OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
1677            OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
1678            OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
1679            OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
1680            OperandSize::S128 => unimplemented!(),
1681        };
1682        self.emit(Inst::External { inst });
1683    }
1684
1685    pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1686        let dst = pair_gpr(dst);
1687        let inst = match size {
1688            OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
1689            OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
1690            OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
1691            OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
1692            OperandSize::S128 => unimplemented!(),
1693        };
1694        self.emit(Inst::External { inst });
1695    }
1696
    /// Widening multiply: computes the double-width product of `lhs` and
    /// `rhs`, writing the low half to `dst_lo` and the high half to `dst_hi`.
    ///
    /// Lowers to the one-operand x64 `mul`/`imul` forms; the `asm::Fixed`
    /// wrappers below pin the operands to the registers those instructions
    /// implicitly use (named `rax`/`rdx` accordingly).
    pub fn mul_wide(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: MulWideKind,
        size: OperandSize,
    ) {
        use MulWideKind::*;
        use OperandSize::*;
        // RAX is tied: it is read as `lhs` and written as the low half.
        let rax = asm::Fixed(PairedGpr {
            read: lhs.into(),
            write: WritableGpr::from_reg(dst_lo.to_reg().into()),
        });
        // RDX receives the high half for the 16/32/64-bit forms.
        let rdx = asm::Fixed(dst_hi.to_reg().into());
        if size == S8 {
            // For `mulb` and `imulb`, both the high and low bits are written to
            // RAX.
            assert_eq!(dst_lo, dst_hi);
        }
        let inst = match (size, kind) {
            (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
            (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
            (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
            (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
            (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
            (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
            (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
            (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
            (S128, _) => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
1731
1732    /// Shuffles bytes in `src` according to contents of `mask` and puts
1733    /// result in `dst`.
1734    pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
1735        let dst: WritableXmm = dst.map(|r| r.into());
1736        let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
1737        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1738        self.emit(Inst::External { inst });
1739    }
1740
1741    /// Shuffles bytes in `src` according to contents of `mask` and puts
1742    /// result in `dst`.
1743    pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
1744        let dst: WritableXmm = dst.map(|r| r.into());
1745        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
1746        self.emit(Inst::External { inst });
1747    }
1748
1749    /// Add unsigned integers with unsigned saturation.
1750    ///
1751    /// Adds the src operands but when an individual byte result is larger than
1752    /// an unsigned byte integer, 0xFF is written instead.
1753    pub fn xmm_vpaddus_rrm(
1754        &mut self,
1755        dst: WritableReg,
1756        src1: Reg,
1757        src2: &Address,
1758        size: OperandSize,
1759    ) {
1760        let dst: WritableXmm = dst.map(|r| r.into());
1761        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1762        let inst = match size {
1763            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1764            OperandSize::S32 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1765            _ => unimplemented!(),
1766        };
1767        self.emit(Inst::External { inst });
1768    }
1769
1770    /// Add unsigned integers with unsigned saturation.
1771    ///
1772    /// Adds the src operands but when an individual byte result is larger than
1773    /// an unsigned byte integer, 0xFF is written instead.
1774    pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1775        let dst: WritableXmm = dst.map(|r| r.into());
1776        let inst = match size {
1777            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
1778            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
1779            _ => unimplemented!(),
1780        };
1781        self.emit(Inst::External { inst });
1782    }
1783
1784    /// Add signed integers.
1785    pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
1786        let dst: WritableXmm = dst.map(|r| r.into());
1787        let inst = match size {
1788            OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
1789            OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
1790            _ => unimplemented!(),
1791        };
1792        self.emit(Inst::External { inst });
1793    }
1794
1795    pub fn xmm_vpadd_rmr(
1796        &mut self,
1797        src1: Reg,
1798        src2: &Address,
1799        dst: WritableReg,
1800        size: OperandSize,
1801    ) {
1802        let dst: WritableXmm = dst.map(|r| r.into());
1803        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
1804        let inst = match size {
1805            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
1806            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
1807            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
1808            _ => unimplemented!(),
1809        };
1810        self.emit(Inst::External { inst });
1811    }
1812
1813    /// Adds vectors of integers in `src1` and `src2` and puts the results in
1814    /// `dst`.
1815    pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
1816        let dst: WritableXmm = dst.map(|r| r.into());
1817        let inst = match size {
1818            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
1819            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
1820            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
1821            OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
1822            _ => unimplemented!(),
1823        };
1824        self.emit(Inst::External { inst });
1825    }
1826
1827    pub fn mfence(&mut self) {
1828        self.emit(Inst::External {
1829            inst: asm::inst::mfence_zo::new().into(),
1830        });
1831    }
1832
    /// Extract a value from `src` into `addr` determined by `lane`.
    ///
    /// `addr` must be a plain base+offset address (asserted below), as it is
    /// used directly as the instruction's memory destination.
    pub(crate) fn xmm_vpextr_rm(
        &mut self,
        addr: &Address,
        src: Reg,
        lane: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
            // NOTE(review): unlike `xmm_vpextr_rr`, S16 selects the `_b` form
            // of `vpextrw` — presumably the encoding that permits a memory
            // destination; confirm against the assembler definitions.
            OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
1853
1854    /// Extract a value from `src` into `dst` (zero extended) determined by `lane`.
1855    pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
1856        let dst: WritableGpr = dst.map(|r| r.into());
1857        let inst = match size {
1858            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
1859            OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
1860            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
1861            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
1862            _ => unimplemented!(),
1863        };
1864        self.emit(Inst::External { inst });
1865    }
1866
1867    /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1868    /// the location specified in `count`.
1869    pub fn xmm_vpinsr_rrm(
1870        &mut self,
1871        dst: WritableReg,
1872        src1: Reg,
1873        src2: &Address,
1874        count: u8,
1875        size: OperandSize,
1876    ) {
1877        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
1878        let dst: WritableXmm = dst.map(|r| r.into());
1879
1880        let inst = match size {
1881            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1882            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1883            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1884            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1885            OperandSize::S128 => unreachable!(),
1886        };
1887        self.emit(Inst::External { inst });
1888    }
1889
1890    /// Copy value from `src2`, merge into `src1`, and put result in `dst` at
1891    /// the location specified in `count`.
1892    pub fn xmm_vpinsr_rrr(
1893        &mut self,
1894        dst: WritableReg,
1895        src1: Reg,
1896        src2: Reg,
1897        count: u8,
1898        size: OperandSize,
1899    ) {
1900        let dst: WritableXmm = dst.map(|r| r.into());
1901        let inst = match size {
1902            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
1903            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
1904            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
1905            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
1906            OperandSize::S128 => unreachable!(),
1907        };
1908        self.emit(Inst::External { inst });
1909    }
1910
1911    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1912    pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
1913        let dst: WritableXmm = dst.map(|r| r.into());
1914        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
1915        let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
1916        self.emit(Inst::External { inst });
1917    }
1918
1919    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1920    pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
1921        let dst: WritableXmm = dst.map(|r| r.into());
1922        let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
1923        self.emit(Inst::External { inst });
1924    }
1925
1926    /// Moves lower 64-bit float from `src2` into lower 64-bits of `dst` and the
1927    /// upper 64-bits in `src1` into the upper 64-bits of `dst`.
1928    pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1929        let dst: WritableXmm = dst.map(|r| r.into());
1930        let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
1931        self.emit(Inst::External { inst });
1932    }
1933
1934    /// Moves 64-bit float from `src` into lower 64-bits of `dst`.
1935    /// Zeroes out the upper 64 bits of `dst`.
1936    pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
1937        let src = Self::to_synthetic_amode(src, MemFlags::trusted());
1938        let dst: WritableXmm = dst.map(|r| r.into());
1939        let inst = asm::inst::vmovsd_d::new(dst, src).into();
1940        self.emit(Inst::External { inst });
1941    }
1942
    /// Moves two 32-bit floats from `src2` to the upper 64-bits of `dst`.
    /// Copies two 32-bit floats from the lower 64-bits of `src1` to lower
    /// 64-bits of `dst`.
    pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());
        // Despite the method's `vmovlhps` name, the memory form is emitted as
        // `vmovhps`, which loads 64 bits from memory into the high half of
        // `dst` while `src1` supplies the low half.
        let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
1952
1953    /// Moves two 32-bit floats from the lower 64-bits of `src2` to the upper
1954    /// 64-bits of `dst`. Copies two 32-bit floats from the lower 64-bits of
1955    /// `src1` to lower 64-bits of `dst`.
1956    pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1957        let dst: WritableXmm = dst.map(|r| r.into());
1958        let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
1959        self.emit(Inst::External { inst });
1960    }
1961
1962    /// Move unaligned packed integer values from address `src` to `dst`.
1963    pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
1964        let src = Self::to_synthetic_amode(src, flags);
1965        let dst: WritableXmm = dst.map(|r| r.into());
1966        let inst = asm::inst::vmovdqu_a::new(dst, src).into();
1967        self.emit(Inst::External { inst });
1968    }
1969
1970    /// Move integer from `src` to xmm register `dst` using an AVX instruction.
1971    pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1972        let dst: WritableXmm = dst.map(|r| r.into());
1973        let inst = match size {
1974            OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
1975            OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
1976            _ => unreachable!(),
1977        };
1978
1979        self.emit(Inst::External { inst });
1980    }
1981
1982    pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
1983        let inst = asm::inst::vptest_rm::new(src1, src2).into();
1984        self.emit(Inst::External { inst });
1985    }
1986
1987    /// Converts vector of integers into vector of floating values.
1988    pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
1989        let dst: WritableXmm = dst.map(|x| x.into());
1990        let inst = match kind {
1991            VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
1992            VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
1993            VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
1994            VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
1995            VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
1996            VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
1997        };
1998        self.emit(Inst::External { inst });
1999    }
2000
2001    /// Subtract floats in vector `src1` to floats in vector `src2`.
2002    pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2003        let dst: WritableXmm = dst.map(|r| r.into());
2004        let inst = match size {
2005            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2006            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2007            _ => unimplemented!(),
2008        };
2009        self.emit(Inst::External { inst });
2010    }
2011
2012    /// Subtract integers in vector `src1` from integers in vector `src2`.
2013    pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2014        let dst: WritableXmm = dst.map(|r| r.into());
2015        let inst = match size {
2016            OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
2017            OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
2018            OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
2019            OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
2020            _ => unimplemented!(),
2021        };
2022        self.emit(Inst::External { inst });
2023    }
2024
2025    /// Subtract unsigned integers with unsigned saturation.
2026    pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2027        let dst: WritableXmm = dst.map(|r| r.into());
2028        let inst = match size {
2029            OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
2030            OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
2031            _ => unimplemented!(),
2032        };
2033        self.emit(Inst::External { inst });
2034    }
2035
2036    /// Subtract signed integers with signed saturation.
2037    pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
2038        let dst: WritableXmm = dst.map(|r| r.into());
2039        let inst = match size {
2040            OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
2041            OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
2042            _ => unimplemented!(),
2043        };
2044        self.emit(Inst::External { inst });
2045    }
2046
2047    /// Add floats in vector `src1` to floats in vector `src2`.
2048    pub fn xmm_vaddp_rrm(
2049        &mut self,
2050        src1: Reg,
2051        src2: &Address,
2052        dst: WritableReg,
2053        size: OperandSize,
2054    ) {
2055        let dst: WritableXmm = dst.map(|r| r.into());
2056        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2057        let inst = match size {
2058            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
2059            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
2060            _ => unimplemented!(),
2061        };
2062        self.emit(Inst::External { inst });
2063    }
2064
2065    /// Add floats in vector `src1` to floats in vector `src2`.
2066    pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2067        let dst: WritableXmm = dst.map(|r| r.into());
2068        let inst = match size {
2069            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
2070            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
2071            _ => unimplemented!(),
2072        };
2073        self.emit(Inst::External { inst });
2074    }
2075
2076    /// Compare vector register `lhs` with a vector of integers in `rhs` for
2077    /// equality between packed integers and write the resulting vector into
2078    /// `dst`.
2079    pub fn xmm_vpcmpeq_rrm(
2080        &mut self,
2081        dst: WritableReg,
2082        lhs: Reg,
2083        address: &Address,
2084        size: OperandSize,
2085    ) {
2086        let dst: WritableXmm = dst.map(|r| r.into());
2087        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2088        let inst = match size {
2089            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
2090            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
2091            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
2092            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
2093            _ => unimplemented!(),
2094        };
2095        self.emit(Inst::External { inst });
2096    }
2097
2098    /// Compare vector registers `lhs` and `rhs` for equality between packed
2099    /// integers and write the resulting vector into `dst`.
2100    pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2101        let dst: WritableXmm = dst.map(|r| r.into());
2102        let inst = match size {
2103            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
2104            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
2105            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
2106            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
2107            _ => unimplemented!(),
2108        };
2109        self.emit(Inst::External { inst });
2110    }
2111
2112    /// Performs a greater than comparison with vectors of signed integers in
2113    /// `lhs` and `rhs` and puts the results in `dst`.
2114    pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2115        let dst: WritableXmm = dst.map(|r| r.into());
2116        let inst = match size {
2117            OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
2118            OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
2119            OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
2120            OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
2121            _ => unimplemented!(),
2122        };
2123        self.emit(Inst::External { inst });
2124    }
2125
2126    /// Performs a max operation with vectors of signed integers in `lhs` and
2127    /// `rhs` and puts the results in `dst`.
2128    pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2129        let dst: WritableXmm = dst.map(|r| r.into());
2130        let inst = match size {
2131            OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
2132            OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
2133            OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
2134            _ => unimplemented!(),
2135        };
2136        self.emit(Inst::External { inst });
2137    }
2138
2139    /// Performs a max operation with vectors of unsigned integers in `lhs` and
2140    /// `rhs` and puts the results in `dst`.
2141    pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2142        let dst: WritableXmm = dst.map(|r| r.into());
2143        let inst = match size {
2144            OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
2145            OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
2146            OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
2147            _ => unimplemented!(),
2148        };
2149        self.emit(Inst::External { inst });
2150    }
2151
2152    /// Performs a min operation with vectors of signed integers in `lhs` and
2153    /// `rhs` and puts the results in `dst`.
2154    pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2155        let dst: WritableXmm = dst.map(|r| r.into());
2156        let inst = match size {
2157            OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
2158            OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
2159            OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
2160            _ => unimplemented!(),
2161        };
2162        self.emit(Inst::External { inst });
2163    }
2164
2165    /// Performs a min operation with vectors of unsigned integers in `lhs` and
2166    /// `rhs` and puts the results in `dst`.
2167    pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2168        let dst: WritableXmm = dst.map(|r| r.into());
2169        let inst = match size {
2170            OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
2171            OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
2172            OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
2173            _ => unimplemented!(),
2174        };
2175        self.emit(Inst::External { inst });
2176    }
2177
2178    /// Performs a comparison operation between vectors of floats in `lhs` and
2179    /// `rhs` and puts the results in `dst`.
2180    pub fn xmm_vcmpp_rrr(
2181        &mut self,
2182        dst: WritableReg,
2183        lhs: Reg,
2184        rhs: Reg,
2185        size: OperandSize,
2186        kind: VcmpKind,
2187    ) {
2188        let dst: WritableXmm = dst.map(|r| r.into());
2189        let imm = match kind {
2190            VcmpKind::Eq => 0,
2191            VcmpKind::Lt => 1,
2192            VcmpKind::Le => 2,
2193            VcmpKind::Unord => 3,
2194            VcmpKind::Ne => 4,
2195        };
2196        let inst = match size {
2197            OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
2198            OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
2199            _ => unimplemented!(),
2200        };
2201        self.emit(Inst::External { inst });
2202    }
2203
2204    /// Performs a subtraction on two vectors of floats and puts the results in
2205    /// `dst`.
2206    pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
2207        let dst: WritableXmm = dst.map(|r| r.into());
2208        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2209        let inst = match size {
2210            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
2211            _ => unimplemented!(),
2212        };
2213        self.emit(Inst::External { inst });
2214    }
2215
2216    /// Performs a subtraction on two vectors of floats and puts the results in
2217    /// `dst`.
2218    pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2219        let dst: WritableXmm = dst.map(|r| r.into());
2220        let inst = match size {
2221            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
2222            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
2223            _ => unimplemented!(),
2224        };
2225        self.emit(Inst::External { inst });
2226    }
2227
2228    /// Converts a vector of signed integers into a vector of narrower integers
2229    /// using saturation to handle overflow.
2230    pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2231        let dst: WritableXmm = dst.map(|r| r.into());
2232        let inst = match size {
2233            OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
2234            OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
2235            _ => unimplemented!(),
2236        };
2237        self.emit(Inst::External { inst });
2238    }
2239
2240    /// Converts a vector of unsigned integers into a vector of narrower
2241    /// integers using saturation to handle overflow.
2242    pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2243        let dst: WritableXmm = dst.map(|r| r.into());
2244        let inst = match size {
2245            OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
2246            OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
2247            _ => unimplemented!(),
2248        };
2249        self.emit(Inst::External { inst });
2250    }
2251
2252    /// Concatenates `src1` and `src2` and shifts right by `imm` and puts
2253    /// result in `dst`.
2254    pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
2255        let dst: WritableXmm = dst.map(|r| r.into());
2256        let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
2257        self.emit(Inst::External { inst });
2258    }
2259
2260    /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
2261    /// interleaves them in `dst`.
2262    pub fn xmm_vunpcklp_rrm(
2263        &mut self,
2264        src1: Reg,
2265        src2: &Address,
2266        dst: WritableReg,
2267        size: OperandSize,
2268    ) {
2269        let dst: WritableXmm = dst.map(|r| r.into());
2270        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2271        let inst = match size {
2272            OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
2273            _ => unimplemented!(),
2274        };
2275        self.emit(Inst::External { inst });
2276    }
2277
2278    /// Unpacks and interleaves high order data of floats in `src1` and `src2`
2279    /// and puts the results in `dst`.
2280    pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2281        let dst: WritableXmm = dst.map(|r| r.into());
2282        let inst = match size {
2283            OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
2284            _ => unimplemented!(),
2285        };
2286        self.emit(Inst::External { inst });
2287    }
2288
2289    /// Unpacks and interleaves the lower lanes of vectors of integers in `src1`
2290    /// and `src2` and puts the results in `dst`.
2291    pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2292        let dst: WritableXmm = dst.map(|r| r.into());
2293        let inst = match size {
2294            OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
2295            OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
2296            _ => unimplemented!(),
2297        };
2298        self.emit(Inst::External { inst });
2299    }
2300
2301    /// Unpacks and interleaves the higher lanes of vectors of integers in
2302    /// `src1` and `src2` and puts the results in `dst`.
2303    pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2304        let dst: WritableXmm = dst.map(|r| r.into());
2305        let inst = match size {
2306            OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
2307            OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
2308            _ => unimplemented!(),
2309        };
2310        self.emit(Inst::External { inst });
2311    }
2312
    /// Multiplies packed 64-bit integers in `src1` and `src2` and puts the
    /// low 64 bits of each product in `dst`.
    ///
    /// NOTE(review): `vpmullq` is an AVX-512 instruction — presumably callers
    /// gate emission on the required CPU features; confirm at call sites.
    pub(crate) fn vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmullq_c::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2318
2319    /// Creates a mask made up of the most significant bit of each byte of
2320    /// `src` and stores the result in `dst`.
2321    pub fn xmm_vpmovmsk_rr(
2322        &mut self,
2323        src: Reg,
2324        dst: WritableReg,
2325        src_size: OperandSize,
2326        dst_size: OperandSize,
2327    ) {
2328        assert_eq!(dst_size, OperandSize::S32);
2329        let dst: WritableGpr = dst.map(|r| r.into());
2330        let inst = match src_size {
2331            OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
2332            _ => unimplemented!(),
2333        };
2334
2335        self.emit(Inst::External { inst });
2336    }
2337
    /// Creates a mask made up of the most significant (sign) bit of each
    /// float lane in `src` and stores the result in `dst`.
    ///
    /// `src_size` selects between single-precision (`vmovmskps`) and
    /// double-precision (`vmovmskpd`) lanes; the mask is always produced in a
    /// 32-bit general purpose register.
    pub fn xmm_vmovskp_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }
2357
2358    /// Compute the absolute value of elements in vector `src` and put the
2359    /// results in `dst`.
2360    pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2361        let dst: WritableXmm = dst.map(|r| r.into());
2362        let inst = match size {
2363            OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
2364            OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
2365            OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
2366            _ => unimplemented!(),
2367        };
2368        self.emit(Inst::External { inst });
2369    }
2370
2371    /// Arithmetically (sign preserving) right shift on vector in `src` by
2372    /// `amount` with result written to `dst`.
2373    pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2374        let dst: WritableXmm = dst.map(|r| r.into());
2375        let inst = match size {
2376            OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
2377            OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
2378            _ => unimplemented!(),
2379        };
2380        self.emit(Inst::External { inst });
2381    }
2382
2383    /// Arithmetically (sign preserving) right shift on vector in `src` by
2384    /// `imm` with result written to `dst`.
2385    pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2386        let dst: WritableXmm = dst.map(|r| r.into());
2387        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2388        let inst = match size {
2389            OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
2390            _ => unimplemented!(),
2391        };
2392        self.emit(Inst::External { inst });
2393    }
2394
2395    /// Shift vector data left by `imm`.
2396    pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2397        let dst: WritableXmm = dst.map(|r| r.into());
2398        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2399        let inst = match size {
2400            OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
2401            OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
2402            _ => unimplemented!(),
2403        };
2404        self.emit(Inst::External { inst });
2405    }
2406
2407    /// Shift vector data left by `amount`.
2408    pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2409        let dst: WritableXmm = dst.map(|r| r.into());
2410        let inst = match size {
2411            OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
2412            OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
2413            OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
2414            _ => unimplemented!(),
2415        };
2416        self.emit(Inst::External { inst });
2417    }
2418
2419    /// Shift vector data right by `imm`.
2420    pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2421        let dst: WritableXmm = dst.map(|r| r.into());
2422        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
2423        let inst = match size {
2424            OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
2425            OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
2426            OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
2427            _ => unimplemented!(),
2428        };
2429        self.emit(Inst::External { inst });
2430    }
2431
2432    /// Shift vector data right by `amount`.
2433    pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
2434        let dst: WritableXmm = dst.map(|r| r.into());
2435        let inst = match size {
2436            OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
2437            OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
2438            OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
2439            _ => unimplemented!(),
2440        };
2441        self.emit(Inst::External { inst });
2442    }
2443
2444    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2445    /// and put the results in `dst`.
2446    pub fn xmm_vandp_rrm(
2447        &mut self,
2448        src1: Reg,
2449        src2: &Address,
2450        dst: WritableReg,
2451        size: OperandSize,
2452    ) {
2453        let dst: WritableXmm = dst.map(|r| r.into());
2454        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
2455        let inst = match size {
2456            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
2457            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
2458            _ => unimplemented!(),
2459        };
2460        self.emit(Inst::External { inst });
2461    }
2462
2463    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2464    /// and put the results in `dst`.
2465    pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2466        let dst: WritableXmm = dst.map(|r| r.into());
2467        let inst = match size {
2468            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
2469            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
2470            _ => unimplemented!(),
2471        };
2472        self.emit(Inst::External { inst });
2473    }
2474
2475    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2476    /// and stores the results in `dst`.
2477    pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
2478        let dst: WritableXmm = dst.map(|r| r.into());
2479        let address = Self::to_synthetic_amode(&src2, MemFlags::trusted());
2480        let inst = asm::inst::vpand_b::new(dst, src1, address).into();
2481        self.emit(Inst::External { inst });
2482    }
2483
2484    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
2485    /// and stores the results in `dst`.
2486    pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2487        let dst: WritableXmm = dst.map(|r| r.into());
2488        let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
2489        self.emit(Inst::External { inst });
2490    }
2491
2492    /// Perform an `and not` operation on vectors of floats in `src1` and
2493    /// `src2` and put the results in `dst`.
2494    pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2495        let dst: WritableXmm = dst.map(|r| r.into());
2496        let inst = match size {
2497            OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
2498            OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
2499            _ => unimplemented!(),
2500        };
2501        self.emit(Inst::External { inst });
2502    }
2503
2504    /// Perform an `and not` operation on vectors in `src1` and `src2` and put
2505    /// the results in `dst`.
2506    pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2507        let dst: WritableXmm = dst.map(|r| r.into());
2508        let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
2509        self.emit(Inst::External { inst });
2510    }
2511
2512    /// Perform an or operation for the vectors of floats in `src1` and `src2`
2513    /// and put the results in `dst`.
2514    pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2515        let dst: WritableXmm = dst.map(|r| r.into());
2516        let inst = match size {
2517            OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
2518            OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
2519            _ => unimplemented!(),
2520        };
2521        self.emit(Inst::External { inst });
2522    }
2523
2524    /// Bitwise OR of `src1` and `src2`.
2525    pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
2526        let dst: WritableXmm = dst.map(|r| r.into());
2527        let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
2528        self.emit(Inst::External { inst });
2529    }
2530
2531    /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
2532    /// the results in `dst`.
2533    pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2534        let dst: WritableXmm = dst.map(|r| r.into());
2535        let inst = match size {
2536            OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
2537            OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
2538            _ => unimplemented!(),
2539        };
2540        self.emit(Inst::External { inst });
2541    }
2542
    /// Perform a logical xor on the vector in `src` and the vector in memory
    /// at `address` and put the results in `dst`.
    pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpxor_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }
2551
    /// Perform a logical xor on vectors in `src1` and `src2` and put the
    /// results in `dst`.
    pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2559
2560    /// Perform a max operation across two vectors of floats and put the
2561    /// results in `dst`.
2562    pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2563        let dst: WritableXmm = dst.map(|r| r.into());
2564        let inst = match size {
2565            OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
2566            OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
2567            _ => unimplemented!(),
2568        };
2569        self.emit(Inst::External { inst });
2570    }
2571
    /// Perform a min operation across the vector of floats in `src1` and the
    /// vector of floats in memory at `src2` and put the results in `dst`.
    pub fn xmm_vminp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2590
    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2602
    /// Round each lane of a vector of floats in `src` according to `mode` and
    /// put the results in `dst`.
    pub fn xmm_vroundp_rri(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: VroundMode,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        // Encode the rounding mode as the immediate byte expected by
        // `vroundps`/`vroundpd`.
        let imm = match mode {
            VroundMode::TowardNearest => 0,
            VroundMode::TowardNegativeInfinity => 1,
            VroundMode::TowardPositiveInfinity => 2,
            VroundMode::TowardZero => 3,
        };

        let inst = match size {
            OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }
2627
2628    /// Shuffle of vectors of floats.
2629    pub fn xmm_vshufp_rrri(
2630        &mut self,
2631        src1: Reg,
2632        src2: Reg,
2633        dst: WritableReg,
2634        imm: u8,
2635        size: OperandSize,
2636    ) {
2637        let dst: WritableXmm = dst.map(|r| r.into());
2638        let inst = match size {
2639            OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
2640            _ => unimplemented!(),
2641        };
2642        self.emit(Inst::External { inst });
2643    }
2644
2645    /// Each lane in `src1` is multiplied by the corresponding lane in `src2`
2646    /// producing intermediate 32-bit operands. Each intermediate 32-bit
2647    /// operand is truncated to 18 most significant bits. Rounding is performed
2648    /// by adding 1 to the least significant bit of the 18-bit intermediate
2649    /// result. The 16 bits immediately to the right of the most significant
2650    /// bit of each 18-bit intermediate result is placed in each lane of `dst`.
2651    pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2652        let dst: WritableXmm = dst.map(|r| r.into());
2653        let inst = match size {
2654            OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
2655            _ => unimplemented!(),
2656        };
2657        self.emit(Inst::External { inst });
2658    }
2659
    /// Multiplies the even-indexed signed 32-bit lanes of `src1` and `src2`
    /// and puts the 64-bit products in `dst` (`vpmuldq`).
    pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2665
    /// Multiplies the even-indexed unsigned 32-bit lanes of `src1` and `src2`
    /// and puts the 64-bit products in `dst` (`vpmuludq`).
    pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2671
    /// Multiplies corresponding integer lanes of `src1` and `src2` and puts
    /// the low bits of each product in `dst`. 16-bit and 32-bit lanes are
    /// supported; other sizes are unimplemented.
    pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2681
    /// Multiplies the vectors of floats in `src1` and `src2` and puts the
    /// results in `dst`.
    pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
2691
2692    /// Perform an average operation for the vector of unsigned integers in
2693    /// `src1` and `src2` and put the results in `dst`.
2694    pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2695        let dst: WritableXmm = dst.map(|r| r.into());
2696        let inst = match size {
2697            OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
2698            OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
2699            _ => unimplemented!(),
2700        };
2701        self.emit(Inst::External { inst });
2702    }
2703
2704    /// Divide the vector of floats in `src1` by the vector of floats in `src2`
2705    /// and put the results in `dst`.
2706    pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2707        let dst: WritableXmm = dst.map(|r| r.into());
2708        let inst = match size {
2709            OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
2710            OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
2711            _ => unimplemented!(),
2712        };
2713        self.emit(Inst::External { inst });
2714    }
2715
2716    /// Compute square roots of vector of floats in `src` and put the results
2717    /// in `dst`.
2718    pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2719        let dst: WritableXmm = dst.map(|r| r.into());
2720        let inst = match size {
2721            OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
2722            OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
2723            _ => unimplemented!(),
2724        };
2725        self.emit(Inst::External { inst });
2726    }
2727
2728    /// Multiply and add packed signed and unsigned bytes.
2729    pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2730        let dst: WritableXmm = dst.map(|r| r.into());
2731        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
2732        let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
2733        self.emit(Inst::External { inst });
2734    }
2735
2736    /// Multiply and add packed signed and unsigned bytes.
2737    pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
2738        let dst: WritableXmm = dst.map(|r| r.into());
2739        let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
2740        self.emit(Inst::External { inst });
2741    }
2742
    /// Multiply and add packed integers, the second operand loaded from
    /// memory at `address`.
    pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }
2750
    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
2757}
2758
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
/// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
/// so 8 and 16 bit operand sizes are not supported.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
    /// sequence, accepting a 32-bit immediate.
    region: PatchRegion,

    /// The offset into the patchable region where the patchable constant begins.
    /// The immediate occupies the trailing 4 bytes of the region, so the region's
    /// total length is `constant_offset + 4`.
    constant_offset: usize,
}
2770
impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
    /// add-with-immediate occurs. The [`MachBuffer`] will have an add-with-immediate instruction
    /// present in that region, though it will add `0` until the `::finalize` method is called.
    ///
    /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
    /// that 8 and 16-bit addition cannot be supported.
    pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
        let open = asm.buffer_mut().start_patchable();
        let start = asm.buffer().cur_offset();

        // Emit the opcode and register use for the add instruction.
        let reg = pair_gpr(Writable::from_reg(reg));
        // Both forms encode the placeholder `0` as a full 32-bit immediate;
        // that immediate is what `finalize` overwrites later.
        let inst = match size {
            OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
            OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
            _ => {
                panic!(
                    "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
                    size.num_bits(),
                )
            }
        };
        asm.emit(Inst::External { inst });

        // The offset to the constant is the width of what was just emitted
        // minus 4, the width of the 32-bit immediate.
        let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();

        let region = asm.buffer_mut().end_patchable(open);

        Self {
            region,
            constant_offset,
        }
    }

    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
    /// value is passed in as an i32, but the instruction encoding is fixed when
    /// [`PatchableAddToReg::new`] is called.
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
        let slice = self.region.patch(buffer);
        // The immediate is the trailing 4 bytes of the captured region; this
        // check guards against the region having captured anything else.
        debug_assert_eq!(slice.len(), self.constant_offset + 4);
        slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
    }
}