winch_codegen/isa/x64/asm.rs

//! Assembler library implementation for x64.

use crate::{
    isa::{reg::Reg, CallingConvention},
    masm::{
        DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
        RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
    },
    reg::writable,
    x64::regs::scratch,
};
use cranelift_codegen::{
    ir::{
        types, ConstantPool, ExternalName, LibCall, MemFlags, SourceLoc, TrapCode, Type,
        UserExternalNameRef,
    },
    isa::{
        unwind::UnwindInst,
        x64::{
            args::{
                self, AluRmiROpcode, Amode, Avx512Opcode, AvxOpcode, CmpOpcode, DivSignedness,
                ExtMode, FenceKind, FromWritableReg, Gpr, GprMem, GprMemImm, Imm8Gpr, Imm8Reg,
                RegMem, RegMemImm, ShiftKind as CraneliftShiftKind, SseOpcode, SyntheticAmode,
                WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemAligned, XmmMemImm, CC,
            },
            encoding::rex::{encode_modrm, RexFlags},
            settings as x64_settings, AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
        },
    },
    settings, CallInfo, Final, MachBuffer, MachBufferFinalized, MachInstEmit, MachInstEmitState,
    MachLabel, PatchRegion, RelocDistance, VCodeConstantData, VCodeConstants, Writable,
};

use crate::reg::WritableReg;

use super::address::Address;
use smallvec::SmallVec;

// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.

impl From<Reg> for RegMemImm {
    fn from(reg: Reg) -> Self {
        RegMemImm::reg(reg.into())
    }
}

impl From<Reg> for RegMem {
    fn from(value: Reg) -> Self {
        RegMem::Reg { reg: value.into() }
    }
}

impl From<Reg> for WritableGpr {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
    }
}

impl From<Reg> for WritableXmm {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
    }
}

impl From<Reg> for Gpr {
    fn from(reg: Reg) -> Self {
        Gpr::unwrap_new(reg.into())
    }
}

impl From<Reg> for GprMem {
    fn from(value: Reg) -> Self {
        GprMem::unwrap_new(value.into())
    }
}

impl From<Reg> for GprMemImm {
    fn from(reg: Reg) -> Self {
        GprMemImm::unwrap_new(reg.into())
    }
}

impl From<Reg> for Imm8Gpr {
    fn from(value: Reg) -> Self {
        Imm8Gpr::unwrap_new(Imm8Reg::Reg { reg: value.into() })
    }
}

impl From<Reg> for Xmm {
    fn from(reg: Reg) -> Self {
        Xmm::unwrap_new(reg.into())
    }
}

impl From<Reg> for XmmMem {
    fn from(value: Reg) -> Self {
        XmmMem::unwrap_new(value.into())
    }
}

impl From<Reg> for XmmMemImm {
    fn from(value: Reg) -> Self {
        XmmMemImm::unwrap_new(value.into())
    }
}

impl From<OperandSize> for args::OperandSize {
    fn from(size: OperandSize) -> Self {
        match size {
            OperandSize::S8 => Self::Size8,
            OperandSize::S16 => Self::Size16,
            OperandSize::S32 => Self::Size32,
            OperandSize::S64 => Self::Size64,
            s => panic!("Invalid operand size {s:?}"),
        }
    }
}

impl From<DivKind> for DivSignedness {
    fn from(kind: DivKind) -> DivSignedness {
        match kind {
            DivKind::Signed => DivSignedness::Signed,
            DivKind::Unsigned => DivSignedness::Unsigned,
        }
    }
}

impl From<IntCmpKind> for CC {
    fn from(value: IntCmpKind) -> Self {
        match value {
            IntCmpKind::Eq => CC::Z,
            IntCmpKind::Ne => CC::NZ,
            IntCmpKind::LtS => CC::L,
            IntCmpKind::LtU => CC::B,
            IntCmpKind::GtS => CC::NLE,
            IntCmpKind::GtU => CC::NBE,
            IntCmpKind::LeS => CC::LE,
            IntCmpKind::LeU => CC::BE,
            IntCmpKind::GeS => CC::NL,
            IntCmpKind::GeU => CC::NB,
        }
    }
}

impl From<ShiftKind> for CraneliftShiftKind {
    fn from(value: ShiftKind) -> Self {
        match value {
            ShiftKind::Shl => CraneliftShiftKind::ShiftLeft,
            ShiftKind::ShrS => CraneliftShiftKind::ShiftRightArithmetic,
            ShiftKind::ShrU => CraneliftShiftKind::ShiftRightLogical,
            ShiftKind::Rotl => CraneliftShiftKind::RotateLeft,
            ShiftKind::Rotr => CraneliftShiftKind::RotateRight,
        }
    }
}

impl<T: ExtendType> From<Extend<T>> for ExtMode {
    fn from(value: Extend<T>) -> Self {
        match value {
            Extend::I32Extend8 => ExtMode::BL,
            Extend::I32Extend16 => ExtMode::WL,
            Extend::I64Extend8 => ExtMode::BQ,
            Extend::I64Extend16 => ExtMode::WQ,
            Extend::I64Extend32 => ExtMode::LQ,
            Extend::__Kind(_) => unreachable!(),
        }
    }
}

impl From<ExtendKind> for ExtMode {
    fn from(value: ExtendKind) -> Self {
        match value {
            ExtendKind::Signed(s) => s.into(),
            ExtendKind::Unsigned(u) => u.into(),
        }
    }
}

/// Kinds of extends supported by `vpmov`.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}

impl From<VpmovKind> for AvxOpcode {
    fn from(value: VpmovKind) -> Self {
        match value {
            VpmovKind::E8x8S => AvxOpcode::Vpmovsxbw,
            VpmovKind::E8x8U => AvxOpcode::Vpmovzxbw,
            VpmovKind::E16x4S => AvxOpcode::Vpmovsxwd,
            VpmovKind::E16x4U => AvxOpcode::Vpmovzxwd,
            VpmovKind::E32x2S => AvxOpcode::Vpmovsxdq,
            VpmovKind::E32x2U => AvxOpcode::Vpmovzxdq,
        }
    }
}

impl From<V128LoadExtendKind> for VpmovKind {
    fn from(value: V128LoadExtendKind) -> Self {
        match value {
            V128LoadExtendKind::E8x8S => Self::E8x8S,
            V128LoadExtendKind::E8x8U => Self::E8x8U,
            V128LoadExtendKind::E16x4S => Self::E16x4S,
            V128LoadExtendKind::E16x4U => Self::E16x4U,
            V128LoadExtendKind::E32x2S => Self::E32x2S,
            V128LoadExtendKind::E32x2U => Self::E32x2U,
        }
    }
}

impl From<V128ExtendKind> for VpmovKind {
    fn from(value: V128ExtendKind) -> Self {
        match value {
            V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
            V128ExtendKind::LowI8x16U => Self::E8x8U,
            V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
            V128ExtendKind::LowI16x8U => Self::E16x4U,
            V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
            V128ExtendKind::LowI32x4U => Self::E32x2U,
            _ => unimplemented!(),
        }
    }
}

/// Kinds of comparisons supported by `vcmp`.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}

/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to single precision floats.
    I32ToF32,
    /// Converts 32-bit integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    /// Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}

/// Modes supported by `vround`.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero.
    TowardZero,
}

/// Low level assembler implementation for x64.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information.
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// x64 flags.
    isa_flags: x64_settings::Flags,
    /// Constant pool.
    pool: ConstantPool,
    /// Constants that will be emitted separately by the MachBuffer.
    constants: VCodeConstants,
}

impl Assembler {
    /// Create a new x64 assembler.
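    ///
    /// A sketch of the expected lifecycle, illustrative only, with flag
    /// construction elided:
    ///
    /// ```ignore
    /// let mut asm = Assembler::new(shared_flags, isa_flags);
    /// asm.push_r(reg); // ...emit instructions...
    /// let code = asm.finalize(None);
    /// ```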
    pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self {
            buffer: MachBuffer::<Inst>::new(),
            emit_state: Default::default(),
            emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
            constants: Default::default(),
            pool: ConstantPool::new(),
            isa_flags,
        }
    }

    /// Get a mutable reference to the underlying machine buffer.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }

    /// Get a reference to the underlying machine buffer.
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }

    /// Adds a constant to the constant pool and returns its address.
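    ///
    /// A minimal usage sketch, illustrative only, assuming `asm` is an
    /// `Assembler` and `dst` is a writable float register:
    ///
    /// ```ignore
    /// let addr = asm.add_constant(&42f64.to_le_bytes());
    /// asm.xmm_mov_mr(&addr, dst, OperandSize::S64, MemFlags::trusted());
    /// ```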
    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
        let handle = self.pool.insert(constant.into());
        Address::constant(handle)
    }

    /// Return the emitted code.
    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
        let stencil = self
            .buffer
            .finish(&self.constants, self.emit_state.ctrl_plane_mut());
        stencil.apply_base_srcloc(loc.unwrap_or_default())
    }

    fn emit(&mut self, inst: Inst) {
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
    }

    fn to_synthetic_amode(
        addr: &Address,
        pool: &mut ConstantPool,
        constants: &mut VCodeConstants,
        buffer: &mut MachBuffer<Inst>,
        memflags: MemFlags,
    ) -> SyntheticAmode {
        match *addr {
            Address::Offset { base, offset } => {
                let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
                SyntheticAmode::real(amode)
            }
            Address::Const(c) => {
                // Defer the creation of the `SyntheticAmode::ConstantOffset`
                // addressing mode until the address is referenced by an
                // actual instruction.
                let constant_data = pool.get(c);
                let data = VCodeConstantData::Pool(c, constant_data.clone());
                // If the constant data is not already in use, inserting it
                // creates a new entry that must be registered with the buffer.
                let needs_registration = !constants.pool_uses(&data);
                let constant = constants.insert(VCodeConstantData::Pool(c, constant_data.clone()));

                if needs_registration {
                    buffer.register_constant(&constant, &data);
                }
                SyntheticAmode::ConstantOffset(constant)
            }
            Address::ImmRegRegShift {
                simm32,
                base,
                index,
                shift,
            } => SyntheticAmode::Real(Amode::ImmRegRegShift {
                simm32,
                base: base.into(),
                index: index.into(),
                shift,
                flags: memflags,
            }),
        }
    }

    /// Emit an unwind instruction.
    pub fn unwind_inst(&mut self, inst: UnwindInst) {
        self.emit(Inst::Unwind { inst })
    }

    /// Push register.
    pub fn push_r(&mut self, reg: Reg) {
        self.emit(Inst::Push64 { src: reg.into() });
    }

    /// Pop to register.
    pub fn pop_r(&mut self, dst: WritableReg) {
        let writable = dst.map(Into::into);
        let dst = WritableGpr::from_writable_reg(writable).expect("valid writable gpr");
        self.emit(Inst::Pop64 { dst });
    }

    /// Return instruction.
    pub fn ret(&mut self) {
        self.emit(Inst::Ret {
            stack_bytes_to_pop: 0,
        });
    }

    /// Register-to-register move.
    pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::MovRR {
            src: src.into(),
            dst: dst.map(Into::into),
            size: size.into(),
        });
    }

    /// Register-to-memory move.
    pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(
            addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::MovRM {
            size: size.into(),
            src: src.into(),
            dst,
        });
    }

    /// Immediate-to-memory move.
    pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(
            addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::MovImmM {
            size: size.into(),
            simm32: src,
            dst,
        });
    }

    /// Immediate-to-register move.
    pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::Imm {
            dst_size: size.into(),
            simm64: imm,
            dst: dst.map(Into::into),
        });
    }

    /// Zero-extend memory-to-register load.
    pub fn movzx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Option<Extend<Zero>>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(
            addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            memflags,
        );

        if let Some(ext) = ext {
            let reg_mem = RegMem::mem(src);
            self.emit(Inst::MovzxRmR {
                ext_mode: ext.into(),
                src: GprMem::unwrap_new(reg_mem),
                dst: dst.map(Into::into),
            });
        } else {
            self.emit(Inst::Mov64MR {
                src,
                dst: dst.map(Into::into),
            });
        }
    }

    /// Sign-extend memory-to-register load.
    pub fn movsx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Extend<Signed>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(
            addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            memflags,
        );

        let reg_mem = RegMem::mem(src);
        self.emit(Inst::MovsxRmR {
            ext_mode: ext.into(),
            src: GprMem::unwrap_new(reg_mem),
            dst: dst.map(Into::into),
        })
    }

    /// Register-to-register move with zero extension.
    pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
        self.emit(Inst::MovzxRmR {
            ext_mode: kind.into(),
            src: src.into(),
            dst: dst.map(Into::into),
        })
    }

    /// Register-to-register move with sign extension.
    pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
        self.emit(Inst::MovsxRmR {
            ext_mode: kind.into(),
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Integer register conditional move.
    pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        let dst: WritableGpr = dst.map(Into::into);
        self.emit(Inst::Cmove {
            size: size.into(),
            cc: cc.into(),
            consequent: src.into(),
            alternative: dst.to_reg().into(),
            dst,
        })
    }

    /// Single and double precision floating point
    /// register-to-register move.
    pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        use OperandSize::*;

        let op = match size {
            S32 => SseOpcode::Movaps,
            S64 => SseOpcode::Movapd,
            S128 => SseOpcode::Movdqa,
            S8 | S16 => unreachable!(),
        };

        self.emit(Inst::XmmUnaryRmRUnaligned {
            op,
            src: XmmMem::unwrap_new(src.into()),
            dst: dst.map(Into::into),
        });
    }

    /// Single and double precision floating point load.
    pub fn xmm_mov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        use OperandSize::*;

        assert!(dst.to_reg().is_float());
        let op = match size {
            S32 => SseOpcode::Movss,
            S64 => SseOpcode::Movsd,
            S128 => SseOpcode::Movdqu,
            S16 | S8 => unreachable!(),
        };

        let src = Self::to_synthetic_amode(
            src,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::XmmUnaryRmRUnaligned {
            op,
            src: XmmMem::unwrap_new(RegMem::mem(src)),
            dst: dst.map(Into::into),
        });
    }

    /// Vector load and extend.
    pub fn xmm_vpmov_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        kind: VpmovKind,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());

        let src = Self::to_synthetic_amode(
            src,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );

        self.emit(Inst::XmmUnaryRmRVex {
            op: kind.into(),
            src: XmmMem::unwrap_new(RegMem::mem(src)),
            dst: dst.to_reg().into(),
        });
    }

    /// Extends vector of integers in `src` and puts results in `dst`.
    pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {
        self.emit(Inst::XmmUnaryRmRVex {
            op: kind.into(),
            src: src.into(),
            dst: dst.to_reg().into(),
        });
    }

    /// Vector load and broadcast.
    pub fn xmm_vpbroadcast_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());

        let src = Self::to_synthetic_amode(
            src,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );

        let op = match size {
            OperandSize::S8 => AvxOpcode::Vpbroadcastb,
            OperandSize::S16 => AvxOpcode::Vpbroadcastw,
            OperandSize::S32 => AvxOpcode::Vpbroadcastd,
            _ => unimplemented!(),
        };

        self.emit(Inst::XmmUnaryRmRVex {
            op,
            src: XmmMem::unwrap_new(RegMem::mem(src)),
            dst: dst.to_reg().into(),
        });
    }

    /// Value in `src` is broadcast into lanes of `size` in `dst`.
    pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(src.is_float() && dst.to_reg().is_float());

        let op = match size {
            OperandSize::S8 => AvxOpcode::Vpbroadcastb,
            OperandSize::S16 => AvxOpcode::Vpbroadcastw,
            OperandSize::S32 => AvxOpcode::Vpbroadcastd,
            _ => unimplemented!(),
        };

        self.emit(Inst::XmmUnaryRmRVex {
            op,
            src: XmmMem::unwrap_new(src.into()),
            dst: dst.to_reg().into(),
        });
    }

    /// Memory to register shuffle of bytes in vector.
    pub fn xmm_vpshuf_mr(
        &mut self,
        src: &Address,
        dst: WritableReg,
        mask: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(dst.to_reg().is_float());

        let op = match size {
            OperandSize::S32 => AvxOpcode::Vpshufd,
            _ => unimplemented!(),
        };

        let src = Self::to_synthetic_amode(
            src,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::XmmUnaryRmRImmVex {
            op,
            src: XmmMem::unwrap_new(RegMem::Mem { addr: src }),
            dst: dst.to_reg().into(),
            imm: mask,
        });
    }

    /// Register to register shuffle of bytes in vector.
    pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {
        assert!(src.is_float() && dst.to_reg().is_float());

        let op = match size {
            OperandSize::S16 => AvxOpcode::Vpshuflw,
            OperandSize::S32 => AvxOpcode::Vpshufd,
            _ => unimplemented!(),
        };

        self.emit(Inst::XmmUnaryRmRImmVex {
            op,
            src: XmmMem::from(Xmm::from(src)),
            imm: mask,
            dst: dst.to_reg().into(),
        });
    }

    /// Single and double precision floating point store.
    pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {
        use OperandSize::*;

        assert!(src.is_float());

        let op = match size {
            S32 => SseOpcode::Movss,
            S64 => SseOpcode::Movsd,
            S128 => SseOpcode::Movdqu,
            S16 | S8 => unreachable!(),
        };

        let dst = Self::to_synthetic_amode(
            dst,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::XmmMovRM {
            op,
            src: src.into(),
            dst,
        });
    }

    /// Floating point register conditional move.
    pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        let dst: WritableXmm = dst.map(Into::into);
        let ty = match size {
            OperandSize::S32 => types::F32,
            OperandSize::S64 => types::F64,
            // Move the entire 128 bits via movdqa.
            OperandSize::S128 => types::I32X4,
            OperandSize::S8 | OperandSize::S16 => unreachable!(),
        };

        self.emit(Inst::XmmCmove {
            ty,
            cc: cc.into(),
            consequent: Xmm::unwrap_new(src.into()),
            alternative: dst.to_reg().into(),
            dst,
        })
    }

    /// Subtract register and register.
    pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Sub,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Subtract immediate and register.
    pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);

        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Sub,
            src1: dst.to_reg().into(),
            src2: GprMemImm::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

    /// "and" two registers.
    pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::And,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

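    /// "and" immediate and register.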
    pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::And,
            src1: dst.to_reg().into(),
            src2: GprMemImm::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

    /// "and" two float registers.
    pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Andps,
            OperandSize::S64 => SseOpcode::Andpd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmR {
            op,
            src1: dst.to_reg().into(),
            src2: XmmMemAligned::from(Xmm::from(src)),
            dst: dst.map(Into::into),
        });
    }

    /// "and not" two float registers.
    pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Andnps,
            OperandSize::S64 => SseOpcode::Andnpd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmR {
            op,
            src1: dst.to_reg().into(),
            src2: Xmm::from(src).into(),
            dst: dst.map(Into::into),
        });
    }

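    /// Move a value from a general purpose register to an XMM register.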
    pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Movd,
            OperandSize::S64 => SseOpcode::Movq,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::GprToXmm {
            op,
            src: src.into(),
            dst: dst.map(Into::into),
            src_size: size.into(),
        })
    }

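    /// Move a value from an XMM register to a general purpose register.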
    pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Movd,
            OperandSize::S64 => SseOpcode::Movq,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmToGpr {
            op,
            src: src.into(),
            dst: dst.map(Into::into),
            dst_size: size.into(),
        });
    }

    /// Convert float to signed int.
    pub fn cvt_float_to_sint_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr: Reg,
        tmp_xmm: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        saturating: bool,
    ) {
        self.emit(Inst::CvtFloatToSintSeq {
            dst_size: dst_size.into(),
            src_size: src_size.into(),
            is_saturating: saturating,
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr: tmp_gpr.into(),
            tmp_xmm: tmp_xmm.into(),
        });
    }

    /// Convert float to unsigned int.
    pub fn cvt_float_to_uint_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr: Reg,
        tmp_xmm: Reg,
        tmp_xmm2: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        saturating: bool,
    ) {
        self.emit(Inst::CvtFloatToUintSeq {
            dst_size: dst_size.into(),
            src_size: src_size.into(),
            is_saturating: saturating,
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr: tmp_gpr.into(),
            tmp_xmm: tmp_xmm.into(),
            tmp_xmm2: tmp_xmm2.into(),
        });
    }

    /// Convert signed int to float.
    pub fn cvt_sint_to_float(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        let op = match dst_size {
            OperandSize::S32 => SseOpcode::Cvtsi2ss,
            OperandSize::S64 => SseOpcode::Cvtsi2sd,
            OperandSize::S16 | OperandSize::S8 | OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::CvtIntToFloat {
            op,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
            src2_size: src_size.into(),
        });
    }

    /// Convert unsigned 64-bit int to float.
    pub fn cvt_uint64_to_float_seq(
        &mut self,
        src: Reg,
        dst: WritableReg,
        tmp_gpr1: Reg,
        tmp_gpr2: Reg,
        dst_size: OperandSize,
    ) {
        self.emit(Inst::CvtUint64ToFloatSeq {
            dst_size: dst_size.into(),
            src: src.into(),
            dst: dst.map(Into::into),
            tmp_gpr1: tmp_gpr1.into(),
            tmp_gpr2: tmp_gpr2.into(),
        });
    }

    /// Change precision of float.
    pub fn cvt_float_to_float(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        let op = match (src_size, dst_size) {
            (OperandSize::S32, OperandSize::S64) => SseOpcode::Cvtss2sd,
            (OperandSize::S64, OperandSize::S32) => SseOpcode::Cvtsd2ss,
            _ => unimplemented!(),
        };

        self.emit(Inst::XmmRmRUnaligned {
            op,
            src2: Xmm::unwrap_new(src.into()).into(),
            src1: dst.to_reg().into(),
            dst: dst.map(Into::into),
        });
    }

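    /// "or" two registers.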
    pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Or,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

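    /// "or" immediate and register.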
    pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);

        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Or,
            src1: dst.to_reg().into(),
            src2: GprMemImm::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

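    /// "or" two float registers.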
    pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Orps,
            OperandSize::S64 => SseOpcode::Orpd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmR {
            op,
            src1: dst.to_reg().into(),
            src2: XmmMemAligned::from(Xmm::from(src)),
            dst: dst.map(Into::into),
        });
    }

    /// Logical exclusive or with registers.
    pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Xor,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

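    /// Logical exclusive or with immediate and register.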
    pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);

        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Xor,
            src1: dst.to_reg().into(),
            src2: GprMemImm::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

    /// Logical exclusive or with float registers.
    pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Xorps,
            OperandSize::S64 => SseOpcode::Xorpd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmR {
            op,
            src1: dst.to_reg().into(),
            src2: XmmMemAligned::from(Xmm::from(src)),
            dst: dst.map(Into::into),
        });
    }

    /// Shift with register and register.
    pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
        self.emit(Inst::ShiftR {
            size: size.into(),
            kind: kind.into(),
            src: dst.to_reg().into(),
            num_bits: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Shift with immediate and register.
    pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {
        let imm = imm.into();

        self.emit(Inst::ShiftR {
            size: size.into(),
            kind: kind.into(),
            src: dst.to_reg().into(),
            num_bits: Imm8Gpr::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

    /// Signed/unsigned division.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants. This function assumes that the caller has
    /// correctly allocated the dividend as `(rdx:rax)` and accounted for
    /// the quotient to be stored in `rax`.
    pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
        let trap = match kind {
            // Signed division has two trapping conditions, integer overflow and
            // divide-by-zero. Check for divide-by-zero explicitly and let the
            // hardware detect overflow.
            //
            // The dividend is sign extended to initialize `rdx`.
            DivKind::Signed => {
                self.emit(Inst::CmpRmiR {
                    size: size.into(),
                    src1: divisor.into(),
                    src2: GprMemImm::unwrap_new(RegMemImm::imm(0)),
                    opcode: CmpOpcode::Cmp,
                });
                self.emit(Inst::TrapIf {
                    cc: CC::Z,
                    trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
                });
                self.emit(Inst::SignExtendData {
                    size: size.into(),
                    src: dst.0.into(),
                    dst: dst.1.into(),
                });
                TrapCode::INTEGER_OVERFLOW
            }

            // Unsigned division only traps in one case, on divide-by-zero, so
            // defer that check to the `div` instruction's trap code.
            //
            // The dividend_hi reg is initialized with zero through an
            // xor-against-itself op.
            DivKind::Unsigned => {
                self.emit(Inst::AluRmiR {
                    size: size.into(),
                    op: AluRmiROpcode::Xor,
                    src1: dst.1.into(),
                    src2: dst.1.into(),
                    dst: dst.1.into(),
                });
                TrapCode::INTEGER_DIVISION_BY_ZERO
            }
        };
        self.emit(Inst::Div {
            sign: kind.into(),
            size: size.into(),
            trap,
            divisor: GprMem::unwrap_new(RegMem::reg(divisor.into())),
            dividend_lo: dst.0.into(),
            dividend_hi: dst.1.into(),
            dst_quotient: dst.0.into(),
            dst_remainder: dst.1.into(),
        });
    }

    /// Signed/unsigned remainder.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants and ultimately calculate the remainder. This
    /// function assumes that the caller has correctly allocated the
    /// dividend as `(rdx:rax)` and accounted for the remainder to be
    /// stored in `rdx`.
    pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
        match kind {
            // Signed remainder goes through a pseudo-instruction which has
            // some internal branching. The `dividend_hi`, or `rdx`, is
            // initialized here with a `SignExtendData` instruction.
            RemKind::Signed => {
                self.emit(Inst::SignExtendData {
                    size: size.into(),
                    src: dst.0.into(),
                    dst: dst.1.into(),
                });
                self.emit(Inst::CheckedSRemSeq {
                    size: size.into(),
                    divisor: divisor.into(),
                    dividend_lo: dst.0.into(),
                    dividend_hi: dst.1.into(),
                    dst_quotient: dst.0.into(),
                    dst_remainder: dst.1.into(),
                });
            }

            // Unsigned remainder initializes `dividend_hi` with zero and
            // then executes a normal `div` instruction.
            RemKind::Unsigned => {
                self.emit(Inst::AluRmiR {
                    size: size.into(),
                    op: AluRmiROpcode::Xor,
                    src1: dst.1.into(),
                    src2: dst.1.into(),
                    dst: dst.1.into(),
                });
                self.emit(Inst::Div {
                    sign: DivSignedness::Unsigned,
                    trap: TrapCode::INTEGER_DIVISION_BY_ZERO,
                    size: size.into(),
                    divisor: GprMem::unwrap_new(RegMem::reg(divisor.into())),
                    dividend_lo: dst.0.into(),
                    dividend_hi: dst.1.into(),
                    dst_quotient: dst.0.into(),
                    dst_remainder: dst.1.into(),
                });
            }
        }
    }

    /// Multiply immediate and register.
    pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::IMulImm {
            size: size.into(),
            src1: dst.to_reg().into(),
            src2: imm,
            dst: dst.map(Into::into),
        });
    }

    /// Multiply register and register.
    pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::IMul {
            size: size.into(),
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Add immediate and register.
    pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);

        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Add,
            src1: dst.to_reg().into(),
            src2: GprMemImm::unwrap_new(imm),
            dst: dst.map(Into::into),
        });
    }

    /// Add register and register.
    pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::AluRmiR {
            size: size.into(),
            op: AluRmiROpcode::Add,
            src1: dst.to_reg().into(),
            src2: src.into(),
            dst: dst.map(Into::into),
        });
    }

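    /// Atomic add over memory via `lock xadd`; the previous value at `addr`
    /// is placed in `dst`.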
    pub fn lock_xadd(
        &mut self,
        addr: Address,
        operand: Reg,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(
            &addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );

        self.emit(Inst::LockXadd {
            size: size.into(),
            operand: operand.into(),
            mem,
            dst_old: dst.map(Into::into),
        });
    }

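    /// Emits a sequence implementing an atomic read-modify-write operation
    /// over memory; the previous value at `addr` is placed in `dst`.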
    pub fn atomic_rmw_seq(
        &mut self,
        addr: Address,
        operand: Reg,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
        op: AtomicRmwSeqOp,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(
            &addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );
        self.emit(Inst::AtomicRmwSeq {
            ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
            mem,
            operand: operand.into(),
            temp: writable!(scratch().into()),
            dst_old: dst.map(Into::into),
            op,
        });
    }

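    /// Atomically exchange `operand` with the value at `addr`, placing the
    /// previous value in `dst`.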
    pub fn xchg(
        &mut self,
        addr: Address,
        operand: Reg,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(
            &addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );

        self.emit(Inst::Xchg {
            size: size.into(),
            operand: operand.into(),
            mem,
            dst_old: dst.map(Into::into),
        });
    }
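
    /// Atomic compare-and-exchange: if the value at `addr` equals `expected`,
    /// it is replaced with `replacement`; the previous value is placed in
    /// `dst`.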
    pub fn cmpxchg(
        &mut self,
        addr: Address,
        expected: Reg,
        replacement: Reg,
        dst: WritableReg,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let mem = Self::to_synthetic_amode(
            &addr,
            &mut self.pool,
            &mut self.constants,
            &mut self.buffer,
            flags,
        );

        self.emit(Inst::LockCmpxchg {
            ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
            replacement: replacement.into(),
            expected: expected.into(),
            mem,
            dst_old: dst.map(Into::into),
        })
    }

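    /// Compare register and immediate.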
    pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
        let imm = RegMemImm::imm(imm as u32);

        self.emit(Inst::CmpRmiR {
            size: size.into(),
            opcode: CmpOpcode::Cmp,
            src1: src1.into(),
            src2: GprMemImm::unwrap_new(imm),
        });
    }

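    /// Compare two registers.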
    pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        self.emit(Inst::CmpRmiR {
            size: size.into(),
            opcode: CmpOpcode::Cmp,
            src1: src1.into(),
            src2: src2.into(),
        });
    }

    /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
    /// register.
    pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Ucomiss,
            OperandSize::S64 => SseOpcode::Ucomisd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmCmpRmR {
            op,
            src1: src1.into(),
            src2: Xmm::from(src2).into(),
        });
    }

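    /// Count the bits set in `src`, placing the result back in `src`.
    /// Requires `has_popcnt` and `has_sse42` flags.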
    pub fn popcnt(&mut self, src: Reg, size: OperandSize) {
        assert!(
            self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),
            "Requires has_popcnt and has_sse42 flags"
        );
        self.emit(Inst::UnaryRmR {
            size: size.into(),
            op: args::UnaryRmROpcode::Popcnt,
            src: src.into(),
            dst: src.into(),
        });
    }

    /// Emit a test instruction with two register operands.
    pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
        self.emit(Inst::CmpRmiR {
            size: size.into(),
            opcode: CmpOpcode::Test,
            src1: src1.into(),
            src2: src2.into(),
        })
    }

    /// Set value in dst to `0` or `1` based on flags in status register and
    /// [`IntCmpKind`].
    pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {
        self.setcc_impl(kind.into(), dst);
    }

    /// Set value in dst to `1` if parity flag in status register is set, `0`
    /// otherwise.
    pub fn setp(&mut self, dst: WritableReg) {
        self.setcc_impl(CC::P, dst);
    }

    /// Set value in dst to `1` if parity flag in status register is not set,
    /// `0` otherwise.
    pub fn setnp(&mut self, dst: WritableReg) {
        self.setcc_impl(CC::NP, dst);
    }

    fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {
        // Clear the dst register first: `setcc` only writes the low byte, so
        // the upper bits may otherwise hold stale values.
        // Don't use xor since it updates the status register.
        self.emit(Inst::Imm {
            dst_size: args::OperandSize::Size32, // Always going to be an i32 result.
            simm64: 0,
            dst: dst.map(Into::into),
        });
        // Copy correct bit from status register into dst register.
        self.emit(Inst::Setcc {
            cc,
            dst: dst.map(Into::into),
        });
    }

    /// Store the count of leading zeroes in src in dst.
    /// Requires `has_lzcnt` flag.
    pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");
        self.emit(Inst::UnaryRmR {
            size: size.into(),
            op: args::UnaryRmROpcode::Lzcnt,
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Store the count of trailing zeroes in src in dst.
    /// Requires `has_bmi1` flag.
    pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");
        self.emit(Inst::UnaryRmR {
            size: size.into(),
            op: args::UnaryRmROpcode::Tzcnt,
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Stores position of the most significant bit set in src in dst.
    /// Zero flag is set if src is equal to 0.
    pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::UnaryRmR {
            size: size.into(),
            op: args::UnaryRmROpcode::Bsr,
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Performs integer negation on src and places result in dst.
    pub fn neg(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::Neg {
            size: size.into(),
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Stores position of the least significant bit set in src in dst.
    /// Zero flag is set if src is equal to 0.
    pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::UnaryRmR {
            size: size.into(),
            op: args::UnaryRmROpcode::Bsf,
            src: src.into(),
            dst: dst.map(Into::into),
        });
    }

    /// Performs float addition on src and dst and places result in dst.
    pub fn xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Addss,
            OperandSize::S64 => SseOpcode::Addsd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmRUnaligned {
            op,
            src1: Xmm::from(dst.to_reg()).into(),
            src2: Xmm::from(src).into(),
            dst: dst.map(Into::into),
        });
    }

    /// Performs float subtraction on src and dst and places result in dst.
    pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Subss,
            OperandSize::S64 => SseOpcode::Subsd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmRUnaligned {
            op,
            src1: Xmm::from(dst.to_reg()).into(),
            src2: Xmm::from(src).into(),
            dst: dst.map(Into::into),
        });
    }

    /// Performs float multiplication on src and dst and places result in dst.
    pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Mulss,
            OperandSize::S64 => SseOpcode::Mulsd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmRUnaligned {
            op,
            src1: Xmm::from(dst.to_reg()).into(),
            src2: Xmm::from(src).into(),
            dst: dst.map(Into::into),
        });
    }

    /// Performs float division on src and dst and places result in dst.
    pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Divss,
            OperandSize::S64 => SseOpcode::Divsd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        self.emit(Inst::XmmRmRUnaligned {
            op,
            src1: Xmm::from(dst.to_reg()).into(),
            src2: Xmm::from(src).into(),
            dst: dst.map(Into::into),
        });
    }

    /// Minimum for src and dst XMM registers with results put in dst.
    pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::XmmMinMaxSeq {
            size: size.into(),
            is_min: true,
            lhs: src.into(),
            rhs: dst.to_reg().into(),
            dst: dst.map(Into::into),
        });
    }

    /// Maximum for src and dst XMM registers with results put in dst.
    pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::XmmMinMaxSeq {
            size: size.into(),
            is_min: false,
            lhs: src.into(),
            rhs: dst.to_reg().into(),
            dst: dst.map(Into::into),
        });
    }

    /// Perform rounding operation on float register src and place results in
    /// float register dst.
    pub fn xmm_rounds_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: RoundingMode,
        size: OperandSize,
    ) {
        let op = match size {
            OperandSize::S32 => SseOpcode::Roundss,
            OperandSize::S64 => SseOpcode::Roundsd,
            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
        };

        let imm: u8 = match mode {
            RoundingMode::Nearest => 0x00,
            RoundingMode::Down => 0x01,
            RoundingMode::Up => 0x02,
            RoundingMode::Zero => 0x03,
        };

        self.emit(Inst::XmmUnaryRmRImm {
            op,
            src: XmmMemAligned::from(Xmm::from(src)),
            imm,
            dst: dst.map(Into::into),
        })
    }

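    /// Performs the square root of src and places the result in dst.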
1612    pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1613        let op = match size {
1614            OperandSize::S32 => SseOpcode::Sqrtss,
1615            OperandSize::S64 => SseOpcode::Sqrtsd,
1616            OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),
1617        };
1618
1619        self.emit(Inst::XmmRmR {
1620            op,
1621            src2: Xmm::from(src).into(),
1622            src1: dst.to_reg().into(),
1623            dst: dst.map(Into::into),
1624        })
1625    }
1626
1627    /// Emit a call to an unknown location through a register.
1628    pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
1629        self.emit(Inst::CallUnknown {
1630            info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
1631        });
1632    }
1633
1634    /// Emit a call to a locally defined function through an index.
1635    pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
1636        self.emit(Inst::CallKnown {
1637            info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
1638        });
1639    }
1640
1641    /// Emit a call to a well-known libcall.
1642    pub fn call_with_lib(&mut self, cc: CallingConvention, lib: LibCall, dst: Reg) {
1643        let dest = ExternalName::LibCall(lib);
1644
1645        // `use_colocated_libcalls` is never `true` from within Wasmtime,
1646        // so always require loading the libcall to a register and use
1647        // a `Far` relocation distance to ensure the right relocation when
1648        // emitting to binary.
1649        //
1650        // See [wasmtime::engine::Engine::check_compatible_with_shared_flag] and
1651        // [wasmtime_cranelift::obj::ModuleTextBuilder::append_func]
1652        self.emit(Inst::LoadExtName {
1653            dst: Writable::from_reg(dst.into()),
1654            name: Box::new(dest),
1655            offset: 0,
1656            distance: RelocDistance::Far,
1657        });
1658        self.call_with_reg(cc, dst);
1659    }
1660
1661    /// Emits a conditional jump to the given label.
1662    pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
1663        self.emit(Inst::WinchJmpIf {
1664            cc: cc.into(),
1665            taken,
1666        });
1667    }
1668
1669    /// Performs an unconditional jump to the given label.
1670    pub fn jmp(&mut self, target: MachLabel) {
1671        self.emit(Inst::JmpKnown { dst: target });
1672    }
1673
1674    /// Emits a jump table sequence.
1675    pub fn jmp_table(
1676        &mut self,
1677        targets: SmallVec<[MachLabel; 4]>,
1678        default: MachLabel,
1679        index: Reg,
1680        tmp1: Reg,
1681        tmp2: Reg,
1682    ) {
1683        self.emit(Inst::JmpTableSeq {
1684            idx: index.into(),
1685            tmp1: Writable::from_reg(tmp1.into()),
1686            tmp2: Writable::from_reg(tmp2.into()),
1687            default_target: default,
1688            targets: Box::new(targets.to_vec()),
1689        })
1690    }
1691
1692    /// Emit a trap instruction.
1693    pub fn trap(&mut self, code: TrapCode) {
1694        self.emit(Inst::Ud2 { trap_code: code })
1695    }
1696
1697    /// Conditional trap.
1698    pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
1699        self.emit(Inst::TrapIf {
1700            cc: cc.into(),
1701            trap_code,
1702        });
1703    }
1704
    /// Load the effective address of `addr` into `dst`.
1706    pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
1707        let addr = Self::to_synthetic_amode(
1708            addr,
1709            &mut self.pool,
1710            &mut self.constants,
1711            &mut self.buffer,
1712            MemFlags::trusted(),
1713        );
1714        self.emit(Inst::LoadEffectiveAddress {
1715            addr,
1716            dst: dst.map(Into::into),
1717            size: size.into(),
1718        });
1719    }
1720
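    /// Add with carry: `dst = dst + src + CF`.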
1721    pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1722        self.emit(Inst::AluRmiR {
1723            size: size.into(),
1724            op: AluRmiROpcode::Adc,
1725            src1: dst.to_reg().into(),
1726            src2: src.into(),
1727            dst: dst.map(Into::into),
1728        });
1729    }
1730
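    /// Subtract with borrow: `dst = dst - (src + CF)`.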
1731    pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
1732        self.emit(Inst::AluRmiR {
1733            size: size.into(),
1734            op: AluRmiROpcode::Sbb,
1735            src1: dst.to_reg().into(),
1736            src2: src.into(),
1737            dst: dst.map(Into::into),
1738        });
1739    }
1740
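    /// Double-width multiply of `lhs` and `rhs`, placing the low half of the
    /// product in `dst_lo` and the high half in `dst_hi`.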
1741    pub fn mul_wide(
1742        &mut self,
1743        dst_lo: WritableReg,
1744        dst_hi: WritableReg,
1745        lhs: Reg,
1746        rhs: Reg,
1747        kind: MulWideKind,
1748        size: OperandSize,
1749    ) {
1750        self.emit(Inst::Mul {
1751            signed: kind == MulWideKind::Signed,
1752            size: size.into(),
1753            src1: lhs.into(),
1754            src2: rhs.into(),
1755            dst_lo: dst_lo.to_reg().into(),
1756            dst_hi: dst_hi.to_reg().into(),
1757        });
1758    }
1759
1760    /// Shuffles bytes in `src` according to contents of `mask` and puts
1761    /// result in `dst`.
1762    pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
1763        let mask = Self::to_synthetic_amode(
1764            mask,
1765            &mut self.pool,
1766            &mut self.constants,
1767            &mut self.buffer,
1768            MemFlags::trusted(),
1769        );
1770
1771        self.emit(Inst::XmmRmiRVex {
1772            op: args::AvxOpcode::Vpshufb,
1773            src1: src.into(),
1774            src2: XmmMemImm::unwrap_new(RegMemImm::Mem { addr: mask }),
1775            dst: dst.to_reg().into(),
1776        });
1777    }
1778
1779    /// Shuffles bytes in `src` according to contents of `mask` and puts
1780    /// result in `dst`.
1781    pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
1782        self.emit(Inst::XmmRmiRVex {
1783            op: args::AvxOpcode::Vpshufb,
1784            src1: src.into(),
1785            src2: XmmMemImm::unwrap_new(RegMemImm::reg(mask.into())),
1786            dst: dst.to_reg().into(),
1787        })
1788    }
1789
1790    /// Bitwise OR of `src1` and `src2`.
1791    pub fn vpor(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1792        self.emit(Inst::XmmRmiRVex {
1793            op: args::AvxOpcode::Vpor,
1794            src1: src1.into(),
1795            src2: XmmMemImm::unwrap_new(src2.into()),
1796            dst: dst.to_reg().into(),
1797        })
1798    }
1799
    /// Add unsigned integers with unsigned saturation.
    ///
    /// Adds the source operands; any byte result that would exceed the
    /// maximum unsigned byte value is saturated to 0xFF.
1804    pub fn xmm_vpaddusb_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
1805        let src2 = Self::to_synthetic_amode(
1806            src2,
1807            &mut self.pool,
1808            &mut self.constants,
1809            &mut self.buffer,
1810            MemFlags::trusted(),
1811        );
1812
1813        self.emit(Inst::XmmRmiRVex {
1814            op: args::AvxOpcode::Vpaddusb,
1815            src1: src1.into(),
1816            src2: XmmMemImm::unwrap_new(RegMemImm::mem(src2)),
1817            dst: dst.to_reg().into(),
1818        })
1819    }
1820
    /// Converts an operand size to the appropriate opcode for `vpadd`.
1822    fn xmm_vpadd_opcode(size: OperandSize) -> AvxOpcode {
1823        match size {
1824            OperandSize::S8 => AvxOpcode::Vpaddb,
1825            OperandSize::S32 => AvxOpcode::Vpaddd,
1826            _ => unimplemented!(),
1827        }
1828    }
1829
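    /// Adds the vector of integers in `src1` to the vector of integers at
    /// memory address `src2` and puts the results in `dst`.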
1830    pub fn xmm_vpadd_rmr(
1831        &mut self,
1832        src1: Reg,
1833        src2: &Address,
1834        dst: WritableReg,
1835        size: OperandSize,
1836    ) {
1837        let address = Self::to_synthetic_amode(
1838            src2,
1839            &mut self.pool,
1840            &mut self.constants,
1841            &mut self.buffer,
1842            MemFlags::trusted(),
1843        );
1844
1845        self.emit(Inst::XmmRmiRVex {
1846            op: Self::xmm_vpadd_opcode(size),
1847            src1: src1.into(),
1848            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
1849            dst: dst.to_reg().into(),
1850        });
1851    }
1852
1853    /// Adds vectors of integers in `src1` and `src2` and puts the results in
1854    /// `dst`.
1855    pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
1856        self.emit(Inst::XmmRmiRVex {
1857            op: Self::xmm_vpadd_opcode(size),
1858            src1: src1.into(),
1859            src2: src2.into(),
1860            dst: dst.to_reg().into(),
1861        });
1862    }
1863
1864    pub fn fence(&mut self, kind: FenceKind) {
1865        self.emit(Inst::Fence { kind });
1866    }
1867
    /// Extracts the lane of `src` selected by `lane` into `dst`,
    /// zero-extended.
1869    pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
1870        self.emit(Inst::XmmToGprImmVex {
1871            op: Self::vpextr_opcode(size),
1872            src: src.into(),
1873            dst: dst.to_reg().into(),
1874            imm: lane,
1875        });
1876    }
1877
    /// Copies a value from `src2` into the lane of `src1` selected by
    /// `count`, placing the result in `dst`.
1880    pub fn xmm_vpinsr_rrm(
1881        &mut self,
1882        dst: WritableReg,
1883        src1: Reg,
1884        src2: &Address,
1885        count: u8,
1886        size: OperandSize,
1887    ) {
1888        let src2 = Self::to_synthetic_amode(
1889            src2,
1890            &mut self.pool,
1891            &mut self.constants,
1892            &mut self.buffer,
1893            MemFlags::trusted(),
1894        );
1895
1896        self.emit(Inst::XmmVexPinsr {
1897            op: Self::vpinsr_opcode(size),
1898            src1: src1.into(),
1899            src2: GprMem::unwrap_new(RegMem::mem(src2)),
1900            dst: dst.to_reg().into(),
1901            imm: count,
1902        });
1903    }
1904
    /// Copies a value from `src2` into the lane of `src1` selected by
    /// `count`, placing the result in `dst`.
1907    pub fn xmm_vpinsr_rrr(
1908        &mut self,
1909        dst: WritableReg,
1910        src1: Reg,
1911        src2: Reg,
1912        count: u8,
1913        size: OperandSize,
1914    ) {
1915        self.emit(Inst::XmmVexPinsr {
1916            op: Self::vpinsr_opcode(size),
1917            src1: src1.into(),
1918            src2: src2.into(),
1919            dst: dst.to_reg().into(),
1920            imm: count,
1921        });
1922    }
1923
1924    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1925    pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address, imm: u8) {
1926        let src2 = Self::to_synthetic_amode(
1927            src2,
1928            &mut self.pool,
1929            &mut self.constants,
1930            &mut self.buffer,
1931            MemFlags::trusted(),
1932        );
1933
1934        self.emit(Inst::XmmRmRImmVex {
1935            op: AvxOpcode::Vinsertps,
1936            src1: src1.into(),
1937            src2: XmmMem::unwrap_new(RegMem::mem(src2)),
1938            dst: dst.to_reg().into(),
1939            imm,
1940        });
1941    }
1942
1943    /// Copy a 32-bit float in `src2`, merge into `src1`, and put result in `dst`.
1944    pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
1945        self.emit(Inst::XmmRmRImmVex {
1946            op: AvxOpcode::Vinsertps,
1947            src1: src1.into(),
1948            src2: XmmMem::unwrap_new(RegMem::reg(src2.into())),
1949            dst: dst.to_reg().into(),
1950            imm,
1951        });
1952    }
1953
    /// Moves the lower 64-bit float of `src2` into the lower 64 bits of `dst`
    /// and the upper 64 bits of `src1` into the upper 64 bits of `dst`.
1956    pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
1957        self.emit(Inst::XmmRmiRVex {
1958            op: AvxOpcode::Vmovsd,
1959            src1: src1.into(),
1960            src2: XmmMemImm::unwrap_new(src2.into()),
1961            dst: dst.to_reg().into(),
1962        })
1963    }
1964
    /// Moves a 64-bit float from memory at `src` into the lower 64 bits of
    /// `dst`, zeroing the upper 64 bits of `dst`.
1967    pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
1968        let src = Self::to_synthetic_amode(
1969            src,
1970            &mut self.pool,
1971            &mut self.constants,
1972            &mut self.buffer,
1973            MemFlags::trusted(),
1974        );
1975
1976        self.emit(Inst::XmmUnaryRmRVex {
1977            op: AvxOpcode::Vmovsd,
1978            src: XmmMem::unwrap_new(RegMem::mem(src)),
1979            dst: dst.to_reg().into(),
1980        })
1981    }
1982
    /// Moves two 32-bit floats from memory at `src2` to the upper 64 bits of
    /// `dst`. Copies two 32-bit floats from the lower 64 bits of `src1` to
    /// the lower 64 bits of `dst`.
1986    pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
1987        let src2 = Self::to_synthetic_amode(
1988            src2,
1989            &mut self.pool,
1990            &mut self.constants,
1991            &mut self.buffer,
1992            MemFlags::trusted(),
1993        );
1994
1995        self.emit(Inst::XmmRmiRVex {
1996            op: AvxOpcode::Vmovlhps,
1997            src1: src1.into(),
1998            src2: XmmMemImm::unwrap_new(RegMemImm::mem(src2)),
1999            dst: dst.to_reg().into(),
2000        });
2001    }
2002
    /// Moves two 32-bit floats from the lower 64 bits of `src2` to the upper
    /// 64 bits of `dst`. Copies two 32-bit floats from the lower 64 bits of
    /// `src1` to the lower 64 bits of `dst`.
2006    pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
2007        self.emit(Inst::XmmRmiRVex {
2008            op: AvxOpcode::Vmovlhps,
2009            src1: src1.into(),
2010            src2: XmmMemImm::unwrap_new(src2.into()),
2011            dst: dst.to_reg().into(),
2012        });
2013    }
2014
2015    /// Move unaligned packed integer values from address `src` to `dst`.
2016    pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
2017        let src = Self::to_synthetic_amode(
2018            src,
2019            &mut self.pool,
2020            &mut self.constants,
2021            &mut self.buffer,
2022            flags,
2023        );
2024        self.emit(Inst::XmmUnaryRmRVex {
2025            op: AvxOpcode::Vmovdqu,
2026            src: XmmMem::unwrap_new(RegMem::mem(src)),
2027            dst: dst.map(Into::into),
2028        });
2029    }
2030
    /// Move an integer from `src` to the XMM register `dst` using an AVX
    /// instruction.
2032    pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2033        let op = match size {
2034            OperandSize::S32 => AvxOpcode::Vmovd,
2035            OperandSize::S64 => AvxOpcode::Vmovq,
2036            _ => unreachable!(),
2037        };
2038
2039        self.emit(Inst::GprToXmmVex {
2040            op,
2041            src: src.into(),
2042            dst: dst.map(Into::into),
2043            src_size: size.into(),
2044        })
2045    }
2046
    /// Returns the `vpinsr` opcode to use for the given operand size.
2048    fn vpinsr_opcode(size: OperandSize) -> AvxOpcode {
2049        match size {
2050            OperandSize::S8 => AvxOpcode::Vpinsrb,
2051            OperandSize::S16 => AvxOpcode::Vpinsrw,
2052            OperandSize::S32 => AvxOpcode::Vpinsrd,
2053            OperandSize::S64 => AvxOpcode::Vpinsrq,
2054            _ => unimplemented!(),
2055        }
2056    }
2057
2058    /// Perform an AVX opcode `op` involving registers `src1` and `src2`, writing the
2059    /// result to `dst`.
2060    pub fn xmm_vex_rr(&mut self, op: AvxOpcode, src1: Reg, src2: Reg, dst: WritableReg) {
2061        self.emit(Inst::XmmRmiRVex {
2062            op,
2063            src1: src1.into(),
2064            src2: src2.into(),
2065            dst: dst.map(Into::into),
2066        })
2067    }
2068
2069    /// Perform an AVX opcode `op` involving register `src1` and an immediate `imm`, writing the
2070    /// result to `dst`.
2071    pub fn xmm_vex_ri(&mut self, op: AvxOpcode, src1: Reg, imm: u32, dst: WritableReg) {
2072        self.emit(Inst::XmmRmiRVex {
2073            op,
2074            src1: src1.into(),
2075            src2: XmmMemImm::unwrap_new(RegMemImm::imm(imm)),
2076            dst: dst.map(Into::into),
2077        })
2078    }
2079
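    /// Sets the status flags according to a bitwise test of `src1` and
    /// `src2` (`vptest`).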
2080    pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
2081        self.emit(Inst::XmmCmpRmRVex {
2082            op: AvxOpcode::Vptest,
2083            src1: src1.into(),
2084            src2: src2.into(),
2085        })
2086    }
2087
    /// Returns the `vpextr` opcode to use for the given operand size.
2089    fn vpextr_opcode(size: OperandSize) -> AvxOpcode {
2090        match size {
2091            OperandSize::S8 => AvxOpcode::Vpextrb,
2092            OperandSize::S16 => AvxOpcode::Vpextrw,
2093            OperandSize::S32 => AvxOpcode::Vpextrd,
2094            OperandSize::S64 => AvxOpcode::Vpextrq,
2095            _ => unimplemented!(),
2096        }
2097    }
2098
    /// Extracts the lane of `src` selected by `lane` and stores it to memory
    /// at `addr`.
2100    pub(crate) fn xmm_vpextr_rm(
2101        &mut self,
2102        addr: &Address,
2103        src: Reg,
2104        lane: u8,
2105        size: OperandSize,
2106        flags: MemFlags,
2107    ) -> anyhow::Result<()> {
2108        assert!(addr.is_offset());
2109        let dst = Self::to_synthetic_amode(
2110            addr,
2111            &mut self.pool,
2112            &mut self.constants,
2113            &mut self.buffer,
2114            flags,
2115        );
2116
2117        self.emit(Inst::XmmMovRMImmVex {
2118            op: Self::vpextr_opcode(size),
2119            src: src.into(),
2120            dst,
2121            imm: lane,
2122        });
2123
2124        Ok(())
2125    }
2126
    /// Converts the vector in `src` as selected by `kind` (between integer
    /// and float lanes, or between float widths), placing the result in
    /// `dst`.
2128    pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
2129        let op = match kind {
2130            VcvtKind::I32ToF32 => AvxOpcode::Vcvtdq2ps,
2131            VcvtKind::I32ToF64 => AvxOpcode::Vcvtdq2pd,
2132            VcvtKind::F64ToF32 => AvxOpcode::Vcvtpd2ps,
2133            VcvtKind::F64ToI32 => AvxOpcode::Vcvttpd2dq,
2134            VcvtKind::F32ToF64 => AvxOpcode::Vcvtps2pd,
2135            VcvtKind::F32ToI32 => AvxOpcode::Vcvttps2dq,
2136        };
2137
2138        self.emit(Inst::XmmUnaryRmRVex {
2139            op,
2140            src: src.into(),
2141            dst: dst.to_reg().into(),
2142        });
2143    }
2144
    /// Shifts each lane of the vector in `src` left by `imm`.
2146    pub fn xmm_vpsll_rr(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2147        let op = match size {
2148            OperandSize::S32 => AvxOpcode::Vpslld,
2149            OperandSize::S64 => AvxOpcode::Vpsllq,
2150            _ => unimplemented!(),
2151        };
2152
2153        self.emit(Inst::XmmRmiRVex {
2154            op,
2155            src1: src.into(),
2156            src2: XmmMemImm::unwrap_new(RegMemImm::imm(imm)),
2157            dst: dst.to_reg().into(),
2158        });
2159    }
2160
    /// Shifts each lane of the vector in `src` right by `imm` (logical).
2162    pub fn xmm_vpsrl_rr(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2163        let op = match size {
2164            OperandSize::S16 => AvxOpcode::Vpsrlw,
2165            OperandSize::S32 => AvxOpcode::Vpsrld,
2166            OperandSize::S64 => AvxOpcode::Vpsrlq,
2167            _ => unimplemented!(),
2168        };
2169
2170        self.emit(Inst::XmmRmiRVex {
2171            op,
2172            src1: src.into(),
2173            src2: XmmMemImm::unwrap_new(RegMemImm::imm(imm)),
2174            dst: dst.to_reg().into(),
2175        })
2176    }
2177
    /// Subtracts the integers in vector `src2` from the integers in vector
    /// `src1` and puts the results in `dst`.
2179    pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2180        let op = match size {
2181            OperandSize::S32 => AvxOpcode::Vpsubd,
2182            OperandSize::S64 => AvxOpcode::Vpsubq,
2183            _ => unimplemented!(),
2184        };
2185
2186        self.emit(Inst::XmmRmiRVex {
2187            op,
2188            src1: src1.into(),
2189            src2: src2.into(),
2190            dst: dst.to_reg().into(),
2191        })
2192    }
2193
2194    fn vaddp_opcode(size: OperandSize) -> AvxOpcode {
2195        match size {
2196            OperandSize::S32 => AvxOpcode::Vaddps,
2197            OperandSize::S64 => AvxOpcode::Vaddpd,
2198            _ => unimplemented!(),
2199        }
2200    }
2201
    /// Add the floats in vector `src1` to the floats in the vector at memory
    /// address `src2`.
2203    pub fn xmm_vaddp_rrm(
2204        &mut self,
2205        src1: Reg,
2206        src2: &Address,
2207        dst: WritableReg,
2208        size: OperandSize,
2209    ) {
2210        let address = Self::to_synthetic_amode(
2211            src2,
2212            &mut self.pool,
2213            &mut self.constants,
2214            &mut self.buffer,
2215            MemFlags::trusted(),
2216        );
2217
2218        self.emit(Inst::XmmRmiRVex {
2219            op: Self::vaddp_opcode(size),
2220            src1: src1.into(),
2221            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2222            dst: dst.to_reg().into(),
2223        });
2224    }
2225
2226    /// Add floats in vector `src1` to floats in vector `src2`.
2227    pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2228        self.emit(Inst::XmmRmiRVex {
2229            op: Self::vaddp_opcode(size),
2230            src1: src1.into(),
2231            src2: src2.into(),
2232            dst: dst.to_reg().into(),
2233        })
2234    }
2235
2236    fn vpcmpeq_opcode(size: OperandSize) -> AvxOpcode {
2237        match size {
2238            OperandSize::S8 => AvxOpcode::Vpcmpeqb,
2239            OperandSize::S16 => AvxOpcode::Vpcmpeqw,
2240            OperandSize::S32 => AvxOpcode::Vpcmpeqd,
2241            OperandSize::S64 => AvxOpcode::Vpcmpeqq,
2242            _ => unimplemented!(),
2243        }
2244    }
2245
    /// Compare the vector register `lhs` with the vector of integers at
    /// `address` for equality between packed integers and write the
    /// resulting vector into `dst`.
2249    pub fn xmm_vpcmpeq_rrm(
2250        &mut self,
2251        dst: WritableReg,
2252        lhs: Reg,
2253        address: &Address,
2254        size: OperandSize,
2255    ) {
2256        let address = Self::to_synthetic_amode(
2257            address,
2258            &mut self.pool,
2259            &mut self.constants,
2260            &mut self.buffer,
2261            MemFlags::trusted(),
2262        );
2263
2264        self.emit(Inst::XmmRmiRVex {
2265            op: Self::vpcmpeq_opcode(size),
2266            src1: lhs.into(),
2267            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2268            dst: dst.to_reg().into(),
2269        });
2270    }
2271
2272    /// Compare vector registers `lhs` and `rhs` for equality between packed
2273    /// integers and write the resulting vector into `dst`.
2274    pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2275        self.emit(Inst::XmmRmiRVex {
2276            op: Self::vpcmpeq_opcode(size),
2277            src1: lhs.into(),
2278            src2: XmmMemImm::unwrap_new(rhs.into()),
2279            dst: dst.to_reg().into(),
2280        })
2281    }
2282
2283    /// Performs a greater than comparison with vectors of signed integers in
2284    /// `lhs` and `rhs` and puts the results in `dst`.
2285    pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2286        let op = match size {
2287            OperandSize::S8 => AvxOpcode::Vpcmpgtb,
2288            OperandSize::S16 => AvxOpcode::Vpcmpgtw,
2289            OperandSize::S32 => AvxOpcode::Vpcmpgtd,
2290            OperandSize::S64 => AvxOpcode::Vpcmpgtq,
2291            _ => unimplemented!(),
2292        };
2293
2294        self.emit(Inst::XmmRmiRVex {
2295            op,
2296            src1: lhs.into(),
2297            src2: XmmMemImm::unwrap_new(rhs.into()),
2298            dst: dst.to_reg().into(),
2299        })
2300    }
2301
2302    /// Performs a max operation with vectors of signed integers in `lhs` and
2303    /// `rhs` and puts the results in `dst`.
2304    pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2305        let op = match size {
2306            OperandSize::S8 => AvxOpcode::Vpmaxsb,
2307            OperandSize::S16 => AvxOpcode::Vpmaxsw,
2308            OperandSize::S32 => AvxOpcode::Vpmaxsd,
2309            _ => unimplemented!(),
2310        };
2311
2312        self.emit(Inst::XmmRmiRVex {
2313            op,
2314            src1: lhs.into(),
2315            src2: XmmMemImm::unwrap_new(rhs.into()),
2316            dst: dst.to_reg().into(),
2317        })
2318    }
2319
2320    /// Performs a max operation with vectors of unsigned integers in `lhs` and
2321    /// `rhs` and puts the results in `dst`.
2322    pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2323        let op = match size {
2324            OperandSize::S8 => AvxOpcode::Vpmaxub,
2325            OperandSize::S16 => AvxOpcode::Vpmaxuw,
2326            OperandSize::S32 => AvxOpcode::Vpmaxud,
2327            _ => unimplemented!(),
2328        };
2329
2330        self.emit(Inst::XmmRmiRVex {
2331            op,
2332            src1: lhs.into(),
2333            src2: XmmMemImm::unwrap_new(rhs.into()),
2334            dst: dst.to_reg().into(),
2335        })
2336    }
2337
2338    /// Performs a min operation with vectors of signed integers in `lhs` and
2339    /// `rhs` and puts the results in `dst`.
2340    pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2341        let op = match size {
2342            OperandSize::S8 => AvxOpcode::Vpminsb,
2343            OperandSize::S16 => AvxOpcode::Vpminsw,
2344            OperandSize::S32 => AvxOpcode::Vpminsd,
2345            _ => unimplemented!(),
2346        };
2347
2348        self.emit(Inst::XmmRmiRVex {
2349            op,
2350            src1: lhs.into(),
2351            src2: XmmMemImm::unwrap_new(rhs.into()),
2352            dst: dst.to_reg().into(),
2353        })
2354    }
2355
2356    /// Performs a min operation with vectors of unsigned integers in `lhs` and
2357    /// `rhs` and puts the results in `dst`.
2358    pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
2359        let op = match size {
2360            OperandSize::S8 => AvxOpcode::Vpminub,
2361            OperandSize::S16 => AvxOpcode::Vpminuw,
2362            OperandSize::S32 => AvxOpcode::Vpminud,
2363            _ => unimplemented!(),
2364        };
2365
2366        self.emit(Inst::XmmRmiRVex {
2367            op,
2368            src1: lhs.into(),
2369            src2: XmmMemImm::unwrap_new(rhs.into()),
2370            dst: dst.to_reg().into(),
2371        })
2372    }
2373
2374    /// Performs a comparison operation between vectors of floats in `lhs` and
2375    /// `rhs` and puts the results in `dst`.
2376    pub fn xmm_vcmpp_rrr(
2377        &mut self,
2378        dst: WritableReg,
2379        lhs: Reg,
2380        rhs: Reg,
2381        size: OperandSize,
2382        kind: VcmpKind,
2383    ) {
2384        let op = match size {
2385            OperandSize::S32 => AvxOpcode::Vcmpps,
2386            OperandSize::S64 => AvxOpcode::Vcmppd,
2387            _ => unimplemented!(),
2388        };
2389
2390        self.emit(Inst::XmmRmRImmVex {
2391            op,
2392            src1: lhs.into(),
2393            src2: XmmMem::unwrap_new(rhs.into()),
2394            dst: dst.to_reg().into(),
2395            imm: match kind {
2396                VcmpKind::Eq => 0,
2397                VcmpKind::Lt => 1,
2398                VcmpKind::Le => 2,
2399                VcmpKind::Unord => 3,
2400                VcmpKind::Ne => 4,
2401            },
2402        });
2403    }
2404
2405    /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
2406    /// interleaves them in `dst`.
2407    pub fn xmm_vunpcklp_rrm(
2408        &mut self,
2409        src1: Reg,
2410        src2: &Address,
2411        dst: WritableReg,
2412        size: OperandSize,
2413    ) {
2414        let op = match size {
2415            OperandSize::S32 => AvxOpcode::Vunpcklps,
2416            _ => unimplemented!(),
2417        };
2418
2419        let address = Self::to_synthetic_amode(
2420            src2,
2421            &mut self.pool,
2422            &mut self.constants,
2423            &mut self.buffer,
2424            MemFlags::trusted(),
2425        );
2426
2427        self.emit(Inst::XmmRmiRVex {
2428            op,
2429            src1: src1.into(),
2430            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2431            dst: dst.to_reg().into(),
2432        });
2433    }
2434
2435    /// Performs a subtraction on two vectors of floats and puts the results in
2436    /// `dst`.
2437    pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
2438        let op = match size {
2439            OperandSize::S64 => AvxOpcode::Vsubpd,
2440            _ => unimplemented!(),
2441        };
2442
2443        let address = Self::to_synthetic_amode(
2444            src2,
2445            &mut self.pool,
2446            &mut self.constants,
2447            &mut self.buffer,
2448            MemFlags::trusted(),
2449        );
2450
2451        self.emit(Inst::XmmRmiRVex {
2452            op,
2453            src1: src1.into(),
2454            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2455            dst: dst.to_reg().into(),
2456        });
2457    }
2458
2459    /// Performs a subtraction on two vectors of floats and puts the results in
2460    /// `dst`.
2461    pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2462        let op = match size {
2463            OperandSize::S32 => AvxOpcode::Vsubps,
2464            OperandSize::S64 => AvxOpcode::Vsubpd,
2465            _ => unimplemented!(),
2466        };
2467
2468        self.emit(Inst::XmmRmiRVex {
2469            op,
2470            src1: src1.into(),
2471            src2: src2.into(),
2472            dst: dst.to_reg().into(),
2473        });
2474    }
2475
2476    /// Converts a vector of signed integers into a vector of narrower integers
2477    /// using saturation to handle overflow.
2478    pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2479        let op = match size {
2480            OperandSize::S8 => AvxOpcode::Vpacksswb,
2481            OperandSize::S16 => AvxOpcode::Vpackssdw,
2482            _ => unimplemented!(),
2483        };
2484
2485        self.emit(Inst::XmmRmiRVex {
2486            op,
2487            src1: src1.into(),
2488            src2: src2.into(),
2489            dst: dst.to_reg().into(),
2490        });
2491    }
2492
2493    /// Converts a vector of unsigned integers into a vector of narrower
2494    /// integers using saturation to handle overflow.
2495    pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2496        let op = match size {
2497            OperandSize::S8 => AvxOpcode::Vpackuswb,
2498            OperandSize::S16 => AvxOpcode::Vpackusdw,
2499            _ => unimplemented!(),
2500        };
2501
2502        self.emit(Inst::XmmRmiRVex {
2503            op,
2504            src1: src1.into(),
2505            src2: src2.into(),
2506            dst: dst.to_reg().into(),
2507        });
2508    }
2509
    /// Concatenates `src1` and `src2`, shifts the result right by `imm`
    /// bytes, and puts the result in `dst`.
2512    pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
2513        self.emit(Inst::XmmRmRImmVex {
2514            op: AvxOpcode::Vpalignr,
2515            src1: src1.into(),
2516            src2: src2.into(),
2517            dst: dst.to_reg().into(),
2518            imm,
2519        })
2520    }
2521
2522    /// Unpacks and interleaves the higher lanes of vectors of integers in
2523    /// `src1` and `src2` and puts the results in `dst`.
2524    pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2525        let op = match size {
2526            OperandSize::S8 => AvxOpcode::Vpunpckhbw,
2527            OperandSize::S16 => AvxOpcode::Vpunpckhwd,
2528            _ => unimplemented!(),
2529        };
2530
2531        self.emit(Inst::XmmRmiRVex {
2532            op,
2533            src1: src1.into(),
2534            src2: src2.into(),
2535            dst: dst.to_reg().into(),
2536        });
2537    }
2538
2539    /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
2540    /// the results in `dst`.
2541    pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2542        let op = match size {
2543            OperandSize::S32 => AvxOpcode::Vxorps,
2544            OperandSize::S64 => AvxOpcode::Vxorpd,
2545            _ => unimplemented!(),
2546        };
2547
2548        self.emit(Inst::XmmRmiRVex {
2549            op,
2550            src1: src1.into(),
2551            src2: src2.into(),
2552            dst: dst.to_reg().into(),
2553        });
2554    }
2555
2556    /// Unpacks and interleaves high order data of floats in `src1` and `src2`
2557    /// and puts the results in `dst`.
2558    pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2559        let op = match size {
2560            OperandSize::S32 => AvxOpcode::Vunpckhps,
2561            _ => unimplemented!(),
2562        };
2563
2564        self.emit(Inst::XmmRmiRVex {
2565            op,
2566            src1: src1.into(),
2567            src2: src2.into(),
2568            dst: dst.to_reg().into(),
2569        });
2570    }
2571
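    /// Emit an AVX-512 instruction `op` where `dst` also serves as the first
    /// source operand.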
2572    pub(crate) fn xmm_rm_rvex3(
2573        &mut self,
2574        op: Avx512Opcode,
2575        src1: Reg,
2576        src2: Reg,
2577        dst: WritableReg,
2578    ) {
2579        self.emit(Inst::XmmRmREvex3 {
2580            op,
2581            // `src1` reuses `dst`, and is ignored in emission
2582            src1: dst.to_reg().into(),
2583            src2: src1.into(),
2584            src3: src2.into(),
2585            dst: dst.map(Into::into),
2586        });
2587    }
2588
2589    /// Creates a mask made up of the most significant bit of each byte of
2590    /// `src` and stores the result in `dst`.
2591    pub fn xmm_vpmovmsk_rr(
2592        &mut self,
2593        src: Reg,
2594        dst: WritableReg,
2595        src_size: OperandSize,
2596        dst_size: OperandSize,
2597    ) {
2598        let op = match src_size {
2599            OperandSize::S8 => AvxOpcode::Vpmovmskb,
2600            _ => unimplemented!(),
2601        };
2602
2603        self.emit(Inst::XmmToGprVex {
2604            op,
2605            src: src.into(),
2606            dst: dst.to_reg().into(),
2607            dst_size: dst_size.into(),
2608        });
2609    }
2610
    /// Creates a mask made up of the most significant bit of each float lane
    /// in `src` and stores the result in `dst`.
2613    pub fn xmm_vmovskp_rr(
2614        &mut self,
2615        src: Reg,
2616        dst: WritableReg,
2617        src_size: OperandSize,
2618        dst_size: OperandSize,
2619    ) {
2620        let op = match src_size {
2621            OperandSize::S32 => AvxOpcode::Vmovmskps,
2622            OperandSize::S64 => AvxOpcode::Vmovmskpd,
2623            _ => unimplemented!(),
2624        };
2625
2626        self.emit(Inst::XmmToGprVex {
2627            op,
2628            src: src.into(),
2629            dst: dst.to_reg().into(),
2630            dst_size: dst_size.into(),
2631        })
2632    }
2633
2634    /// Compute the absolute value of elements in vector `src` and put the
2635    /// results in `dst`.
2636    pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2637        let op = match size {
2638            OperandSize::S8 => AvxOpcode::Vpabsb,
2639            OperandSize::S16 => AvxOpcode::Vpabsw,
2640            OperandSize::S32 => AvxOpcode::Vpabsd,
2641            _ => unimplemented!(),
2642        };
2643
2644        self.emit(Inst::XmmUnaryRmRVex {
2645            op,
2646            src: src.into(),
2647            dst: dst.to_reg().into(),
2648        });
2649    }
2650
    /// Arithmetic (sign-preserving) right shift of each lane of the vector in
    /// `src` by `imm`, with the result written to `dst`.
2653    pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
2654        let op = match size {
2655            OperandSize::S32 => AvxOpcode::Vpsrad,
2656            _ => unimplemented!(),
2657        };
2658
2659        self.emit(Inst::XmmRmiRVex {
2660            op,
2661            src1: src.into(),
2662            src2: XmmMemImm::unwrap_new(RegMemImm::imm(imm)),
2663            dst: dst.to_reg().into(),
2664        });
2665    }
2666
2667    fn vandp_opcode(size: OperandSize) -> AvxOpcode {
2668        match size {
2669            OperandSize::S32 => AvxOpcode::Vandps,
2670            OperandSize::S64 => AvxOpcode::Vandpd,
2671            _ => unimplemented!(),
2672        }
2673    }
2674
    /// Perform an `and` operation on the vector of floats in `src1` and the
    /// vector at memory address `src2` and put the results in `dst`.
2677    pub fn xmm_vandp_rrm(
2678        &mut self,
2679        src1: Reg,
2680        src2: &Address,
2681        dst: WritableReg,
2682        size: OperandSize,
2683    ) {
2684        let address = Self::to_synthetic_amode(
2685            src2,
2686            &mut self.pool,
2687            &mut self.constants,
2688            &mut self.buffer,
2689            MemFlags::trusted(),
2690        );
2691
2692        self.emit(Inst::XmmRmiRVex {
2693            op: Self::vandp_opcode(size),
2694            src1: src1.into(),
2695            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2696            dst: dst.to_reg().into(),
2697        });
2698    }
2699
2700    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
2701    /// and put the results in `dst`.
2702    pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2703        self.emit(Inst::XmmRmiRVex {
2704            op: Self::vandp_opcode(size),
2705            src1: src1.into(),
2706            src2: src2.into(),
2707            dst: dst.to_reg().into(),
2708        });
2709    }
2710
2711    /// Perform an `and not` operation on vectors of floats in `src1` and
2712    /// `src2` and put the results in `dst`.
2713    pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2714        let op = match size {
2715            OperandSize::S32 => AvxOpcode::Vandnps,
2716            OperandSize::S64 => AvxOpcode::Vandnpd,
2717            _ => unimplemented!(),
2718        };
2719
2720        self.emit(Inst::XmmRmiRVex {
2721            op,
2722            src1: src1.into(),
2723            src2: src2.into(),
2724            dst: dst.to_reg().into(),
2725        });
2726    }
2727
2728    /// Perform a max operation across two vectors of floats and put the
2729    /// results in `dst`.
2730    pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2731        let op = match size {
2732            OperandSize::S32 => AvxOpcode::Vmaxps,
2733            OperandSize::S64 => AvxOpcode::Vmaxpd,
2734            _ => unimplemented!(),
2735        };
2736
2737        self.emit(Inst::XmmRmiRVex {
2738            op,
2739            src1: src1.into(),
2740            src2: src2.into(),
2741            dst: dst.to_reg().into(),
2742        });
2743    }
2744
2745    fn vminp_opcode(size: OperandSize) -> AvxOpcode {
2746        match size {
2747            OperandSize::S32 => AvxOpcode::Vminps,
2748            OperandSize::S64 => AvxOpcode::Vminpd,
2749            _ => unimplemented!(),
2750        }
2751    }
2752
    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
2755    pub fn xmm_vminp_rrm(
2756        &mut self,
2757        src1: Reg,
2758        src2: &Address,
2759        dst: WritableReg,
2760        size: OperandSize,
2761    ) {
2762        let address = Self::to_synthetic_amode(
2763            src2,
2764            &mut self.pool,
2765            &mut self.constants,
2766            &mut self.buffer,
2767            MemFlags::trusted(),
2768        );
2769
2770        self.emit(Inst::XmmRmiRVex {
2771            op: Self::vminp_opcode(size),
2772            src1: src1.into(),
2773            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2774            dst: dst.to_reg().into(),
2775        });
2776    }
2777
    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
2780    pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2781        self.emit(Inst::XmmRmiRVex {
2782            op: Self::vminp_opcode(size),
2783            src1: src1.into(),
2784            src2: src2.into(),
2785            dst: dst.to_reg().into(),
2786        });
2787    }
2788
    /// Round each lane of a vector of floats according to `mode`.
2790    pub fn xmm_vroundp_rri(
2791        &mut self,
2792        src: Reg,
2793        dst: WritableReg,
2794        mode: VroundMode,
2795        size: OperandSize,
2796    ) {
2797        let op = match size {
2798            OperandSize::S32 => AvxOpcode::Vroundps,
2799            OperandSize::S64 => AvxOpcode::Vroundpd,
2800            _ => unimplemented!(),
2801        };
2802
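        // The immediate uses the same SSE4.1 rounding-control encoding as the
        // scalar `roundss`/`roundsd` above.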
2803        self.emit(Inst::XmmUnaryRmRImmVex {
2804            op,
2805            src: src.into(),
2806            dst: dst.to_reg().into(),
2807            imm: match mode {
2808                VroundMode::TowardNearest => 0,
2809                VroundMode::TowardNegativeInfinity => 1,
2810                VroundMode::TowardPositiveInfinity => 2,
2811                VroundMode::TowardZero => 3,
2812            },
2813        });
2814    }
2815
    /// Shuffles lanes of the vectors of floats in `src1` and `src2` into
    /// `dst`, as selected by `imm`.
2817    pub fn xmm_vshufp_rrri(
2818        &mut self,
2819        src1: Reg,
2820        src2: Reg,
2821        dst: WritableReg,
2822        imm: u8,
2823        size: OperandSize,
2824    ) {
2825        let op = match size {
2826            OperandSize::S32 => AvxOpcode::Vshufps,
2827            _ => unimplemented!(),
2828        };
2829
2830        self.emit(Inst::XmmRmRImmVex {
2831            op,
2832            src1: src1.into(),
2833            src2: src2.into(),
2834            dst: dst.to_reg().into(),
2835            imm,
2836        });
2837    }
2838
    /// Each lane in `src1` is multiplied by the corresponding lane in `src2`,
    /// producing intermediate 32-bit operands. Each intermediate 32-bit
    /// operand is truncated to its 18 most significant bits. Rounding is
    /// performed by adding 1 to the least significant bit of the 18-bit
    /// intermediate result. The 16 bits immediately to the right of the most
    /// significant bit of each 18-bit intermediate result are placed in each
    /// lane of `dst`.
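    /// Equivalently, for each signed 16-bit lane:
    /// `dst[i] = ((src1[i] * src2[i]) + (1 << 14)) >> 15`.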
2845    pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2846        let op = match size {
2847            OperandSize::S16 => AvxOpcode::Vpmulhrsw,
2848            _ => unimplemented!(),
2849        };
2850
2851        self.emit(Inst::XmmRmiRVex {
2852            op,
2853            src1: src1.into(),
2854            src2: src2.into(),
2855            dst: dst.to_reg().into(),
2856        });
2857    }
2858
    /// Performs a bitwise `and` of the vector in `src1` and the vector at
    /// memory address `src2` and stores the results in `dst`.
2861    pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
2862        let address = Self::to_synthetic_amode(
            src2,
2864            &mut self.pool,
2865            &mut self.constants,
2866            &mut self.buffer,
2867            MemFlags::trusted(),
2868        );
2869
2870        self.emit(Inst::XmmRmiRVex {
2871            op: AvxOpcode::Vpand,
2872            src1: src1.into(),
2873            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2874            dst: dst.to_reg().into(),
2875        });
2876    }
2877
    /// Perform an average operation on the vectors of unsigned integers in
    /// `src1` and `src2` and put the results in `dst`.
2880    pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2881        let op = match size {
2882            OperandSize::S8 => AvxOpcode::Vpavgb,
2883            OperandSize::S16 => AvxOpcode::Vpavgw,
2884            _ => unimplemented!(),
2885        };
2886
2887        self.emit(Inst::XmmRmiRVex {
2888            op,
2889            src1: src1.into(),
2890            src2: src2.into(),
2891            dst: dst.to_reg().into(),
2892        });
2893    }
2894
    /// Perform a bitwise `or` of the vectors of floats in `src1` and `src2`
    /// and put the results in `dst`.
2897    pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2898        let op = match size {
2899            OperandSize::S32 => AvxOpcode::Vorps,
2900            OperandSize::S64 => AvxOpcode::Vorpd,
2901            _ => unimplemented!(),
2902        };
2903
2904        self.emit(Inst::XmmRmiRVex {
2905            op,
2906            src1: src1.into(),
2907            src2: src2.into(),
2908            dst: dst.to_reg().into(),
2909        });
2910    }
2911
2912    /// Divide the vector of floats in `src1` by the vector of floats in `src2`
2913    /// and put the results in `dst`.
2914    pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
2915        let op = match size {
2916            OperandSize::S32 => AvxOpcode::Vdivps,
2917            OperandSize::S64 => AvxOpcode::Vdivpd,
2918            _ => unimplemented!(),
2919        };
2920
2921        self.emit(Inst::XmmRmiRVex {
2922            op,
2923            src1: src1.into(),
2924            src2: src2.into(),
2925            dst: dst.to_reg().into(),
2926        });
2927    }
2928
2929    /// Compute square roots of vector of floats in `src` and put the results
2930    /// in `dst`.
2931    pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
2932        let op = match size {
2933            OperandSize::S32 => AvxOpcode::Vsqrtps,
2934            OperandSize::S64 => AvxOpcode::Vsqrtpd,
2935            _ => unimplemented!(),
2936        };
2937
2938        self.emit(Inst::XmmUnaryRmRVex {
2939            op,
2940            src: src.into(),
2941            dst: dst.to_reg().into(),
2942        });
2943    }
2944
2945    /// Multiply and add packed signed and unsigned bytes.
2946    pub fn xmm_vpmaddubs_rmr(
2947        &mut self,
2948        src: Reg,
2949        address: &Address,
2950        dst: WritableReg,
2951        size: OperandSize,
2952    ) {
2953        let address = Self::to_synthetic_amode(
2954            address,
2955            &mut self.pool,
2956            &mut self.constants,
2957            &mut self.buffer,
2958            MemFlags::trusted(),
2959        );
2960
2961        let op = match size {
2962            OperandSize::S16 => AvxOpcode::Vpmaddubsw,
2963            _ => unimplemented!(),
2964        };
2965
2966        self.emit(Inst::XmmRmiRVex {
2967            op,
2968            src1: src.into(),
2969            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2970            dst: dst.to_reg().into(),
2971        });
2972    }
2973
    /// Multiply and add packed integers.
2975    pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2976        let address = Self::to_synthetic_amode(
2977            address,
2978            &mut self.pool,
2979            &mut self.constants,
2980            &mut self.buffer,
2981            MemFlags::trusted(),
2982        );
2983
2984        self.emit(Inst::XmmRmiRVex {
2985            op: AvxOpcode::Vpmaddwd,
2986            src1: src.into(),
2987            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
2988            dst: dst.to_reg().into(),
2989        })
2990    }
2991
    /// Perform a bitwise logical `xor` of the vector in `src` and the vector
    /// at memory address `address` and put the results in `dst`.
2994    pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
2995        let address = Self::to_synthetic_amode(
2996            address,
2997            &mut self.pool,
2998            &mut self.constants,
2999            &mut self.buffer,
3000            MemFlags::trusted(),
3001        );
3002
3003        self.emit(Inst::XmmRmiRVex {
3004            op: AvxOpcode::Vpxor,
3005            src1: src.into(),
3006            src2: XmmMemImm::unwrap_new(RegMemImm::mem(address)),
3007            dst: dst.to_reg().into(),
3008        })
3009    }
3010}
3011
3012/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
3013/// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
/// so 8- and 16-bit operand sizes are not supported.
3015pub(crate) struct PatchableAddToReg {
3016    /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
3017    /// sequence, accepting a 32-bit immediate.
3018    region: PatchRegion,
3019
3020    /// The offset into the patchable region where the patchable constant begins.
3021    constant_offset: usize,
3022}
3023
3024impl PatchableAddToReg {
3025    /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
    /// add-with-immediate occurs. The [`MachBuffer`] will have an add-with-immediate instruction
    /// present in that region, though it will add `0` until [`Self::finalize`] is called.
3028    ///
3029    /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
    /// that 8- and 16-bit addition cannot be supported.
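    ///
    /// A minimal usage sketch (not compiled as a doc-test; the register and
    /// the final immediate below are placeholders):
    ///
    /// ```ignore
    /// // Reserve an `add <reg>, imm32` whose immediate is patched in later.
    /// let patch = PatchableAddToReg::new(reg, OperandSize::S64, &mut buffer);
    /// // ... emit the rest of the code, then learn the final value ...
    /// patch.finalize(final_value, &mut buffer);
    /// ```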
3031    pub(crate) fn new(reg: Reg, size: OperandSize, buf: &mut MachBuffer<Inst>) -> Self {
3032        let open = buf.start_patchable();
3033
3034        // Emit the opcode and register use for the add instruction.
3035        let start = buf.cur_offset();
3036        Self::add_inst_bytes(reg, size, buf);
3037        let constant_offset = usize::try_from(buf.cur_offset() - start).unwrap();
3038
3039        // Emit a placeholder for the 32-bit immediate.
3040        buf.put4(0);
3041
3042        let region = buf.end_patchable(open);
3043
3044        Self {
3045            region,
3046            constant_offset,
3047        }
3048    }
3049
    /// Generate the prefix of the add instruction: the REX byte (depending on
    /// register use), the opcode, and the ModRM byte.
3052    fn add_inst_bytes(reg: Reg, size: OperandSize, buf: &mut MachBuffer<Inst>) {
3053        match size {
3054            OperandSize::S32 | OperandSize::S64 => {}
3055            _ => {
3056                panic!(
3057                    "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
3058                    size.num_bits(),
3059                )
3060            }
3061        }
3062
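        // `add r/m, imm32` is opcode 0x81 with /0 in the ModRM reg field, so
        // the "g" (reg-field) encoding is 0.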
3063        let enc_g = 0;
3064
3065        debug_assert!(reg.is_int());
3066        let enc_e = u8::try_from(reg.hw_enc()).unwrap();
3067
3068        RexFlags::from(args::OperandSize::from(size)).emit_two_op(buf, enc_g, enc_e);
3069
        // The opcode for an ALU group-1 operation with a 32-bit immediate.
3071        buf.put1(0x81);
3072
        // The ModRM byte: register-direct addressing (mod = 0b11), with the
        // /0 extension in the reg field and the target register in r/m.
3074        buf.put1(encode_modrm(0b11, enc_g & 7, enc_e & 7));
3075    }
3076
3077    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
3078    /// value is passed in as an i32, but the instruction encoding is fixed when
3079    /// [`PatchableAddToReg::new`] is called.
3080    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
3081        let slice = self.region.patch(buffer);
3082        debug_assert_eq!(slice.len(), self.constant_offset + 4);
3083        slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
3084    }
3085}