cranelift_codegen/isa/x64/encoding/evex.rs

//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
//! EVEX encoding requires a 4-byte prefix:
//!
//! Byte 0:  0x62
//!         ┌───┬───┬───┬───┬───┬───┬───┬───┐
//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
//!         └───┴───┴───┴───┴───┴───┴───┴───┘
//!
//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
//! Software Developer's Manual, volume 2A).

use super::rex::{self, LegacyPrefixes, OpcodeMap};
use crate::MachBuffer;
use crate::isa::x64::args::{Amode, Avx512TupleType};
use crate::isa::x64::inst::Inst;
use core::ops::RangeInclusive;

/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
/// easier to transform the manual's syntax, e.g. `EVEX.256.66.0F38.W1 1F /r`, into code:
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
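///
/// As a rough sketch (not compiled as a doc test): the 128-bit `vpabsq` form exercised in the
/// tests below, `EVEX.128.66.0F38.W1 1F /r`, could be built as follows, where `dst`, `src`, and
/// `sink` are placeholders for the destination encoding, the source operand, and a `MachBuffer`:
///
/// ```ignore
/// EvexInstruction::new()
///     .length(EvexVectorLength::V128)
///     .prefix(LegacyPrefixes::_66)
///     .map(OpcodeMap::_0F38)
///     .w(true)
///     .opcode(0x1F)
///     .reg(dst)                          // hardware encoding of the destination register
///     .rm(src)                           // a register encoding or an `Amode`
///     .tuple_type(Avx512TupleType::Full) // needed for memory operands (Disp8*N scaling)
///     .encode(&mut sink);
/// ```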
pub struct EvexInstruction {
    bits: u32,
    opcode: u8,
    reg: Register,
    rm: RegisterOrAmode,
    tuple_type: Option<Avx512TupleType>,
    imm: Option<u8>,
}

/// Because some of the bit flags in the EVEX prefix are inverted and users of `EvexInstruction` may
/// choose to skip setting fields, here we set some sane defaults. Note that:
/// - the first byte is always `0x62`; it appears as the lowest byte of the default `bits` value
///   because of the little-endian byte order
/// - some bits are always set to fixed values: bits 10-11 to 0, bit 18 to 1
/// - the remaining set bits correspond to the inverted fields: R, X, B, R' (byte 1), vvvv (byte 2),
///   and V' (byte 3).
///
/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
/// unsetting the W bit, etc.).
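///
/// For reference, the default value `0x08_7C_F0_62` decomposes (little-endian, using the bit
/// ranges defined below) as: byte 0 = `0x62`; byte 1 = `0xF0` (R/X/B/R' = 1, mm = 00); byte 2 =
/// `0x7C` (W = 0, vvvv = 1111, fixed bit 18 = 1, pp = 00); byte 3 = `0x08` (z = 0, L'L = 00,
/// b = 0, V' = 1, aaa = 000).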
impl Default for EvexInstruction {
    fn default() -> Self {
        Self {
            bits: 0x08_7C_F0_62,
            opcode: 0,
            reg: Register::default(),
            rm: RegisterOrAmode::Register(Register::default()),
            tuple_type: None,
            imm: None,
        }
    }
}

#[expect(
    non_upper_case_globals,
    reason = "This makes it easier to match the bit range names to the manual's names"
)]
impl EvexInstruction {
    /// Construct a default EVEX instruction.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the vector length of the instruction. Note that there are sets of instructions (e.g.
    /// rounding, memory broadcast) that modify the same underlying bits; at some point (TODO) we
    /// can add a way to set those context bits and verify that both are not used (e.g. rounding
    /// AND length). For now, this method is very convenient.
    #[inline(always)]
    pub fn length(mut self, length: EvexVectorLength) -> Self {
        self.write(Self::LL, EvexContext::Other { length }.bits() as u32);
        self
    }

    /// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions
    /// pack these into the prefix, not as separate bytes.
    #[inline(always)]
    pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
        self.write(Self::pp, prefix.bits() as u32);
        self
    }

    /// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack
    /// these into the prefix, not as separate bytes.
    #[inline(always)]
    pub fn map(mut self, map: OpcodeMap) -> Self {
        self.write(Self::mm, map.bits() as u32);
        self
    }

    /// Set the W bit, typically used to indicate an instruction operating on 64 bits of an operand
    /// (e.g. 64-bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
    /// prefix.
    #[inline(always)]
    pub fn w(mut self, w: bool) -> Self {
        self.write(Self::W, w as u32);
        self
    }

    /// Set the instruction opcode byte.
    #[inline(always)]
    pub fn opcode(mut self, opcode: u8) -> Self {
        self.opcode = opcode;
        self
    }

    /// Set the "tuple type", which is used to scale 8-bit displacements when a memory operand is
    /// used (see `scaling_for_8bit_disp`).
    #[inline(always)]
    pub fn tuple_type(mut self, tt: Avx512TupleType) -> Self {
        self.tuple_type = Some(tt);
        self
    }

    /// Set the register to use for the `reg` bits; many instructions use this as the write operand.
    /// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension
    /// bits for register encodings of 8 and above).
    #[inline(always)]
    pub fn reg(mut self, reg: impl Into<Register>) -> Self {
        self.reg = reg.into();
        let r = !(self.reg.0 >> 3) & 1;
        let r_ = !(self.reg.0 >> 4) & 1;
        self.write(Self::R, r as u32);
        self.write(Self::R_, r_ as u32);
        self
    }

    /// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for
    /// more details.
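    ///
    /// As an illustrative (hypothetical) example of how assembly masking syntax maps to this API:
    /// an operand written as `{k1}{z}` would correspond to `EvexMasking::Zeroing { k: 1 }`, which
    /// sets `aaa = 001` and `z = 1` in the prefix.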
    #[inline(always)]
    #[cfg_attr(not(test), expect(dead_code, reason = "here for future use"))]
    pub fn mask(mut self, mask: EvexMasking) -> Self {
        self.write(Self::aaa, mask.aaa_bits() as u32);
        self.write(Self::z, mask.z_bit() as u32);
        self
    }

    /// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive
    /// source register in 3-operand instructions (e.g. 2 read, 1 write).
    #[inline(always)]
    pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
        let reg = reg.into();
        self.write(Self::vvvv, !(reg.0 as u32) & 0b1111);
        self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1);
        self
    }

    /// Set the register to use for the `rm` bits; many instructions use this
    /// as the "read from register/memory" operand. Setting this affects both
    /// the ModRM byte (`rm` section) and the EVEX prefix (the extension bits
    /// for register encodings of 8 and above).
    #[inline(always)]
    pub fn rm(mut self, reg: impl Into<RegisterOrAmode>) -> Self {
        // NB: See Table 2-31. 32-Register Support in 64-bit Mode Using EVEX
        // with Embedded REX Bits
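        // For the memory-operand cases, a worked example (matching the
        // `vpabsq 1(%r11, %rdi, 4), %xmm13` test case below): the base %r11 has hardware
        // encoding 11 (0b1011), so its bit 3 feeds B, while the index %rdi has encoding 7
        // (0b0111), so X gets a 0; both values are then stored inverted in the prefix.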
        self.rm = reg.into();
        let x = match &self.rm {
            RegisterOrAmode::Register(r) => r.0 >> 4,
            RegisterOrAmode::Amode(Amode::ImmRegRegShift { index, .. }) => {
                index.to_real_reg().unwrap().hw_enc() >> 3
            }

            // These two modes technically don't use the X bit, so leave it at
            // 0.
            RegisterOrAmode::Amode(Amode::ImmReg { .. }) => 0,
            RegisterOrAmode::Amode(Amode::RipRelative { .. }) => 0,
        };
        // The X bit is stored in an inverted format, so invert it here.
        self.write(Self::X, u32::from(!x & 1));

        let b = match &self.rm {
            RegisterOrAmode::Register(r) => r.0 >> 3,
            RegisterOrAmode::Amode(Amode::ImmReg { base, .. }) => {
                base.to_real_reg().unwrap().hw_enc() >> 3
            }
            RegisterOrAmode::Amode(Amode::ImmRegRegShift { base, .. }) => {
                base.to_real_reg().unwrap().hw_enc() >> 3
            }
            // The 4th bit of %rip is 0
            RegisterOrAmode::Amode(Amode::RipRelative { .. }) => 0,
        };
        // The B bit is stored in an inverted format, so invert it here.
        self.write(Self::B, u32::from(!b & 1));
        self
    }

    /// Set the imm byte.
    #[inline(always)]
    pub fn imm(mut self, imm: u8) -> Self {
        self.imm = Some(imm);
        self
    }

    /// Emit the EVEX-encoded instruction to the code sink:
    ///
    /// - the 4-byte EVEX prefix;
    /// - the opcode byte;
    /// - the ModR/M byte;
    /// - SIB bytes, if necessary;
    /// - an optional immediate, if necessary.
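    ///
    /// For example, the first `vpabsq` test case below emits `62 f2 fd 08` (the prefix), `1f`
    /// (the opcode), and `c1` (the ModR/M byte for xmm0, xmm1), with no SIB byte, displacement,
    /// or immediate.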
    pub fn encode(&self, sink: &mut MachBuffer<Inst>) {
        if let RegisterOrAmode::Amode(amode) = &self.rm {
            if let Some(trap_code) = amode.get_flags().trap_code() {
                sink.add_trap(trap_code);
            }
        }
        sink.put4(self.bits);
        sink.put1(self.opcode);

        match &self.rm {
            RegisterOrAmode::Register(reg) => {
                let rm: u8 = (*reg).into();
                sink.put1(rex::encode_modrm(3, self.reg.0 & 7, rm & 7));
            }
            RegisterOrAmode::Amode(amode) => {
                let scaling = self.scaling_for_8bit_disp();

                let bytes_at_end = if self.imm.is_some() { 1 } else { 0 };
                rex::emit_modrm_sib_disp(sink, self.reg.0 & 7, amode, bytes_at_end, Some(scaling));
            }
        }
        if let Some(imm) = self.imm {
            sink.put1(imm);
        }
    }

    // In order to simplify the encoding of the various bit ranges in the prefix, we specify those
    // ranges according to the table below (extracted from the Intel Software Developer's Manual,
    // volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
    // chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
    // 8, leaving bits 0-7 for the mandatory `0x62`.
    //         ┌───┬───┬───┬───┬───┬───┬───┬───┐
    // Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
    // Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
    // Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
    //         └───┴───┴───┴───┴───┴───┴───┴───┘

    // Byte 1:
    const mm: RangeInclusive<u8> = 8..=9;
    const R_: RangeInclusive<u8> = 12..=12;
    const B: RangeInclusive<u8> = 13..=13;
    const X: RangeInclusive<u8> = 14..=14;
    const R: RangeInclusive<u8> = 15..=15;

    // Byte 2:
    const pp: RangeInclusive<u8> = 16..=17;
    const vvvv: RangeInclusive<u8> = 19..=22;
    const W: RangeInclusive<u8> = 23..=23;

    // Byte 3:
    const aaa: RangeInclusive<u8> = 24..=26;
    const V_: RangeInclusive<u8> = 27..=27;
    const b: RangeInclusive<u8> = 28..=28;
    const LL: RangeInclusive<u8> = 29..=30;
    const z: RangeInclusive<u8> = 31..=31;

    // A convenience method for writing the `value` bits to the given range in `self.bits`.
    #[inline]
    fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
        assert!(ExactSizeIterator::len(&range) > 0);
        let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
        let mask: u32 = (1 << size) - 1; // Generate a bit mask.
        debug_assert!(
            value <= mask,
            "The written value should fit in {size} bits."
        );
        let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
        self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through.
        let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`).
        self.bits |= value; // Modify the bits in `range`.
    }

    /// A convenience method for reading the given range of bits in `self.bits`,
    /// shifted to the LSB of the returned value.
    #[inline]
    fn read(&self, range: RangeInclusive<u8>) -> u32 {
        (self.bits >> range.start()) & ((1 << range.len()) - 1)
    }

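    // Compute the `N` used for AVX-512 compressed (Disp8*N) displacements: an 8-bit displacement
    // is interpreted as `disp8 * N`, where `N` depends on the tuple type, the vector length, the
    // broadcast bit, and W. E.g., in the `vpabsq` tests below (`Full` tuple, W = 1, no broadcast,
    // 128-bit length), N = 16, so a displacement of 2032 fits in a single byte as `0x7f`.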
    fn scaling_for_8bit_disp(&self) -> i8 {
        use Avx512TupleType::*;

        let vector_size_scaling = || match self.read(Self::LL) {
            0b00 => 16,
            0b01 => 32,
            0b10 => 64,
            _ => unreachable!(),
        };

        match self.tuple_type {
            Some(Full) => {
                if self.read(Self::b) == 1 {
                    if self.read(Self::W) == 0 { 4 } else { 8 }
                } else {
                    vector_size_scaling()
                }
            }
            Some(FullMem) => vector_size_scaling(),
            Some(Mem128) => 16,
            None => panic!("tuple type was not set"),
        }
    }
}

/// Describe the register index to use. This wrapper is a type-safe way to pass
/// around the registers defined in `inst/regs.rs`.
#[derive(Debug, Copy, Clone, Default)]
pub struct Register(u8);
impl From<u8> for Register {
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}
impl From<Register> for u8 {
    fn from(reg: Register) -> u8 {
        reg.0
    }
}

#[derive(Debug, Clone)]
pub enum RegisterOrAmode {
    Register(Register),
    Amode(Amode),
}

impl From<u8> for RegisterOrAmode {
    fn from(reg: u8) -> Self {
        RegisterOrAmode::Register(reg.into())
    }
}

impl From<Amode> for RegisterOrAmode {
    fn from(amode: Amode) -> Self {
        RegisterOrAmode::Amode(amode)
    }
}

/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of the EVEX P2 byte). Table
/// 2-36 in section 2.6.10 (Intel Software Developer's Manual, volume 2A) describes how these bits
/// can be used together for certain classes of instructions, so special care should be taken to
/// ensure that instructions use an applicable, correct `EvexContext`. Table 2-39 contains cases
/// where opcodes can result in an #UD.
pub enum EvexContext {
    #[expect(dead_code, reason = "here for future use")]
    RoundingRegToRegFP {
        rc: EvexRoundingControl,
    },
    #[expect(dead_code, reason = "here for future use")]
    NoRoundingFP {
        sae: bool,
        length: EvexVectorLength,
    },
    #[expect(dead_code, reason = "here for future use")]
    MemoryOp {
        broadcast: bool,
        length: EvexVectorLength,
    },
    Other {
        length: EvexVectorLength,
    },
}

impl Default for EvexContext {
    fn default() -> Self {
        Self::Other {
            length: EvexVectorLength::default(),
        }
    }
}

impl EvexContext {
    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
    pub fn bits(&self) -> u8 {
        match self {
            Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
            Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
            Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
            Self::Other { length } => length.bits() << 1,
        }
    }
}

/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
pub enum EvexVectorLength {
    V128,
    #[expect(dead_code, reason = "here for future cranelift use")]
    V256,
    #[expect(dead_code, reason = "here for future cranelift use")]
    V512,
}

impl EvexVectorLength {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match self {
            Self::V128 => 0b00,
            Self::V256 => 0b01,
            Self::V512 => 0b10,
            // 0b11 is reserved (#UD).
        }
    }
}

impl Default for EvexVectorLength {
    fn default() -> Self {
        Self::V128
    }
}

/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
#[expect(dead_code, reason = "here for future use")]
pub enum EvexRoundingControl {
    RNE,
    RD,
    RU,
    RZ,
}

impl EvexRoundingControl {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match self {
            Self::RNE => 0b00,
            Self::RD => 0b01,
            Self::RU => 0b10,
            Self::RZ => 0b11,
        }
    }
}

/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
/// Software Developer's Manual, volume 2A.
pub enum EvexMasking {
    None,
    #[expect(dead_code, reason = "here for future use")]
    Merging {
        k: u8,
    },
    #[expect(dead_code, reason = "here for future use")]
    Zeroing {
        k: u8,
    },
}

impl Default for EvexMasking {
    fn default() -> Self {
        EvexMasking::None
    }
}

impl EvexMasking {
    /// Encode the `z` bit for merging with the P2 byte.
    pub fn z_bit(&self) -> u8 {
        match self {
            Self::None | Self::Merging { .. } => 0,
            Self::Zeroing { .. } => 1,
        }
    }

    /// Encode the `aaa` bits for merging with the P2 byte.
    pub fn aaa_bits(&self) -> u8 {
        match self {
            Self::None => 0b000,
            Self::Merging { k } | Self::Zeroing { k } => {
                debug_assert!(*k <= 7);
                *k
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::MemFlags;
    use crate::isa::x64::args::Gpr;
    use crate::isa::x64::inst::regs;
    use std::vec::Vec;

    // As a sanity test, we verify that the output of `xed-asmparse-main 'vpabsq xmm0{k0},
    // xmm1'` matches this EVEX encoding machinery.
    #[test]
    fn vpabsq() {
        let mut tmp = MachBuffer::<Inst>::new();
        let tests: &[(crate::Reg, RegisterOrAmode, Vec<u8>)] = &[
            // vpabsq %xmm1, %xmm0
            (
                regs::xmm0(),
                regs::xmm1().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1],
            ),
            // vpabsq %xmm8, %xmm10
            (
                regs::xmm10(),
                regs::xmm8().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0xd0],
            ),
            // vpabsq %xmm15, %xmm3
            (
                regs::xmm3(),
                regs::xmm15().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0xd2, 0xfd, 0x08, 0x1f, 0xdf],
            ),
            // vpabsq (%rsi), %xmm12
            (
                regs::xmm12(),
                Amode::ImmReg {
                    simm32: 0,
                    base: regs::rsi(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x26],
            ),
            // vpabsq 8(%r15), %xmm14
            (
                regs::xmm14(),
                Amode::ImmReg {
                    simm32: 8,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0xb7, 0x08, 0x00, 0x00, 0x00],
            ),
            // vpabsq 16(%r15), %xmm14
            (
                regs::xmm14(),
                Amode::ImmReg {
                    simm32: 16,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x77, 0x01],
            ),
            // vpabsq 17(%rax), %xmm3
            (
                regs::xmm3(),
                Amode::ImmReg {
                    simm32: 17,
                    base: regs::rax(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0x98, 0x11, 0x00, 0x00, 0x00],
            ),
            // vpabsq (%rbx, %rsi, 8), %xmm9
            (
                regs::xmm9(),
                Amode::ImmRegRegShift {
                    simm32: 0,
                    base: Gpr::unwrap_new(regs::rbx()),
                    index: Gpr::unwrap_new(regs::rsi()),
                    shift: 3,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x0c, 0xf3],
            ),
            // vpabsq 1(%r11, %rdi, 4), %xmm13
            (
                regs::xmm13(),
                Amode::ImmRegRegShift {
                    simm32: 1,
                    base: Gpr::unwrap_new(regs::r11()),
                    index: Gpr::unwrap_new(regs::rdi()),
                    shift: 2,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![
                    0x62, 0x52, 0xfd, 0x08, 0x1f, 0xac, 0xbb, 0x01, 0x00, 0x00, 0x00,
                ],
            ),
            // vpabsq 128(%rsp, %r10, 2), %xmm5
            (
                regs::xmm5(),
                Amode::ImmRegRegShift {
                    simm32: 128,
                    base: Gpr::unwrap_new(regs::rsp()),
                    index: Gpr::unwrap_new(regs::r10()),
                    shift: 1,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x6c, 0x54, 0x08],
            ),
            // vpabsq 112(%rbp, %r13, 1), %xmm6
            (
                regs::xmm6(),
                Amode::ImmRegRegShift {
                    simm32: 112,
                    base: Gpr::unwrap_new(regs::rbp()),
                    index: Gpr::unwrap_new(regs::r13()),
                    shift: 0,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x74, 0x2d, 0x07],
            ),
            // vpabsq (%rbp, %r13, 1), %xmm7
            (
                regs::xmm7(),
                Amode::ImmRegRegShift {
                    simm32: 0,
                    base: Gpr::unwrap_new(regs::rbp()),
                    index: Gpr::unwrap_new(regs::r13()),
                    shift: 0,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x7c, 0x2d, 0x00],
            ),
            // vpabsq 2032(%r12), %xmm8
            (
                regs::xmm8(),
                Amode::ImmReg {
                    simm32: 2032,
                    base: regs::r12(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x44, 0x24, 0x7f],
            ),
            // vpabsq 2048(%r13), %xmm9
            (
                regs::xmm9(),
                Amode::ImmReg {
                    simm32: 2048,
                    base: regs::r13(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x8d, 0x00, 0x08, 0x00, 0x00],
            ),
            // vpabsq -16(%r14), %xmm10
            (
                regs::xmm10(),
                Amode::ImmReg {
                    simm32: -16,
                    base: regs::r14(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x56, 0xff],
            ),
            // vpabsq -5(%r15), %xmm11
            (
                regs::xmm11(),
                Amode::ImmReg {
                    simm32: -5,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x9f, 0xfb, 0xff, 0xff, 0xff],
            ),
            // vpabsq -2048(%rdx), %xmm12
            (
                regs::xmm12(),
                Amode::ImmReg {
                    simm32: -2048,
                    base: regs::rdx(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x62, 0x80],
            ),
            // vpabsq -2064(%rsi), %xmm13
            (
                regs::xmm13(),
                Amode::ImmReg {
                    simm32: -2064,
                    base: regs::rsi(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0xae, 0xf0, 0xf7, 0xff, 0xff],
            ),
            // a: vpabsq a(%rip), %xmm14
            (
                regs::xmm14(),
                Amode::RipRelative {
                    target: tmp.get_label(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x35, 0xf6, 0xff, 0xff, 0xff],
            ),
        ];

        for (dst, src, encoding) in tests {
            let mut sink = MachBuffer::new();
            let label = sink.get_label();
            sink.bind_label(label, &mut Default::default());
            EvexInstruction::new()
                .prefix(LegacyPrefixes::_66)
                .map(OpcodeMap::_0F38)
                .w(true)
                .opcode(0x1F)
                .reg(dst.to_real_reg().unwrap().hw_enc())
                .rm(src.clone())
                .length(EvexVectorLength::V128)
                .tuple_type(Avx512TupleType::Full)
                .encode(&mut sink);
            let bytes0 = sink
                .finish(&Default::default(), &mut Default::default())
                .data;
            assert_eq!(
                bytes0.as_slice(),
                encoding.as_slice(),
                "dst={dst:?} src={src:?}"
            );
        }
    }

    /// Verify that the defaults are equivalent to an instruction with a `0x00` opcode using the
    /// "0" register (i.e. `rax`), with sane defaults for the various configurable parameters. This
    /// test is more interesting than it may appear because some of the parameters have flipped-bit
    /// representations (e.g. `vvvvv`) so emitting 0s as a default will not work.
    #[test]
    fn default_emission() {
        let mut sink = MachBuffer::new();
        EvexInstruction::new().encode(&mut sink);
        let bytes0 = sink
            .finish(&Default::default(), &mut Default::default())
            .data;

        let mut sink = MachBuffer::new();
        EvexInstruction::new()
            .length(EvexVectorLength::V128)
            .prefix(LegacyPrefixes::None)
            .map(OpcodeMap::None)
            .w(false)
            .opcode(0x00)
            .reg(regs::rax().to_real_reg().unwrap().hw_enc())
            .rm(regs::rax().to_real_reg().unwrap().hw_enc())
            .mask(EvexMasking::None)
            .encode(&mut sink);
        let bytes1 = sink
            .finish(&Default::default(), &mut Default::default())
            .data;

        assert_eq!(bytes0, bytes1);
    }
}