cranelift_assembler_x64_meta/dsl/
encoding.rs

1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
13//! This module references the Intel® 64 and IA-32 Architectures Software
14//! Development Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24    Rex {
25        opcodes: opcode.into(),
26        w: false,
27        modrm: None,
28        imm: Imm::None,
29        opcode_mod: None,
30    }
31}
32
33/// An abbreviated constructor for VEX-encoded instructions.
34#[must_use]
35pub fn vex(length: VexLength) -> Vex {
36    Vex {
37        length,
38        pp: None,
39        mmmmm: None,
40        w: VexW::WIG,
41        opcode: u8::MAX,
42        modrm: None,
43        imm: Imm::None,
44        is4: false,
45    }
46}
47
/// Enumerate the ways x64 encodes instructions.
///
/// Each variant wraps the full description of one encoding format.
pub enum Encoding {
    /// The traditional encoding (legacy prefixes, optional REX byte, opcode
    /// bytes); see [`Rex`].
    Rex(Rex),
    /// The VEX encoding; see [`Vex`].
    Vex(Vex),
}
53
54impl Encoding {
55    /// Check that the encoding is valid for the given operands; this can find
56    /// issues earlier, before generating any Rust code.
57    pub fn validate(&self, operands: &[Operand]) {
58        match self {
59            Encoding::Rex(rex) => rex.validate(operands),
60            Encoding::Vex(vex) => vex.validate(operands),
61        }
62    }
63
64    /// Return the opcode for this encoding.
65    pub fn opcode(&self) -> u8 {
66        match self {
67            Encoding::Rex(rex) => rex.opcodes.opcode(),
68            Encoding::Vex(vex) => vex.opcode,
69        }
70    }
71}
72
73impl fmt::Display for Encoding {
74    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75        match self {
76            Encoding::Rex(rex) => write!(f, "{rex}"),
77            Encoding::Vex(vex) => write!(f, "{vex}"),
78        }
79    }
80}
81
/// Describe how an instruction uses the ModR/M byte; these correspond to the
/// `/digit` and `/r` notations of the reference manual.
#[derive(Clone, Copy, PartialEq)]
pub enum ModRmKind {
    /// Models `/digit`.
    ///
    /// From the reference manual: "a digit between 0 and 7 indicates that the
    /// ModR/M byte of the instruction uses only the r/m (register or memory)
    /// operand. The reg field contains the digit that provides an extension to
    /// the instruction's opcode."
    Digit(u8),

    /// Models `/r`.
    ///
    /// From the reference manual: "indicates that the ModR/M byte of the
    /// instruction contains a register operand and an r/m operand."
    Reg,
}
98
99impl ModRmKind {
100    /// Return the digit extending the opcode, if available.
101    #[must_use]
102    pub fn digit(&self) -> Option<u8> {
103        match self {
104            Self::Digit(digit) => Some(*digit),
105            _ => None,
106        }
107    }
108
109    /// Return the digit extending the opcode.
110    ///
111    /// # Panics
112    ///
113    /// Panics if not extension was defined.
114    pub fn unwrap_digit(&self) -> u8 {
115        self.digit().expect("expected an extension digit")
116    }
117}
118
119impl fmt::Display for ModRmKind {
120    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121        match self {
122            ModRmKind::Digit(digit) => write!(f, "/{digit}"),
123            ModRmKind::Reg => write!(f, "/r"),
124        }
125    }
126}
127
/// The traditional x64 encoding.
///
/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name
/// of the optional _byte_ that, e.g., extends the number of available
/// registers, but we use it here to distinguish this encoding from other
/// formats (e.g., VEX, EVEX). The "REX" _byte_ is still optional in this
/// encoding and only emitted when necessary.
pub struct Rex {
    /// The opcodes for this instruction.
    ///
    /// Multi-byte opcodes are handled by passing an array of opcodes (including
    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
    ///
    /// ```
    /// # use cranelift_assembler_x64_meta::dsl::rex;
    /// let enc = rex([0x66, 0x0f, 0x54]);
    /// ```
    pub opcodes: Opcodes,
    /// Indicates setting the REX.W bit.
    ///
    /// From the reference manual: "Indicates the use of a REX prefix that
    /// affects operand size or instruction semantics. The ordering of the REX
    /// prefix and other optional/mandatory instruction prefixes are discussed
    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
    /// 64-bit behavior are not listed explicitly in the opcode column."
    pub w: bool,
    /// Indicates how the ModR/M byte is used (`/r` or `/digit`), if at all.
    pub modrm: Option<ModRmKind>,
    /// The size of the immediate operand appended to the instruction, if any.
    pub imm: Imm,
    /// Used for `+rb`, `+rw`, `+rd`, and `+ro` instructions, which encode `reg`
    /// bits in the opcode byte; if `Some`, this contains the expected bit width
    /// of `reg`.
    ///
    /// From the reference manual: "[...] the lower 3 bits of the opcode byte is
    /// used to encode the register operand without a modR/M byte. The
    /// instruction lists the corresponding hexadecimal value of the opcode byte
    /// with low 3 bits as 000b. In non-64-bit mode, a register code, from 0
    /// through 7, is added to the hexadecimal value of the opcode byte. In
    /// 64-bit mode, indicates the four bit field of REX.b and opcode[2:0] field
    /// encodes the register operand of the instruction. “+ro” is applicable
    /// only in 64-bit mode."
    pub opcode_mod: Option<OpcodeMod>,
}
173
174impl Rex {
175    /// Set the `REX.W` bit.
176    #[must_use]
177    pub fn w(self) -> Self {
178        Self { w: true, ..self }
179    }
180
181    /// Set the ModR/M byte to contain a register operand and an r/m operand;
182    /// equivalent to `/r` in the reference manual.
183    #[must_use]
184    pub fn r(self) -> Self {
185        Self {
186            modrm: Some(ModRmKind::Reg),
187            ..self
188        }
189    }
190
191    /// Set the digit extending the opcode; equivalent to `/<digit>` in the
192    /// reference manual.
193    ///
194    /// # Panics
195    ///
196    /// Panics if `extension` is too large.
197    #[must_use]
198    pub fn digit(self, extension: u8) -> Self {
199        assert!(extension <= 0b111, "must fit in 3 bits");
200        Self {
201            modrm: Some(ModRmKind::Digit(extension)),
202            ..self
203        }
204    }
205
206    /// Retrieve the digit extending the opcode, if available.
207    #[must_use]
208    pub fn unwrap_digit(&self) -> Option<u8> {
209        match self.modrm {
210            Some(ModRmKind::Digit(digit)) => Some(digit),
211            _ => None,
212        }
213    }
214
215    /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
216    /// reference manual.
217    ///
218    /// # Panics
219    ///
220    /// Panics if an immediate operand is already set.
221    #[must_use]
222    pub fn ib(self) -> Self {
223        assert_eq!(self.imm, Imm::None);
224        Self {
225            imm: Imm::ib,
226            ..self
227        }
228    }
229
230    /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
231    /// the reference manual.
232    ///
233    /// # Panics
234    ///
235    /// Panics if an immediate operand is already set.
236    #[must_use]
237    pub fn iw(self) -> Self {
238        assert_eq!(self.imm, Imm::None);
239        Self {
240            imm: Imm::iw,
241            ..self
242        }
243    }
244
245    /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
246    /// in the reference manual.
247    ///
248    /// # Panics
249    ///
250    /// Panics if an immediate operand is already set.
251    #[must_use]
252    pub fn id(self) -> Self {
253        assert_eq!(self.imm, Imm::None);
254        Self {
255            imm: Imm::id,
256            ..self
257        }
258    }
259
260    /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
261    /// in the reference manual.
262    ///
263    /// # Panics
264    ///
265    /// Panics if an immediate operand is already set.
266    #[must_use]
267    pub fn io(self) -> Self {
268        assert_eq!(self.imm, Imm::None);
269        Self {
270            imm: Imm::io,
271            ..self
272        }
273    }
274
275    /// Modify the opcode byte with bits from an 8-bit `reg`; equivalent to
276    /// `+rb` in the reference manual.
277    #[must_use]
278    pub fn rb(self) -> Self {
279        Self {
280            opcode_mod: Some(OpcodeMod::rb),
281            ..self
282        }
283    }
284
285    /// Modify the opcode byte with bits from a 16-bit `reg`; equivalent to
286    /// `+rw` in the reference manual.
287    #[must_use]
288    pub fn rw(self) -> Self {
289        Self {
290            opcode_mod: Some(OpcodeMod::rw),
291            ..self
292        }
293    }
294
295    /// Modify the opcode byte with bits from a 32-bit `reg`; equivalent to
296    /// `+rd` in the reference manual.
297    #[must_use]
298    pub fn rd(self) -> Self {
299        Self {
300            opcode_mod: Some(OpcodeMod::rd),
301            ..self
302        }
303    }
304
305    /// Modify the opcode byte with bits from a 64-bit `reg`; equivalent to
306    /// `+ro` in the reference manual.
307    #[must_use]
308    pub fn ro(self) -> Self {
309        Self {
310            opcode_mod: Some(OpcodeMod::ro),
311            ..self
312        }
313    }
314
315    /// Check a subset of the rules for valid encodings outlined in chapter 2,
316    /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
317    /// Developer’s Manual, Volume 2A.
318    fn validate(&self, operands: &[Operand]) {
319        if let Some(OperandKind::Imm(op)) = operands
320            .iter()
321            .map(|o| o.location.kind())
322            .find(|k| matches!(k, OperandKind::Imm(_)))
323        {
324            assert_eq!(
325                op.bits(),
326                self.imm.bits(),
327                "for an immediate, the encoding width must match the declared operand width"
328            );
329        }
330
331        if let Some(opcode_mod) = &self.opcode_mod {
332            assert!(
333                self.opcodes.primary & 0b111 == 0,
334                "the lower three bits of the opcode byte should be 0"
335            );
336            assert!(
337                operands
338                    .iter()
339                    .all(|o| o.location.bits() == opcode_mod.bits().into()),
340                "the opcode modifier width must match the operand widths"
341            );
342        }
343    }
344}
345
346impl From<Rex> for Encoding {
347    fn from(rex: Rex) -> Encoding {
348        Encoding::Rex(rex)
349    }
350}
351
352impl fmt::Display for Rex {
353    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
354        if let Some(group1) = &self.opcodes.prefixes.group1 {
355            write!(f, "{group1} + ")?;
356        }
357        if let Some(group2) = &self.opcodes.prefixes.group2 {
358            write!(f, "{group2} + ")?;
359        }
360        if let Some(group3) = &self.opcodes.prefixes.group3 {
361            write!(f, "{group3} + ")?;
362        }
363        if let Some(group4) = &self.opcodes.prefixes.group4 {
364            write!(f, "{group4} + ")?;
365        }
366        if self.w {
367            write!(f, "REX.W + ")?;
368        }
369        if self.opcodes.escape {
370            write!(f, "0x0F + ")?;
371        }
372        write!(f, "{:#04X}", self.opcodes.primary)?;
373        if let Some(secondary) = self.opcodes.secondary {
374            write!(f, " {secondary:#04X}")?;
375        }
376        if let Some(modrm) = self.modrm {
377            write!(f, " {modrm}")?;
378        }
379        if let Some(opcode_mod) = &self.opcode_mod {
380            write!(f, " {opcode_mod}")?;
381        }
382        if self.imm != Imm::None {
383            write!(f, " {}", self.imm)?;
384        }
385        Ok(())
386    }
387}
388
/// Describe an instruction's opcodes: its legacy prefixes, an optional `0x0f`
/// escape byte, a primary opcode and an optional secondary opcode.
///
/// From section 2.1.2 "Opcodes" in the reference manual:
///
/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
/// > be defined within the primary opcode. Such fields define the direction of
/// > operation, size of displacements, register encoding, condition codes, or
/// > sign extension. Encoding fields used by an opcode vary depending on the
/// > class of operation.
/// >
/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
/// > of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
/// >   byte.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
/// >   a second opcode byte (same as previous bullet).
/// >
/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
/// > The first byte is a mandatory prefix (it is not considered as a repeat
/// > prefix).
/// >
/// > Three-byte opcode formats for general-purpose and SIMD instructions
/// > consist of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
/// >   opcode bytes.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
/// >   two additional opcode bytes (same as previous bullet).
/// >
/// > For example, `PHADDW` for XMM registers consists of the following
/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
pub struct Opcodes {
    /// The prefix bytes for this instruction.
    pub prefixes: Prefixes,
    /// Indicates the use of an escape opcode byte, `0x0f`.
    pub escape: bool,
    /// The primary opcode; when `escape` is set, this is the byte that follows
    /// the `0x0f` escape byte.
    pub primary: u8,
    /// Some instructions (e.g., SIMD) may have a secondary opcode, which
    /// follows the primary one.
    pub secondary: Option<u8>,
}
429
430impl Opcodes {
431    /// Return the main opcode for this instruction.
432    ///
433    /// Note that [`Rex`]-encoded instructions have a complex opcode scheme (see
434    /// [`Opcodes`] documentation); the opcode one is usually looking for is the
435    /// last one. This returns the last opcode: the secondary opcode if one is
436    /// available and the primary otherwise.
437    fn opcode(&self) -> u8 {
438        if let Some(secondary) = self.secondary {
439            secondary
440        } else {
441            self.primary
442        }
443    }
444}
445
446impl From<u8> for Opcodes {
447    fn from(primary: u8) -> Opcodes {
448        Opcodes {
449            prefixes: Prefixes::default(),
450            escape: false,
451            primary,
452            secondary: None,
453        }
454    }
455}
456
457impl<const N: usize> From<[u8; N]> for Opcodes {
458    fn from(bytes: [u8; N]) -> Self {
459        let (prefixes, remaining) = Prefixes::parse(&bytes);
460        let (escape, primary, secondary) = match remaining {
461            [primary] => (false, *primary, None),
462            [0x0f, primary] => (true, *primary, None),
463            [0x0f, primary, secondary] => (true, *primary, Some(*secondary)),
464            _ => panic!(
465                "invalid opcodes after prefix; expected [opcode], [0x0f, opcode], or [0x0f, opcode, opcode], found {remaining:x?}"
466            ),
467        };
468        Self {
469            prefixes,
470            escape,
471            primary,
472            secondary,
473        }
474    }
475}
476
/// The allowed prefixes for an instruction; at most one prefix is stored per
/// group. From the reference manual (section 2.1.1):
///
/// > Instruction prefixes are divided into four groups, each with a set of
/// > allowable prefix codes. For each instruction, it is only useful to include
/// > up to one prefix code from each of the four groups (Groups 1, 2, 3, 4).
/// > Groups 1 through 4 may be placed in any order relative to each other.
#[derive(Default)]
pub struct Prefixes {
    /// The group 1 prefix, if any; see [`Group1Prefix`].
    pub group1: Option<Group1Prefix>,
    /// The group 2 prefix, if any; see [`Group2Prefix`].
    pub group2: Option<Group2Prefix>,
    /// The group 3 prefix, if any; see [`Group3Prefix`].
    pub group3: Option<Group3Prefix>,
    /// The group 4 prefix, if any; see [`Group4Prefix`].
    pub group4: Option<Group4Prefix>,
}
491
492impl Prefixes {
493    /// Parse a slice of `bytes` into a set of prefixes, returning both the
494    /// configured [`Prefixes`] as well as any remaining bytes.
495    fn parse(mut bytes: &[u8]) -> (Self, &[u8]) {
496        let mut prefixes = Self::default();
497        while !bytes.is_empty() && prefixes.try_assign(bytes[0]).is_ok() {
498            bytes = &bytes[1..];
499        }
500        (prefixes, bytes)
501    }
502
503    /// Attempt to parse a `byte` as a prefix and, if successful, assigns it to
504    /// the correct prefix group.
505    ///
506    /// # Panics
507    ///
508    /// This function panics if the prefix for a group is already set; this
509    /// disallows specifying multiple prefixes per group.
510    fn try_assign(&mut self, byte: u8) -> Result<(), ()> {
511        if let Ok(p) = Group1Prefix::try_from(byte) {
512            assert!(self.group1.is_none());
513            self.group1 = Some(p);
514            Ok(())
515        } else if let Ok(p) = Group2Prefix::try_from(byte) {
516            assert!(self.group2.is_none());
517            self.group2 = Some(p);
518            Ok(())
519        } else if let Ok(p) = Group3Prefix::try_from(byte) {
520            assert!(self.group3.is_none());
521            self.group3 = Some(p);
522            Ok(())
523        } else if let Ok(p) = Group4Prefix::try_from(byte) {
524            assert!(self.group4.is_none());
525            self.group4 = Some(p);
526            Ok(())
527        } else {
528            Err(())
529        }
530    }
531
532    /// Check if any prefix is present.
533    pub fn is_empty(&self) -> bool {
534        self.group1.is_none()
535            && self.group2.is_none()
536            && self.group3.is_none()
537            && self.group4.is_none()
538    }
539}
540
/// Contains the group 1 prefixes: the LOCK prefix and the repeat prefixes. See
/// section 2.1.1, "Instruction Prefixes," in the reference manual.
pub enum Group1Prefix {
    /// The LOCK prefix (`0xf0`). From the reference manual:
    ///
    /// > The LOCK prefix (F0H) forces an operation that ensures exclusive use
    /// > of shared memory in a multiprocessor environment. See "LOCK—Assert
    /// > LOCK# Signal Prefix" in Chapter 3, Instruction Set Reference, A-L, for
    /// > a description of this prefix.
    Lock,
    /// A REPNE/REPNZ prefix (`0xf2`) or a BND prefix under certain conditions.
    /// `REP*` prefixes apply only to string and input/output instructions but
    /// can be used as mandatory prefixes in other kinds of instructions (e.g.,
    /// SIMD). From the reference manual:
    ///
    /// > Repeat prefixes (F2H, F3H) cause an instruction to be repeated for
    /// > each element of a string. Use these prefixes only with string and I/O
    /// > instructions (MOVS, CMPS, SCAS, LODS, STOS, INS, and OUTS). Use of
    /// > repeat prefixes and/or undefined opcodes with other Intel 64 or IA-32
    /// > instructions is reserved; such use may cause unpredictable behavior.
    /// >
    /// > Some instructions may use F2H, F3H as a mandatory prefix to express
    /// > distinct functionality.
    REPNorBND,
    /// A REPE/REPZ prefix (`0xf3`); `REP*` prefixes apply only to string and
    /// input/output instructions but can be used as mandatory prefixes in other
    /// kinds of instructions (e.g., SIMD). See `REPNorBND` for more details.
    REP_,
}
568
569impl TryFrom<u8> for Group1Prefix {
570    type Error = u8;
571    fn try_from(byte: u8) -> Result<Self, Self::Error> {
572        Ok(match byte {
573            0xF0 => Group1Prefix::Lock,
574            0xF2 => Group1Prefix::REPNorBND,
575            0xF3 => Group1Prefix::REP_,
576            byte => return Err(byte),
577        })
578    }
579}
580
581impl fmt::Display for Group1Prefix {
582    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
583        match self {
584            Group1Prefix::Lock => write!(f, "0xF0"),
585            Group1Prefix::REPNorBND => write!(f, "0xF2"),
586            Group1Prefix::REP_ => write!(f, "0xF3"),
587        }
588    }
589}
590
/// Contains the group 2 prefixes: the segment override prefixes, two of which
/// double as (deprecated) branch hints when used on a `Jcc` instruction. Note
/// that using the segment override prefixes on a branch instruction is
/// reserved. See section 2.1.1, "Instruction Prefixes," in the reference
/// manual.
pub enum Group2Prefix {
    /// The CS segment override prefix (`0x2e`); also the "branch not taken"
    /// hint.
    CSorBNT,
    /// The SS segment override prefix (`0x36`).
    SS,
    /// The DS segment override prefix (`0x3e`); also the "branch taken" hint.
    DSorBT,
    /// The ES segment override prefix (`0x26`).
    ES,
    /// The FS segment override prefix (`0x64`).
    FS,
    /// The GS segment override prefix (`0x65`).
    GS,
}
610
611impl TryFrom<u8> for Group2Prefix {
612    type Error = u8;
613    fn try_from(byte: u8) -> Result<Self, Self::Error> {
614        Ok(match byte {
615            0x2E => Group2Prefix::CSorBNT,
616            0x36 => Group2Prefix::SS,
617            0x3E => Group2Prefix::DSorBT,
618            0x26 => Group2Prefix::ES,
619            0x64 => Group2Prefix::FS,
620            0x65 => Group2Prefix::GS,
621            byte => return Err(byte),
622        })
623    }
624}
625
626impl fmt::Display for Group2Prefix {
627    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
628        match self {
629            Group2Prefix::CSorBNT => write!(f, "0x2E"),
630            Group2Prefix::SS => write!(f, "0x36"),
631            Group2Prefix::DSorBT => write!(f, "0x3E"),
632            Group2Prefix::ES => write!(f, "0x26"),
633            Group2Prefix::FS => write!(f, "0x64"),
634            Group2Prefix::GS => write!(f, "0x65"),
635        }
636    }
637}
638
/// Contains the operand-size override prefix (`0x66`); also used as a SIMD
/// prefix. From the reference manual:
///
/// > The operand-size override prefix allows a program to switch between 16-
/// > and 32-bit operand sizes. Either size can be the default; use of the
/// > prefix selects the non-default size. Some SSE2/SSE3/SSSE3/SSE4
/// > instructions and instructions using a three-byte sequence of primary
/// > opcode bytes may use 66H as a mandatory prefix to express distinct
/// > functionality.
pub enum Group3Prefix {
    /// The operand-size override prefix (`0x66`).
    OperandSizeOverride,
}
651
652impl TryFrom<u8> for Group3Prefix {
653    type Error = u8;
654    fn try_from(byte: u8) -> Result<Self, Self::Error> {
655        Ok(match byte {
656            0x66 => Group3Prefix::OperandSizeOverride,
657            byte => return Err(byte),
658        })
659    }
660}
661
662impl fmt::Display for Group3Prefix {
663    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
664        match self {
665            Group3Prefix::OperandSizeOverride => write!(f, "0x66"),
666        }
667    }
668}
669
/// Contains the address-size override prefix (`0x67`). From the reference
/// manual:
///
/// > The address-size override prefix (67H) allows programs to switch between
/// > 16- and 32-bit addressing. Either size can be the default; the prefix
/// > selects the non-default size.
pub enum Group4Prefix {
    /// The address-size override prefix (`0x67`).
    AddressSizeOverride,
}
679
680impl TryFrom<u8> for Group4Prefix {
681    type Error = u8;
682    fn try_from(byte: u8) -> Result<Self, Self::Error> {
683        Ok(match byte {
684            0x67 => Group4Prefix::AddressSizeOverride,
685            byte => return Err(byte),
686        })
687    }
688}
689
690impl fmt::Display for Group4Prefix {
691    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
692        match self {
693            Group4Prefix::AddressSizeOverride => write!(f, "0x67"),
694        }
695    }
696}
697
/// Indicate the size of an immediate operand. From the reference manual:
///
/// > A 1-byte (ib), 2-byte (iw), 4-byte (id) or 8-byte (io) immediate operand
/// > to the instruction that follows the opcode, ModR/M bytes or scale-indexing
/// > bytes. The opcode determines if the operand is a signed value. All words,
/// > doublewords, and quadwords are given with the low-order byte first.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate operand.
    None,
    /// A 1-byte (8-bit) immediate.
    ib,
    /// A 2-byte (16-bit) immediate.
    iw,
    /// A 4-byte (32-bit) immediate.
    id,
    /// An 8-byte (64-bit) immediate.
    io,
}
713
714impl Imm {
715    fn bits(&self) -> u16 {
716        match self {
717            Self::None => 0,
718            Self::ib => 8,
719            Self::iw => 16,
720            Self::id => 32,
721            Self::io => 64,
722        }
723    }
724}
725
726impl fmt::Display for Imm {
727    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
728        match self {
729            Self::None => write!(f, ""),
730            Self::ib => write!(f, "ib"),
731            Self::iw => write!(f, "iw"),
732            Self::id => write!(f, "id"),
733            Self::io => write!(f, "io"),
734        }
735    }
736}
737
/// Indicate the size of the `reg` used when modifying the lower three bits of
/// the opcode byte; this corresponds to the `+rb`, `+rw`, `+rd`, and `+ro`
/// modifiers in the reference manual.
///
/// ```
/// # use cranelift_assembler_x64_meta::dsl::{rex};
/// // The `bswap` instruction extends the opcode byte:
/// let enc = rex([0x0F, 0xC8]).rd();
/// assert_eq!(enc.to_string(), "0x0F + 0xC8 +rd");
/// ```
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum OpcodeMod {
    /// `+rb`: an 8-bit `reg`.
    rb,
    /// `+rw`: a 16-bit `reg`.
    rw,
    /// `+rd`: a 32-bit `reg`.
    rd,
    /// `+ro`: a 64-bit `reg`.
    ro,
}
756
757impl OpcodeMod {
758    fn bits(&self) -> u8 {
759        match self {
760            Self::rb => 8,
761            Self::rw => 16,
762            Self::rd => 32,
763            Self::ro => 64,
764        }
765    }
766}
767
768impl fmt::Display for OpcodeMod {
769    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
770        match self {
771            Self::rb => write!(f, "+rb"),
772            Self::rw => write!(f, "+rw"),
773            Self::rd => write!(f, "+rd"),
774            Self::ro => write!(f, "+ro"),
775        }
776    }
777}
778
/// Contains the legacy prefixes allowed for VEX-encoded instructions.
///
/// VEX encodes a subset of [`Group1Prefix`] and `0x66` (see [`Group3Prefix`])
/// as part of the `pp` bit field.
#[derive(Clone, Copy, PartialEq)]
pub enum VexPrefix {
    /// The `0x66` prefix.
    _66,
    /// The `0xF2` prefix.
    _F2,
    /// The `0xF3` prefix.
    _F3,
}
789
790impl VexPrefix {
791    /// Encode the `pp` bits.
792    #[inline(always)]
793    pub(crate) fn bits(self) -> u8 {
794        match self {
795            Self::_66 => 0b01,
796            Self::_F3 => 0b10,
797            Self::_F2 => 0b11,
798        }
799    }
800}
801
802impl fmt::Display for VexPrefix {
803    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
804        match self {
805            Self::_66 => write!(f, "66"),
806            Self::_F3 => write!(f, "F3"),
807            Self::_F2 => write!(f, "F2"),
808        }
809    }
810}
811
/// Contains the escape sequences allowed for VEX-encoded instructions.
///
/// VEX encodes these in the `mmmmm` bit field.
#[derive(Clone, Copy, PartialEq)]
pub enum VexEscape {
    /// The `0F` escape sequence.
    _0F,
    /// The `0F 3A` escape sequence.
    _0F3A,
    /// The `0F 38` escape sequence.
    _0F38,
}
821
822impl VexEscape {
823    /// Encode the `m-mmmm` bits.
824    #[inline(always)]
825    pub(crate) fn bits(&self) -> u8 {
826        match self {
827            Self::_0F => 0b01,
828            Self::_0F38 => 0b10,
829            Self::_0F3A => 0b11,
830        }
831    }
832}
833
834impl fmt::Display for VexEscape {
835    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
836        match self {
837            Self::_0F => write!(f, "0F"),
838            Self::_0F3A => write!(f, "0F3A"),
839            Self::_0F38 => write!(f, "0F38"),
840        }
841    }
842}
843
/// Contains the allowed VEX length definitions.
///
/// VEX encodes these in the `L` bit field, a single bit with `128-bit = 0` and
/// `256-bit = 1`. For convenience, we also include the `LIG` and `LZ`
/// syntax used by the reference manual; both are always encoded as `0`.
pub enum VexLength {
    /// Set `VEX.L` to `0` (128-bit).
    L128,
    /// Set `VEX.L` to `1` (256-bit).
    L256,
    /// Set `VEX.L` to `0`, but not necessarily for 128-bit operation. From the
    /// reference manual: "The VEX.L must be encoded to be 0B, an #UD occurs if
    /// VEX.L is not zero."
    LZ,
    /// The `VEX.L` bit is ignored (e.g., for floating point scalar
    /// instructions). This assembler will emit `0`.
    LIG,
}
862
863impl VexLength {
864    /// Encode the `L` bit.
865    pub fn bits(&self) -> u8 {
866        match self {
867            Self::L128 | Self::LIG | Self::LZ => 0b0,
868            Self::L256 => 0b1,
869        }
870    }
871}
872
873impl fmt::Display for VexLength {
874    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
875        match self {
876            Self::L128 => write!(f, "128"),
877            Self::L256 => write!(f, "256"),
878            Self::LIG => write!(f, "LIG"),
879            Self::LZ => write!(f, "LZ"),
880        }
881    }
882}
883
/// Model the `W` bit in VEX-encoded instructions, using the `.WIG`, `.W0` and
/// `.W1` notation of the reference manual.
pub enum VexW {
    /// The `W` bit is ignored; equivalent to `.WIG` in the manual.
    WIG,
    /// The `W` bit is set to `0`; equivalent to `.W0` in the manual.
    W0,
    /// The `W` bit is set to `1`; equivalent to `.W1` in the manual.
    W1,
}
893
894impl VexW {
895    /// Return `true` if the `W` bit is ignored; this is useful to check in the
896    /// DSL for the default case.
897    fn is_ignored(&self) -> bool {
898        match self {
899            Self::WIG => true,
900            Self::W0 | Self::W1 => false,
901        }
902    }
903
904    /// Return `true` if the `W` bit is set (`W1`); otherwise, return `false`.
905    pub(crate) fn as_bool(&self) -> bool {
906        match self {
907            Self::W1 => true,
908            Self::W0 | Self::WIG => false,
909        }
910    }
911}
912
913impl fmt::Display for VexW {
914    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
915        match self {
916            Self::WIG => write!(f, "WIG"),
917            Self::W0 => write!(f, "W0"),
918            Self::W1 => write!(f, "W1"),
919        }
920    }
921}
922
/// The VEX encoding, introduced for AVX instructions.
///
/// ```
/// # use cranelift_assembler_x64_meta::dsl::{vex, VexLength::L128};
/// // To encode a BLENDPD instruction in the manual: VEX.128.66.0F3A.WIG 0D /r ib
/// let enc = vex(L128)._66()._0f3a().wig().op(0x0D).r().ib();
/// assert_eq!(enc.to_string(), "VEX.128.66.0F3A.WIG 0x0D /r ib");
/// ```
pub struct Vex {
    /// The length of the operand (e.g., 128-bit or 256-bit).
    pub length: VexLength,
    /// Map the `PP` field encodings.
    pub pp: Option<VexPrefix>,
    /// Map the `MMMMM` field encodings.
    pub mmmmm: Option<VexEscape>,
    /// The `W` bit.
    pub w: VexW,
    /// VEX-encoded instructions have a single-byte opcode. Other prefix-related
    /// bytes (see [`Opcodes`]) are encoded in the VEX prefixes (see `pp`,
    /// `mmmmm`). From the reference manual: "One (and only one) opcode byte
    /// follows the 2 or 3 byte VEX."
    pub opcode: u8,
    /// See [`Rex::modrm`].
    pub modrm: Option<ModRmKind>,
    /// See [`Rex::imm`].
    pub imm: Imm,
    // NOTE(review): the link below presumably targets a `Vex::is4` builder
    // method defined outside this view — confirm it does not resolve back to
    // this field.
    /// See [`Vex::is4`].
    pub is4: bool,
}
952
953impl Vex {
954    /// Set the `pp` field to use [`VexPrefix::_66`]; equivalent to `.66` in the
955    /// manual.
956    pub fn _66(self) -> Self {
957        assert!(self.pp.is_none());
958        Self {
959            pp: Some(VexPrefix::_66),
960            ..self
961        }
962    }
963
964    /// Set the `pp` field to use [`VexPrefix::_F2`]; equivalent to `.F2` in the
965    /// manual.
966    pub fn _f2(self) -> Self {
967        assert!(self.pp.is_none());
968        Self {
969            pp: Some(VexPrefix::_F2),
970            ..self
971        }
972    }
973
974    /// Set the `pp` field to use [`VexPrefix::_F3`]; equivalent to `.F3` in the
975    /// manual.
976    pub fn _f3(self) -> Self {
977        assert!(self.pp.is_none());
978        Self {
979            pp: Some(VexPrefix::_F3),
980            ..self
981        }
982    }
983
984    /// Set the `mmmmmm` field to use [`VexEscape::_0F`]; equivalent to `.0F` in
985    /// the manual.
986    pub fn _0f(self) -> Self {
987        assert!(self.mmmmm.is_none());
988        Self {
989            mmmmm: Some(VexEscape::_0F),
990            ..self
991        }
992    }
993
994    /// Set the `mmmmmm` field to use [`VexEscape::_0F3A`]; equivalent to
995    /// `.0F3A` in the manual.
996    pub fn _0f3a(self) -> Self {
997        assert!(self.mmmmm.is_none());
998        Self {
999            mmmmm: Some(VexEscape::_0F3A),
1000            ..self
1001        }
1002    }
1003
1004    /// Set the `mmmmmm` field to use [`VexEscape::_0F38`]; equivalent to
1005    /// `.0F38` in the manual.
1006    pub fn _0f38(self) -> Self {
1007        assert!(self.mmmmm.is_none());
1008        Self {
1009            mmmmm: Some(VexEscape::_0F38),
1010            ..self
1011        }
1012    }
1013
1014    /// Set the `W` bit to `0`; equivalent to `.W0` in the manual.
1015    pub fn w0(self) -> Self {
1016        assert!(self.w.is_ignored());
1017        Self {
1018            w: VexW::W0,
1019            ..self
1020        }
1021    }
1022
1023    /// Set the `W` bit to `1`; equivalent to `.W1` in the manual.
1024    pub fn w1(self) -> Self {
1025        assert!(self.w.is_ignored());
1026        Self {
1027            w: VexW::W1,
1028            ..self
1029        }
1030    }
1031
1032    /// Ignore the `W` bit; equivalent to `.WIG` in the manual.
1033    pub fn wig(self) -> Self {
1034        assert!(self.w.is_ignored());
1035        Self {
1036            w: VexW::WIG,
1037            ..self
1038        }
1039    }
1040
1041    /// Set the single opcode for this VEX-encoded instruction.
1042    pub fn op(self, opcode: u8) -> Self {
1043        assert_eq!(self.opcode, u8::MAX);
1044        Self { opcode, ..self }
1045    }
1046
1047    /// Set the ModR/M byte to contain a register operand; see [`Rex::r`].
1048    pub fn r(self) -> Self {
1049        assert!(self.modrm.is_none());
1050        Self {
1051            modrm: Some(ModRmKind::Reg),
1052            ..self
1053        }
1054    }
1055
1056    /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
1057    /// reference manual.
1058    ///
1059    /// # Panics
1060    ///
1061    /// Panics if an immediate operand is already set.
1062    #[must_use]
1063    pub fn ib(self) -> Self {
1064        assert_eq!(self.imm, Imm::None);
1065        Self {
1066            imm: Imm::ib,
1067            ..self
1068        }
1069    }
1070
1071    /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
1072    /// the reference manual.
1073    ///
1074    /// # Panics
1075    ///
1076    /// Panics if an immediate operand is already set.
1077    #[must_use]
1078    pub fn iw(self) -> Self {
1079        assert_eq!(self.imm, Imm::None);
1080        Self {
1081            imm: Imm::iw,
1082            ..self
1083        }
1084    }
1085
1086    /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
1087    /// in the reference manual.
1088    ///
1089    /// # Panics
1090    ///
1091    /// Panics if an immediate operand is already set.
1092    #[must_use]
1093    pub fn id(self) -> Self {
1094        assert_eq!(self.imm, Imm::None);
1095        Self {
1096            imm: Imm::id,
1097            ..self
1098        }
1099    }
1100
1101    /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
1102    /// in the reference manual.
1103    ///
1104    /// # Panics
1105    ///
1106    /// Panics if an immediate operand is already set.
1107    #[must_use]
1108    pub fn io(self) -> Self {
1109        assert_eq!(self.imm, Imm::None);
1110        Self {
1111            imm: Imm::io,
1112            ..self
1113        }
1114    }
1115
1116    /// Set the digit extending the opcode; equivalent to `/<digit>` in the
1117    /// reference manual.
1118    ///
1119    /// # Panics
1120    ///
1121    /// Panics if `extension` is too large.
1122    #[must_use]
1123    pub fn digit(self, extension: u8) -> Self {
1124        assert!(extension <= 0b111, "must fit in 3 bits");
1125        Self {
1126            modrm: Some(ModRmKind::Digit(extension)),
1127            ..self
1128        }
1129    }
1130
1131    /// An 8-bit immediate byte is present containing a source register
1132    /// specifier in either imm8[7:4] (for 64-bit
1133    /// mode) or imm8[6:4] (for 32-bit mode), and instruction-specific payload
1134    /// in imm8[3:0].
1135    pub fn is4(self) -> Self {
1136        Self { is4: true, ..self }
1137    }
1138
1139    fn validate(&self, _operands: &[Operand]) {
1140        assert!(self.opcode != u8::MAX);
1141        assert!(self.mmmmm.is_some());
1142    }
1143
1144    /// Retrieve the digit extending the opcode, if available.
1145    #[must_use]
1146    pub fn unwrap_digit(&self) -> Option<u8> {
1147        match self.modrm {
1148            Some(ModRmKind::Digit(digit)) => Some(digit),
1149            _ => None,
1150        }
1151    }
1152}
1153
1154impl From<Vex> for Encoding {
1155    fn from(vex: Vex) -> Encoding {
1156        Encoding::Vex(vex)
1157    }
1158}
1159
1160impl fmt::Display for Vex {
1161    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1162        write!(f, "VEX.{}", self.length)?;
1163        if let Some(pp) = self.pp {
1164            write!(f, ".{pp}")?;
1165        }
1166        if let Some(mmmmm) = self.mmmmm {
1167            write!(f, ".{mmmmm}")?;
1168        }
1169        write!(f, ".{} {:#04X}", self.w, self.opcode)?;
1170        if let Some(modrm) = self.modrm {
1171            write!(f, " {modrm}")?;
1172        }
1173        if self.imm != Imm::None {
1174            write!(f, " {}", self.imm)?;
1175        }
1176        Ok(())
1177    }
1178}