cranelift_assembler_x64_meta/dsl/
encoding.rs

1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
//! This module references the Intel® 64 and IA-32 Architectures Software
//! Developer's Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24    Rex {
25        opcodes: opcode.into(),
26        w: false,
27        r: false,
28        digit: None,
29        imm: Imm::None,
30    }
31}
32
33/// An abbreviated constructor for VEX-encoded instructions.
34#[must_use]
35pub fn vex() -> Vex {
36    Vex {}
37}
38
39/// Enumerate the ways x64 encodes instructions.
40pub enum Encoding {
41    Rex(Rex),
42    Vex(Vex),
43}
44
45impl Encoding {
46    /// Check that the encoding is valid for the given operands; this can find
47    /// issues earlier, before generating any Rust code.
48    pub fn validate(&self, operands: &[Operand]) {
49        match self {
50            Encoding::Rex(rex) => rex.validate(operands),
51            Encoding::Vex(vex) => vex.validate(),
52        }
53    }
54}
55
56impl fmt::Display for Encoding {
57    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58        match self {
59            Encoding::Rex(rex) => write!(f, "{rex}"),
60            Encoding::Vex(_vex) => todo!(),
61        }
62    }
63}
64
65/// The traditional x64 encoding.
66///
67/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name
68/// for the optional _byte_ extending the number of available registers, e.g.,
69/// but we use it here to distinguish this from other encoding formats (e.g.,
70/// VEX, EVEX). The "REX" _byte_ is still optional in this encoding and only
71/// emitted when necessary.
72pub struct Rex {
73    /// The opcodes for this instruction.
74    ///
75    /// Multi-byte opcodes are handled by passing an array of opcodes (including
76    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
77    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
78    ///
79    /// ```
80    /// # use cranelift_assembler_x64_meta::dsl::rex;
81    /// let enc = rex([0x66, 0x0f, 0x54]);
82    /// ```
83    pub opcodes: Opcodes,
84    /// Indicates setting the REX.W bit.
85    ///
86    /// From the reference manual: "Indicates the use of a REX prefix that
87    /// affects operand size or instruction semantics. The ordering of the REX
88    /// prefix and other optional/mandatory instruction prefixes are discussed
89    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
90    /// 64-bit behavior are not listed explicitly in the opcode column."
91    pub w: bool,
92    /// From the reference manual: "indicates that the ModR/M byte of the
93    /// instruction contains a register operand and an r/m operand."
94    pub r: bool,
95    /// From the reference manual: "a digit between 0 and 7 indicates that the
96    /// ModR/M byte of the instruction uses only the r/m (register or memory)
97    /// operand. The reg field contains the digit that provides an extension to
98    /// the instruction's opcode."
99    pub digit: Option<u8>,
100    /// The number of bits used as an immediate operand to the instruction.
101    pub imm: Imm,
102}
103
104impl Rex {
105    /// Set the `REX.W` bit.
106    #[must_use]
107    pub fn w(self) -> Self {
108        Self { w: true, ..self }
109    }
110
111    /// Set the ModR/M byte to contain a register operand and an r/m operand;
112    /// equivalent to `/r` in the reference manual.
113    #[must_use]
114    pub fn r(self) -> Self {
115        Self { r: true, ..self }
116    }
117
118    /// Set the digit extending the opcode; equivalent to `/<digit>` in the
119    /// reference manual.
120    ///
121    /// # Panics
122    ///
123    /// Panics if `digit` is too large.
124    #[must_use]
125    pub fn digit(self, digit: u8) -> Self {
126        assert!(digit <= 0b111, "must fit in 3 bits");
127        Self {
128            digit: Some(digit),
129            ..self
130        }
131    }
132
133    /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
134    /// reference manual.
135    ///
136    /// # Panics
137    ///
138    /// Panics if an immediate operand is already set.
139    #[must_use]
140    pub fn ib(self) -> Self {
141        assert_eq!(self.imm, Imm::None);
142        Self {
143            imm: Imm::ib,
144            ..self
145        }
146    }
147
148    /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
149    /// the reference manual.
150    ///
151    /// # Panics
152    ///
153    /// Panics if an immediate operand is already set.
154    #[must_use]
155    pub fn iw(self) -> Self {
156        assert_eq!(self.imm, Imm::None);
157        Self {
158            imm: Imm::iw,
159            ..self
160        }
161    }
162
163    /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
164    /// in the reference manual.
165    ///
166    /// # Panics
167    ///
168    /// Panics if an immediate operand is already set.
169    #[must_use]
170    pub fn id(self) -> Self {
171        assert_eq!(self.imm, Imm::None);
172        Self {
173            imm: Imm::id,
174            ..self
175        }
176    }
177
178    /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
179    /// in the reference manual.
180    ///
181    /// # Panics
182    ///
183    /// Panics if an immediate operand is already set.
184    #[must_use]
185    pub fn io(self) -> Self {
186        assert_eq!(self.imm, Imm::None);
187        Self {
188            imm: Imm::io,
189            ..self
190        }
191    }
192
193    /// Check a subset of the rules for valid encodings outlined in chapter 2,
194    /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
195    /// Developer’s Manual, Volume 2A.
196    fn validate(&self, operands: &[Operand]) {
197        assert!(!(self.r && self.digit.is_some()));
198        assert!(!(self.r && self.imm != Imm::None));
199        assert!(
200            !(self.w && (self.opcodes.prefixes.has_operand_size_override())),
201            "though valid, if REX.W is set then the 66 prefix is ignored--avoid encoding this"
202        );
203
204        if self.opcodes.prefixes.has_operand_size_override() {
205            assert!(
206                operands.iter().all(|&op| matches!(
207                    op.location.kind(),
208                    OperandKind::Imm(_) | OperandKind::FixedReg(_)
209                ) || op.location.bits() == 16
210                    || op.location.bits() == 128),
211                "when we encode the 66 prefix, we expect all operands to be 16-bit wide"
212            );
213        }
214
215        if let Some(OperandKind::Imm(op)) = operands
216            .iter()
217            .map(|o| o.location.kind())
218            .find(|k| matches!(k, OperandKind::Imm(_)))
219        {
220            assert_eq!(
221                op.bits(),
222                self.imm.bits(),
223                "for an immediate, the encoding width must match the declared operand width"
224            );
225        }
226    }
227}
228
229impl From<Rex> for Encoding {
230    fn from(rex: Rex) -> Encoding {
231        Encoding::Rex(rex)
232    }
233}
234
235impl fmt::Display for Rex {
236    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237        if let Some(group1) = &self.opcodes.prefixes.group1 {
238            write!(f, "{group1} + ")?;
239        }
240        if let Some(group2) = &self.opcodes.prefixes.group2 {
241            write!(f, "{group2} + ")?;
242        }
243        if let Some(group3) = &self.opcodes.prefixes.group3 {
244            write!(f, "{group3} + ")?;
245        }
246        if let Some(group4) = &self.opcodes.prefixes.group4 {
247            write!(f, "{group4} + ")?;
248        }
249        if self.w {
250            write!(f, "REX.W + ")?;
251        }
252        if self.opcodes.escape {
253            write!(f, "0x0F + ")?;
254        }
255        write!(f, "{:#04x}", self.opcodes.primary)?;
256        if let Some(secondary) = self.opcodes.secondary {
257            write!(f, " {secondary:#04x}")?;
258        }
259        if self.r {
260            write!(f, " /r")?;
261        }
262        if let Some(digit) = self.digit {
263            write!(f, " /{digit}")?;
264        }
265        if self.imm != Imm::None {
266            write!(f, " {}", self.imm)?;
267        }
268        Ok(())
269    }
270}
271
272/// Describe an instruction's opcodes. From section 2.1.2 "Opcodes" in the
273/// reference manual:
274///
275/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
276/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
277/// > be defined within the primary opcode. Such fields define the direction of
278/// > operation, size of displacements, register encoding, condition codes, or
279/// > sign extension. Encoding fields used by an opcode vary depending on the
280/// > class of operation.
281/// >
282/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
283/// > of one of the following:
284/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
285/// >   byte.
286/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
287/// >   a second opcode byte (same as previous bullet).
288/// >
289/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
290/// > The first byte is a mandatory prefix (it is not considered as a repeat
291/// > prefix).
292/// >
293/// > Three-byte opcode formats for general-purpose and SIMD instructions
294/// > consist of one of the following:
295/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
296/// >   opcode bytes.
297/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
298/// >   two additional opcode bytes (same as previous bullet).
299/// >
300/// > For example, `PHADDW` for XMM registers consists of the following
301/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
302pub struct Opcodes {
303    /// The prefix bytes for this instruction.
304    pub prefixes: Prefixes,
305    /// Indicates the use of an escape opcode byte, `0x0f`.
306    pub escape: bool,
307    /// The primary opcode.
308    pub primary: u8,
309    /// Some instructions (e.g., SIMD) may have a secondary opcode.
310    pub secondary: Option<u8>,
311}
312
313impl From<u8> for Opcodes {
314    fn from(primary: u8) -> Opcodes {
315        Opcodes {
316            prefixes: Prefixes::default(),
317            escape: false,
318            primary,
319            secondary: None,
320        }
321    }
322}
323
324impl<const N: usize> From<[u8; N]> for Opcodes {
325    fn from(bytes: [u8; N]) -> Self {
326        let (prefixes, remaining) = Prefixes::parse(&bytes);
327        let (escape, primary, secondary) = match remaining {
328            [primary] => (false, *primary, None),
329            [0x0f, primary] => (true, *primary, None),
330            [0x0f, primary, secondary] => (true, *primary, Some(*secondary)),
331            _ => panic!(
332                "invalid opcodes after prefix; expected [opcode], [0x0f, opcode], or [0x0f, opcode, opcode], found {remaining:?}"
333            ),
334        };
335        Self {
336            prefixes,
337            escape,
338            primary,
339            secondary,
340        }
341    }
342}
343
344/// The allowed prefixes for an instruction. From the reference manual (section
345/// 2.1.1):
346///
347/// > Instruction prefixes are divided into four groups, each with a set of
348/// > allowable prefix codes. For each instruction, it is only useful to include
349/// > up to one prefix code from each of the four groups (Groups 1, 2, 3, 4).
350/// > Groups 1 through 4 may be placed in any order relative to each other.
351#[derive(Default)]
352pub struct Prefixes {
353    pub group1: Option<Group1Prefix>,
354    pub group2: Option<Group2Prefix>,
355    pub group3: Option<Group3Prefix>,
356    pub group4: Option<Group4Prefix>,
357}
358
359impl Prefixes {
360    /// Parse a slice of `bytes` into a set of prefixes, returning both the
361    /// configured [`Prefixes`] as well as any remaining bytes.
362    fn parse(mut bytes: &[u8]) -> (Self, &[u8]) {
363        let mut prefixes = Self::default();
364        while !bytes.is_empty() && prefixes.try_assign(bytes[0]).is_ok() {
365            bytes = &bytes[1..];
366        }
367        (prefixes, bytes)
368    }
369
370    /// Attempt to parse a `byte` as a prefix and, if successful, assigns it to
371    /// the correct prefix group.
372    ///
373    /// # Panics
374    ///
375    /// This function panics if the prefix for a group is already set; this
376    /// disallows specifying multiple prefixes per group.
377    fn try_assign(&mut self, byte: u8) -> Result<(), ()> {
378        if let Ok(p) = Group1Prefix::try_from(byte) {
379            assert!(self.group1.is_none());
380            self.group1 = Some(p);
381            Ok(())
382        } else if let Ok(p) = Group2Prefix::try_from(byte) {
383            assert!(self.group2.is_none());
384            self.group2 = Some(p);
385            Ok(())
386        } else if let Ok(p) = Group3Prefix::try_from(byte) {
387            assert!(self.group3.is_none());
388            self.group3 = Some(p);
389            Ok(())
390        } else if let Ok(p) = Group4Prefix::try_from(byte) {
391            assert!(self.group4.is_none());
392            self.group4 = Some(p);
393            Ok(())
394        } else {
395            Err(())
396        }
397    }
398
399    /// Check if the `0x66` prefix is present.
400    fn has_operand_size_override(&self) -> bool {
401        matches!(self.group3, Some(Group3Prefix::OperandSizeOverride))
402    }
403
404    /// Check if any prefix is present.
405    pub fn is_empty(&self) -> bool {
406        self.group1.is_none()
407            && self.group2.is_none()
408            && self.group3.is_none()
409            && self.group4.is_none()
410    }
411}
412
/// The group 1 prefixes: `LOCK` (`0xf0`) and the two repeat prefixes (`0xf2`,
/// `0xf3`).
pub enum Group1Prefix {
    /// The LOCK prefix (`0xf0`). From the reference manual:
    ///
    /// > The LOCK prefix (F0H) forces an operation that ensures exclusive use
    /// > of shared memory in a multiprocessor environment. See "LOCK—Assert
    /// > LOCK# Signal Prefix" in Chapter 3, Instruction Set Reference, A-L, for
    /// > a description of this prefix.
    Lock,
    /// A REPNE/REPNZ prefix (`0xf2`) or a BND prefix under certain conditions.
    /// `REP*` prefixes apply only to string and input/output instructions but
    /// can be used as mandatory prefixes in other kinds of instructions (e.g.,
    /// SIMD). From the reference manual:
    ///
    /// > Repeat prefixes (F2H, F3H) cause an instruction to be repeated for
    /// > each element of a string. Use these prefixes only with string and I/O
    /// > instructions (MOVS, CMPS, SCAS, LODS, STOS, INS, and OUTS). Use of
    /// > repeat prefixes and/or undefined opcodes with other Intel 64 or IA-32
    /// > instructions is reserved; such use may cause unpredictable behavior.
    /// >
    /// > Some instructions may use F2H, F3H as a mandatory prefix to express
    /// > distinct functionality.
    REPNorBND,
    /// A REPE/REPZ prefix (`0xf3`); `REP*` prefixes apply only to string and
    /// input/output instructions but can be used as mandatory prefixes in other
    /// kinds of instructions (e.g., SIMD). See `REPNorBND` for more details.
    REP_,
}

impl TryFrom<u8> for Group1Prefix {
    type Error = u8;

    /// Recognize `0xF0`, `0xF2` and `0xF3`; any other byte is handed back
    /// unchanged as the error.
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0xF0 => Ok(Self::Lock),
            0xF2 => Ok(Self::REPNorBND),
            0xF3 => Ok(Self::REP_),
            other => Err(other),
        }
    }
}

impl fmt::Display for Group1Prefix {
    /// Print the prefix as its byte value in upper-case hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            Self::Lock => "0xF0",
            Self::REPNorBND => "0xF2",
            Self::REP_ => "0xF3",
        };
        f.write_str(text)
    }
}
462
/// The group 2 prefixes: the segment overrides, two of which double as
/// (deprecated) branch hints when used on a `Jcc` instruction. Note that using
/// the segment override prefixes on a branch instruction is reserved. See
/// section 2.1.1, "Instruction Prefixes," in the reference manual.
pub enum Group2Prefix {
    /// The CS segment override prefix (`0x2e`); also the "branch not taken"
    /// hint.
    CSorBNT,
    /// The SS segment override prefix (`0x36`).
    SS,
    /// The DS segment override prefix (`0x3e`); also the "branch taken" hint.
    DSorBT,
    /// The ES segment override prefix (`0x26`).
    ES,
    /// The FS segment override prefix (`0x64`).
    FS,
    /// The GS segment override prefix (`0x65`).
    GS,
}

impl TryFrom<u8> for Group2Prefix {
    type Error = u8;

    /// Recognize the six segment-override bytes; any other byte is handed
    /// back unchanged as the error.
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x2E => Ok(Self::CSorBNT),
            0x36 => Ok(Self::SS),
            0x3E => Ok(Self::DSorBT),
            0x26 => Ok(Self::ES),
            0x64 => Ok(Self::FS),
            0x65 => Ok(Self::GS),
            other => Err(other),
        }
    }
}

impl fmt::Display for Group2Prefix {
    /// Print the prefix as its byte value in upper-case hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            Self::CSorBNT => "0x2E",
            Self::SS => "0x36",
            Self::DSorBT => "0x3E",
            Self::ES => "0x26",
            Self::FS => "0x64",
            Self::GS => "0x65",
        };
        f.write_str(text)
    }
}
510
/// The group 3 prefix: the operand-size override (`0x66`), which also serves as
/// a SIMD prefix. From the reference manual:
///
/// > The operand-size override prefix allows a program to switch between 16-
/// > and 32-bit operand sizes. Either size can be the default; use of the
/// > prefix selects the non-default size. Some SSE2/SSE3/SSSE3/SSE4
/// > instructions and instructions using a three-byte sequence of primary
/// > opcode bytes may use 66H as a mandatory prefix to express distinct
/// > functionality.
pub enum Group3Prefix {
    /// The `0x66` prefix byte.
    OperandSizeOverride,
}

impl TryFrom<u8> for Group3Prefix {
    type Error = u8;

    /// Recognize `0x66`; any other byte is handed back unchanged as the
    /// error.
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        if byte == 0x66 {
            Ok(Self::OperandSizeOverride)
        } else {
            Err(byte)
        }
    }
}

impl fmt::Display for Group3Prefix {
    /// Print the prefix as its byte value in hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            Self::OperandSizeOverride => "0x66",
        };
        f.write_str(text)
    }
}
541
/// The group 4 prefix: the address-size override (`0x67`). From the reference
/// manual:
///
/// > The address-size override prefix (67H) allows programs to switch between
/// > 16- and 32-bit addressing. Either size can be the default; the prefix
/// > selects the non-default size.
pub enum Group4Prefix {
    /// The `0x67` prefix byte.
    AddressSizeOverride,
}

impl TryFrom<u8> for Group4Prefix {
    type Error = u8;

    /// Recognize `0x67`; any other byte is handed back unchanged as the
    /// error.
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        if byte == 0x67 {
            Ok(Self::AddressSizeOverride)
        } else {
            Err(byte)
        }
    }
}

impl fmt::Display for Group4Prefix {
    /// Print the prefix as its byte value in hexadecimal.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            Self::AddressSizeOverride => "0x67",
        };
        f.write_str(text)
    }
}
569
/// The size of an instruction's immediate operand, if it has one. From the
/// reference manual:
///
/// > A 1-byte (ib), 2-byte (iw), 4-byte (id) or 8-byte (io) immediate operand
/// > to the instruction that follows the opcode, ModR/M bytes or scale-indexing
/// > bytes. The opcode determines if the operand is a signed value. All words,
/// > doublewords, and quadwords are given with the low-order byte first.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate operand.
    None,
    /// A 1-byte (8-bit) immediate.
    ib,
    /// A 2-byte (16-bit) immediate.
    iw,
    /// A 4-byte (32-bit) immediate.
    id,
    /// An 8-byte (64-bit) immediate.
    io,
}

impl Imm {
    /// The width of the immediate in bits; `0` when there is no immediate.
    fn bits(&self) -> u8 {
        match self {
            Self::None => 0,
            Self::ib => 8,
            Self::iw => 16,
            Self::id => 32,
            Self::io => 64,
        }
    }
}

impl fmt::Display for Imm {
    /// Print the reference manual's abbreviation (`ib`, `iw`, `id`, `io`), or
    /// nothing at all for [`Imm::None`].
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            Self::None => "",
            Self::ib => "ib",
            Self::iw => "iw",
            Self::id => "id",
            Self::io => "io",
        };
        f.write_str(text)
    }
}
609
/// A VEX encoding description; it has no fields yet.
pub struct Vex {}

impl Vex {
    /// Validation of VEX encodings is not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics, via `todo!()`.
    fn validate(&self) {
        todo!()
    }
}