cranelift_assembler_x64_meta/dsl/
encoding.rs

1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
//! This module references the Intel® 64 and IA-32 Architectures Software
//! Developer's Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24    Rex {
25        opcodes: opcode.into(),
26        w: false,
27        r: false,
28        digit: None,
29        imm: Imm::None,
30    }
31}
32
/// An abbreviated constructor for VEX-encoded instructions.
///
/// [`Vex`] currently has no fields, so the returned value carries no
/// configuration.
#[must_use]
pub fn vex() -> Vex {
    Vex {}
}
38
/// Enumerate the ways x64 encodes instructions.
pub enum Encoding {
    /// The traditional encoding, described by [`Rex`].
    Rex(Rex),
    /// The VEX encoding, described by [`Vex`].
    Vex(Vex),
}
44
45impl Encoding {
46    /// Check that the encoding is valid for the given operands; this can find
47    /// issues earlier, before generating any Rust code.
48    pub fn validate(&self, operands: &[Operand]) {
49        match self {
50            Encoding::Rex(rex) => rex.validate(operands),
51            Encoding::Vex(vex) => vex.validate(),
52        }
53    }
54}
55
56impl fmt::Display for Encoding {
57    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58        match self {
59            Encoding::Rex(rex) => write!(f, "{rex}"),
60            Encoding::Vex(_vex) => todo!(),
61        }
62    }
63}
64
/// The traditional x64 encoding.
///
/// We use the "REX" name here in a slightly unorthodox way: strictly, "REX" is
/// the name of the optional prefix _byte_ that, e.g., extends the number of
/// available registers. Here we use the name to distinguish this encoding
/// format from others (e.g., VEX, EVEX). The "REX" _byte_ is still optional in
/// this encoding and only emitted when necessary.
pub struct Rex {
    /// The opcodes for this instruction.
    ///
    /// Multi-byte opcodes are handled by passing an array of opcodes (including
    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
    ///
    /// ```
    /// # use cranelift_assembler_x64_meta::dsl::rex;
    /// let enc = rex([0x66, 0x0f, 0x54]);
    /// ```
    pub opcodes: Opcodes,
    /// Indicates setting the REX.W bit.
    ///
    /// From the reference manual: "Indicates the use of a REX prefix that
    /// affects operand size or instruction semantics. The ordering of the REX
    /// prefix and other optional/mandatory instruction prefixes are discussed
    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
    /// 64-bit behavior are not listed explicitly in the opcode column."
    pub w: bool,
    /// Indicates the `/r` form of the ModR/M byte.
    ///
    /// From the reference manual: "indicates that the ModR/M byte of the
    /// instruction contains a register operand and an r/m operand."
    pub r: bool,
    /// Indicates the `/<digit>` form of the ModR/M byte.
    ///
    /// From the reference manual: "a digit between 0 and 7 indicates that the
    /// ModR/M byte of the instruction uses only the r/m (register or memory)
    /// operand. The reg field contains the digit that provides an extension to
    /// the instruction's opcode."
    pub digit: Option<u8>,
    /// The size of the immediate operand to the instruction, if any.
    ///
    /// From the reference manual: "a 1-byte (ib), 2-byte (iw), 4-byte (id) or
    /// 8-byte (io) immediate operand to the instruction that follows the
    /// opcode, ModR/M bytes or scale-indexing bytes. The opcode determines if
    /// the operand is a signed value. All words, doublewords, and quadwords are
    /// given with the low-order byte first."
    pub imm: Imm,
}
109
110impl Rex {
111    /// Set the `REX.W` bit.
112    #[must_use]
113    pub fn w(self) -> Self {
114        Self { w: true, ..self }
115    }
116
117    /// Set the ModR/M byte to contain a register operand and an r/m operand;
118    /// equivalent to `/r` in the reference manual.
119    #[must_use]
120    pub fn r(self) -> Self {
121        Self { r: true, ..self }
122    }
123
124    /// Set the digit extending the opcode; equivalent to `/<digit>` in the
125    /// reference manual.
126    ///
127    /// # Panics
128    ///
129    /// Panics if `digit` is too large.
130    #[must_use]
131    pub fn digit(self, digit: u8) -> Self {
132        assert!(digit <= 0b111, "must fit in 3 bits");
133        Self { digit: Some(digit), ..self }
134    }
135
136    /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
137    /// reference manual.
138    ///
139    /// # Panics
140    ///
141    /// Panics if an immediate operand is already set.
142    #[must_use]
143    pub fn ib(self) -> Self {
144        assert_eq!(self.imm, Imm::None);
145        Self { imm: Imm::ib, ..self }
146    }
147
148    /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
149    /// the reference manual.
150    ///
151    /// # Panics
152    ///
153    /// Panics if an immediate operand is already set.
154    #[must_use]
155    pub fn iw(self) -> Self {
156        assert_eq!(self.imm, Imm::None);
157        Self { imm: Imm::iw, ..self }
158    }
159
160    /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
161    /// in the reference manual.
162    ///
163    /// # Panics
164    ///
165    /// Panics if an immediate operand is already set.
166    #[must_use]
167    pub fn id(self) -> Self {
168        assert_eq!(self.imm, Imm::None);
169        Self { imm: Imm::id, ..self }
170    }
171
172    /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
173    /// in the reference manual.
174    ///
175    /// # Panics
176    ///
177    /// Panics if an immediate operand is already set.
178    #[must_use]
179    pub fn io(self) -> Self {
180        assert_eq!(self.imm, Imm::None);
181        Self { imm: Imm::io, ..self }
182    }
183
184    /// Check a subset of the rules for valid encodings outlined in chapter 2,
185    /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
186    /// Developer’s Manual, Volume 2A.
187    fn validate(&self, operands: &[Operand]) {
188        assert!(!(self.r && self.digit.is_some()));
189        assert!(!(self.r && self.imm != Imm::None));
190        assert!(
191            !(self.w && (self.opcodes.prefix.contains_66())),
192            "though valid, if REX.W is set then the 66 prefix is ignored--avoid encoding this"
193        );
194
195        if self.opcodes.prefix.contains_66() {
196            assert!(
197                operands.iter().all(|&op| matches!(
198                    op.location.kind(),
199                    OperandKind::Imm(_) | OperandKind::FixedReg(_)
200                ) || op.location.bits() == 16
201                    || op.location.bits() == 128),
202                "when we encode the 66 prefix, we expect all operands to be 16-bit wide"
203            );
204        }
205
206        if let Some(OperandKind::Imm(op)) = operands
207            .iter()
208            .map(|o| o.location.kind())
209            .find(|k| matches!(k, OperandKind::Imm(_)))
210        {
211            assert_eq!(
212                op.bits(),
213                self.imm.bits(),
214                "for an immediate, the encoding width must match the declared operand width"
215            );
216        }
217    }
218}
219
220impl From<Rex> for Encoding {
221    fn from(rex: Rex) -> Encoding {
222        Encoding::Rex(rex)
223    }
224}
225
226impl fmt::Display for Rex {
227    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
228        match self.opcodes.prefix {
229            LegacyPrefix::NoPrefix => {}
230            LegacyPrefix::_66 => write!(f, "0x66 + ")?,
231            LegacyPrefix::_F0 => write!(f, "0xF0 + ")?,
232            LegacyPrefix::_66F0 => write!(f, "0x66 0xF0 + ")?,
233            LegacyPrefix::_F2 => write!(f, "0xF2 + ")?,
234            LegacyPrefix::_F3 => write!(f, "0xF3 + ")?,
235            LegacyPrefix::_66F3 => write!(f, "0x66 0xF3 + ")?,
236        }
237        if self.w {
238            write!(f, "REX.W + ")?;
239        }
240        if self.opcodes.escape {
241            write!(f, "0x0F + ")?;
242        }
243        write!(f, "{:#04x}", self.opcodes.primary)?;
244        if let Some(secondary) = self.opcodes.secondary {
245            write!(f, " {secondary:#04x}")?;
246        }
247        if self.r {
248            write!(f, " /r")?;
249        }
250        if let Some(digit) = self.digit {
251            write!(f, " /{digit}")?;
252        }
253        if self.imm != Imm::None {
254            write!(f, " {}", self.imm)?;
255        }
256        Ok(())
257    }
258}
259
/// Describe an instruction's opcodes: an optional legacy prefix, an optional
/// `0x0f` escape byte, a primary opcode, and an optional secondary opcode.
/// From section 2.1.2 "Opcodes" in the reference manual:
///
/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
/// > be defined within the primary opcode. Such fields define the direction of
/// > operation, size of displacements, register encoding, condition codes, or
/// > sign extension. Encoding fields used by an opcode vary depending on the
/// > class of operation.
/// >
/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
/// > of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
/// >   byte.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
/// >   a second opcode byte (same as previous bullet).
/// >
/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
/// > The first byte is a mandatory prefix (it is not considered as a repeat
/// > prefix).
/// >
/// > Three-byte opcode formats for general-purpose and SIMD instructions
/// > consist of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
/// >   opcode bytes.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
/// >   two additional opcode bytes (same as previous bullet).
/// >
/// > For example, `PHADDW` for XMM registers consists of the following
/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
///
/// Note that this structure stores the escape byte as a flag (`escape`) and
/// not as an opcode byte; `primary` and `secondary` hold the bytes after it.
pub struct Opcodes {
    /// The prefix bytes for this instruction.
    pub prefix: LegacyPrefix,
    /// Indicates the use of an escape opcode byte, `0x0f`.
    pub escape: bool,
    /// The primary opcode.
    pub primary: u8,
    /// Some instructions (e.g., SIMD) may have a secondary opcode.
    pub secondary: Option<u8>,
}
300
301impl From<u8> for Opcodes {
302    fn from(primary: u8) -> Opcodes {
303        Opcodes {
304            prefix: LegacyPrefix::NoPrefix,
305            escape: false,
306            primary,
307            secondary: None,
308        }
309    }
310}
311
312impl From<[u8; 1]> for Opcodes {
313    fn from(bytes: [u8; 1]) -> Opcodes {
314        Opcodes::from(bytes[0])
315    }
316}
317
318impl From<[u8; 2]> for Opcodes {
319    fn from(bytes: [u8; 2]) -> Opcodes {
320        let [a, b] = bytes;
321        match (LegacyPrefix::try_from(a), b) {
322            (Ok(prefix), primary) => Opcodes { prefix, escape: false, primary, secondary: None },
323            (Err(0x0f), primary) => Opcodes {
324                prefix: LegacyPrefix::NoPrefix,
325                escape: true,
326                primary,
327                secondary: None,
328            },
329            _ => panic!("invalid opcodes; expected [prefix, opcode] or [0x0f, opcode]"),
330        }
331    }
332}
333
334impl From<[u8; 3]> for Opcodes {
335    fn from(bytes: [u8; 3]) -> Opcodes {
336        let [a, b, c] = bytes;
337        match (LegacyPrefix::try_from(a), b, c) {
338            (Ok(prefix), 0x0f, primary) => Opcodes { prefix, escape: true, primary, secondary: None },
339            (Err(0x0f), primary, secondary) => Opcodes {
340                prefix: LegacyPrefix::NoPrefix,
341                escape: true,
342                primary,
343                secondary: Some(secondary),
344            },
345            _ => panic!("invalid opcodes; expected [prefix, 0x0f, opcode] or [0x0f, opcode, opcode]"),
346        }
347    }
348}
349
350impl From<[u8; 4]> for Opcodes {
351    fn from(bytes: [u8; 4]) -> Opcodes {
352        let [a, b, c, d] = bytes;
353        match (LegacyPrefix::try_from(a), b, c, d) {
354            (Ok(prefix), 0x0f, primary, secondary) => Opcodes {
355                prefix,
356                escape: false,
357                primary,
358                secondary: Some(secondary),
359            },
360            _ => panic!("invalid opcodes; expected [prefix, 0x0f, opcode, opcode]"),
361        }
362    }
363}
364
/// A prefix byte (or pair of prefix bytes) for an instruction.
///
/// Converting from a single `u8` only ever yields the single-byte variants
/// (`_66`, `_F0`, `_F2`, `_F3`); the combined variants (`_66F0`, `_66F3`) have
/// to be named directly.
#[derive(PartialEq)]
pub enum LegacyPrefix {
    /// No prefix bytes.
    NoPrefix,
    /// An operand size override typically denoting "16-bit operation" or "SSE instructions". But the
    /// reference manual is more nuanced:
    ///
    /// > The operand-size override prefix allows a program to switch between
    /// > 16- and 32-bit operand sizes. Either size can be the default; use of
    /// > the prefix selects the non-default.
    /// > Some SSE2/SSE3/SSSE3/SSE4 instructions and instructions using a three-byte
    /// > sequence of primary opcode bytes may use 66H as a mandatory prefix to express
    /// > distinct functionality.
    _66,
    /// The lock prefix.
    _F0,
    /// Operand size override and lock.
    _66F0,
    /// REPNE, but no specific meaning here -- is just an opcode extension.
    _F2,
    /// REP/REPE, but no specific meaning here -- is just an opcode extension.
    _F3,
    /// Operand size override and same effect as F3.
    _66F3,
}
391
392impl LegacyPrefix {
393    #[must_use]
394    pub fn contains_66(&self) -> bool {
395        match self {
396            LegacyPrefix::_66 | LegacyPrefix::_66F0 | LegacyPrefix::_66F3 => true,
397            LegacyPrefix::NoPrefix | LegacyPrefix::_F0 | LegacyPrefix::_F2 | LegacyPrefix::_F3 => false,
398        }
399    }
400}
401
402impl TryFrom<u8> for LegacyPrefix {
403    type Error = u8;
404    fn try_from(byte: u8) -> Result<Self, Self::Error> {
405        Ok(match byte {
406            0x66 => LegacyPrefix::_66,
407            0xF0 => LegacyPrefix::_F0,
408            0xF2 => LegacyPrefix::_F2,
409            0xF3 => LegacyPrefix::_F3,
410            byte => return Err(byte),
411        })
412    }
413}
414
/// The immediate operand, if any, that follows an instruction's other bytes;
/// the variants are named after the reference manual's notation.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate operand.
    None,
    /// A 1-byte (8-bit) immediate.
    ib,
    /// A 2-byte (16-bit) immediate.
    iw,
    /// A 4-byte (32-bit) immediate.
    id,
    /// An 8-byte (64-bit) immediate.
    io,
}
424
425impl Imm {
426    fn bits(&self) -> u8 {
427        match self {
428            Imm::None => 0,
429            Imm::ib => 8,
430            Imm::iw => 16,
431            Imm::id => 32,
432            Imm::io => 64,
433        }
434    }
435}
436
437impl fmt::Display for Imm {
438    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
439        match self {
440            Imm::None => write!(f, ""),
441            Imm::ib => write!(f, "ib"),
442            Imm::iw => write!(f, "iw"),
443            Imm::id => write!(f, "id"),
444            Imm::io => write!(f, "io"),
445        }
446    }
447}
448
/// A placeholder for the VEX encoding format; it has no fields yet.
pub struct Vex {}

impl Vex {
    /// Validation of VEX encodings is not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!()`.
    fn validate(&self) {
        todo!()
    }
}
455}