cranelift_assembler_x64_meta/dsl/encoding.rs
1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
//! This module references the Intel® 64 and IA-32 Architectures Software
//! Developer's Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24 Rex {
25 opcodes: opcode.into(),
26 w: false,
27 r: false,
28 digit: None,
29 imm: Imm::None,
30 }
31}
32
/// An abbreviated constructor for VEX-encoded instructions.
///
/// The returned [`Vex`] is an empty struct, so there is nothing to configure
/// on it yet.
#[must_use]
pub fn vex() -> Vex {
    Vex {}
}
38
/// Enumerate the ways x64 encodes instructions.
pub enum Encoding {
    /// The traditional encoding (legacy prefix, optional REX byte, opcodes,
    /// ModR/M, immediate); see [`Rex`].
    Rex(Rex),
    /// A VEX encoding; see [`Vex`].
    Vex(Vex),
}
44
45impl Encoding {
46 /// Check that the encoding is valid for the given operands; this can find
47 /// issues earlier, before generating any Rust code.
48 pub fn validate(&self, operands: &[Operand]) {
49 match self {
50 Encoding::Rex(rex) => rex.validate(operands),
51 Encoding::Vex(vex) => vex.validate(),
52 }
53 }
54}
55
56impl fmt::Display for Encoding {
57 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58 match self {
59 Encoding::Rex(rex) => write!(f, "{rex}"),
60 Encoding::Vex(_vex) => todo!(),
61 }
62 }
63}
64
/// The traditional x64 encoding.
///
/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name
/// for the optional _byte_ that, among other things, extends the number of
/// available registers, but we use it here to distinguish this encoding from
/// other encoding formats (e.g., VEX, EVEX). The "REX" _byte_ is still
/// optional in this encoding and only emitted when necessary.
pub struct Rex {
    /// The opcodes for this instruction.
    ///
    /// Multi-byte opcodes are handled by passing an array of opcodes (including
    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
    ///
    /// ```
    /// # use cranelift_assembler_x64_meta::dsl::rex;
    /// let enc = rex([0x66, 0x0f, 0x54]);
    /// ```
    pub opcodes: Opcodes,
    /// Indicates setting the REX.W bit.
    ///
    /// From the reference manual: "Indicates the use of a REX prefix that
    /// affects operand size or instruction semantics. The ordering of the REX
    /// prefix and other optional/mandatory instruction prefixes are discussed
    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
    /// 64-bit behavior are not listed explicitly in the opcode column."
    pub w: bool,
    /// Indicates the `/r` form of the ModR/M byte.
    ///
    /// From the reference manual: "indicates that the ModR/M byte of the
    /// instruction contains a register operand and an r/m operand."
    pub r: bool,
    /// The `/digit` opcode extension, if any.
    ///
    /// From the reference manual: "a digit between 0 and 7 indicates that the
    /// ModR/M byte of the instruction uses only the r/m (register or memory)
    /// operand. The reg field contains the digit that provides an extension to
    /// the instruction's opcode."
    pub digit: Option<u8>,
    /// The number of bits used as an immediate operand to the instruction.
    ///
    /// From the reference manual: "a 1-byte (ib), 2-byte (iw), 4-byte (id) or
    /// 8-byte (io) immediate operand to the instruction that follows the
    /// opcode, ModR/M bytes or scale-indexing bytes. The opcode determines if
    /// the operand is a signed value. All words, doublewords, and quadwords are
    /// given with the low-order byte first."
    pub imm: Imm,
}
109
110impl Rex {
111 /// Set the `REX.W` bit.
112 #[must_use]
113 pub fn w(self) -> Self {
114 Self { w: true, ..self }
115 }
116
117 /// Set the ModR/M byte to contain a register operand and an r/m operand;
118 /// equivalent to `/r` in the reference manual.
119 #[must_use]
120 pub fn r(self) -> Self {
121 Self { r: true, ..self }
122 }
123
124 /// Set the digit extending the opcode; equivalent to `/<digit>` in the
125 /// reference manual.
126 ///
127 /// # Panics
128 ///
129 /// Panics if `digit` is too large.
130 #[must_use]
131 pub fn digit(self, digit: u8) -> Self {
132 assert!(digit <= 0b111, "must fit in 3 bits");
133 Self { digit: Some(digit), ..self }
134 }
135
136 /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
137 /// reference manual.
138 ///
139 /// # Panics
140 ///
141 /// Panics if an immediate operand is already set.
142 #[must_use]
143 pub fn ib(self) -> Self {
144 assert_eq!(self.imm, Imm::None);
145 Self { imm: Imm::ib, ..self }
146 }
147
148 /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
149 /// the reference manual.
150 ///
151 /// # Panics
152 ///
153 /// Panics if an immediate operand is already set.
154 #[must_use]
155 pub fn iw(self) -> Self {
156 assert_eq!(self.imm, Imm::None);
157 Self { imm: Imm::iw, ..self }
158 }
159
160 /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
161 /// in the reference manual.
162 ///
163 /// # Panics
164 ///
165 /// Panics if an immediate operand is already set.
166 #[must_use]
167 pub fn id(self) -> Self {
168 assert_eq!(self.imm, Imm::None);
169 Self { imm: Imm::id, ..self }
170 }
171
172 /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
173 /// in the reference manual.
174 ///
175 /// # Panics
176 ///
177 /// Panics if an immediate operand is already set.
178 #[must_use]
179 pub fn io(self) -> Self {
180 assert_eq!(self.imm, Imm::None);
181 Self { imm: Imm::io, ..self }
182 }
183
184 /// Check a subset of the rules for valid encodings outlined in chapter 2,
185 /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
186 /// Developer’s Manual, Volume 2A.
187 fn validate(&self, operands: &[Operand]) {
188 assert!(!(self.r && self.digit.is_some()));
189 assert!(!(self.r && self.imm != Imm::None));
190 assert!(
191 !(self.w && (self.opcodes.prefix.contains_66())),
192 "though valid, if REX.W is set then the 66 prefix is ignored--avoid encoding this"
193 );
194
195 if self.opcodes.prefix.contains_66() {
196 assert!(
197 operands.iter().all(|&op| matches!(
198 op.location.kind(),
199 OperandKind::Imm(_) | OperandKind::FixedReg(_)
200 ) || op.location.bits() == 16
201 || op.location.bits() == 128),
202 "when we encode the 66 prefix, we expect all operands to be 16-bit wide"
203 );
204 }
205
206 if let Some(OperandKind::Imm(op)) = operands
207 .iter()
208 .map(|o| o.location.kind())
209 .find(|k| matches!(k, OperandKind::Imm(_)))
210 {
211 assert_eq!(
212 op.bits(),
213 self.imm.bits(),
214 "for an immediate, the encoding width must match the declared operand width"
215 );
216 }
217 }
218}
219
220impl From<Rex> for Encoding {
221 fn from(rex: Rex) -> Encoding {
222 Encoding::Rex(rex)
223 }
224}
225
226impl fmt::Display for Rex {
227 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
228 match self.opcodes.prefix {
229 LegacyPrefix::NoPrefix => {}
230 LegacyPrefix::_66 => write!(f, "0x66 + ")?,
231 LegacyPrefix::_F0 => write!(f, "0xF0 + ")?,
232 LegacyPrefix::_66F0 => write!(f, "0x66 0xF0 + ")?,
233 LegacyPrefix::_F2 => write!(f, "0xF2 + ")?,
234 LegacyPrefix::_F3 => write!(f, "0xF3 + ")?,
235 LegacyPrefix::_66F3 => write!(f, "0x66 0xF3 + ")?,
236 }
237 if self.w {
238 write!(f, "REX.W + ")?;
239 }
240 if self.opcodes.escape {
241 write!(f, "0x0F + ")?;
242 }
243 write!(f, "{:#04x}", self.opcodes.primary)?;
244 if let Some(secondary) = self.opcodes.secondary {
245 write!(f, " {secondary:#04x}")?;
246 }
247 if self.r {
248 write!(f, " /r")?;
249 }
250 if let Some(digit) = self.digit {
251 write!(f, " /{digit}")?;
252 }
253 if self.imm != Imm::None {
254 write!(f, " {}", self.imm)?;
255 }
256 Ok(())
257 }
258}
259
/// Describe an instruction's opcodes. From section 2.1.2 "Opcodes" in the
/// reference manual:
///
/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
/// > be defined within the primary opcode. Such fields define the direction of
/// > operation, size of displacements, register encoding, condition codes, or
/// > sign extension. Encoding fields used by an opcode vary depending on the
/// > class of operation.
/// >
/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
/// > of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
/// >   byte.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
/// >   a second opcode byte (same as previous bullet).
/// >
/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
/// > The first byte is a mandatory prefix (it is not considered as a repeat
/// > prefix).
/// >
/// > Three-byte opcode formats for general-purpose and SIMD instructions
/// > consist of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
/// >   opcode bytes.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
/// >   two additional opcode bytes (same as previous bullet).
/// >
/// > For example, `PHADDW` for XMM registers consists of the following
/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
///
/// Note that this struct stores the `0x0f` escape byte as a flag
/// ([`Opcodes::escape`]) rather than as one of the opcode bytes; `primary` and
/// `secondary` hold the bytes that follow it.
pub struct Opcodes {
    /// The prefix bytes for this instruction.
    pub prefix: LegacyPrefix,
    /// Indicates the use of an escape opcode byte, `0x0f`.
    pub escape: bool,
    /// The primary opcode.
    pub primary: u8,
    /// Some instructions (e.g., SIMD) may have a secondary opcode.
    pub secondary: Option<u8>,
}

/// A single byte is a lone primary opcode: no prefix, no escape.
impl From<u8> for Opcodes {
    fn from(primary: u8) -> Opcodes {
        Opcodes {
            prefix: LegacyPrefix::NoPrefix,
            escape: false,
            primary,
            secondary: None,
        }
    }
}

/// A one-element array is treated exactly like a single opcode byte.
impl From<[u8; 1]> for Opcodes {
    fn from(bytes: [u8; 1]) -> Opcodes {
        let [primary] = bytes;
        Opcodes::from(primary)
    }
}

/// Two bytes are either `[prefix, opcode]` or `[0x0f, opcode]`.
///
/// # Panics
///
/// Panics if the first byte is neither a recognized legacy prefix nor the
/// `0x0f` escape byte.
impl From<[u8; 2]> for Opcodes {
    fn from(bytes: [u8; 2]) -> Opcodes {
        let [a, b] = bytes;
        match (LegacyPrefix::try_from(a), b) {
            (Ok(prefix), primary) => Opcodes {
                prefix,
                escape: false,
                primary,
                secondary: None,
            },
            (Err(0x0f), primary) => Opcodes {
                prefix: LegacyPrefix::NoPrefix,
                escape: true,
                primary,
                secondary: None,
            },
            _ => panic!("invalid opcodes; expected [prefix, opcode] or [0x0f, opcode]"),
        }
    }
}

/// Three bytes are either `[prefix, 0x0f, opcode]` or `[0x0f, opcode, opcode]`.
///
/// # Panics
///
/// Panics if the bytes match neither of those shapes.
impl From<[u8; 3]> for Opcodes {
    fn from(bytes: [u8; 3]) -> Opcodes {
        let [a, b, c] = bytes;
        match (LegacyPrefix::try_from(a), b, c) {
            (Ok(prefix), 0x0f, primary) => Opcodes {
                prefix,
                escape: true,
                primary,
                secondary: None,
            },
            (Err(0x0f), primary, secondary) => Opcodes {
                prefix: LegacyPrefix::NoPrefix,
                escape: true,
                primary,
                secondary: Some(secondary),
            },
            _ => panic!("invalid opcodes; expected [prefix, 0x0f, opcode] or [0x0f, opcode, opcode]"),
        }
    }
}

/// Four bytes must be `[prefix, 0x0f, opcode, opcode]`, e.g., `66 0F 38 01`.
///
/// # Panics
///
/// Panics if the first byte is not a recognized legacy prefix or the second
/// byte is not the `0x0f` escape byte.
impl From<[u8; 4]> for Opcodes {
    fn from(bytes: [u8; 4]) -> Opcodes {
        let [a, b, c, d] = bytes;
        match (LegacyPrefix::try_from(a), b, c, d) {
            (Ok(prefix), 0x0f, primary, secondary) => Opcodes {
                prefix,
                // The `0x0f` byte matched above is consumed by the pattern, so
                // it must be recorded here or it is lost from the encoding.
                escape: true,
                primary,
                secondary: Some(secondary),
            },
            _ => panic!("invalid opcodes; expected [prefix, 0x0f, opcode, opcode]"),
        }
    }
}

/// A prefix byte for an instruction.
#[derive(PartialEq)]
pub enum LegacyPrefix {
    /// No prefix bytes.
    NoPrefix,
    /// An operand size override typically denoting "16-bit operation" or "SSE instructions". But the
    /// reference manual is more nuanced:
    ///
    /// > The operand-size override prefix allows a program to switch between
    /// > 16- and 32-bit operand sizes. Either size can be the default; use of
    /// > the prefix selects the non-default.
    /// > Some SSE2/SSE3/SSSE3/SSE4 instructions and instructions using a three-byte
    /// > sequence of primary opcode bytes may use 66H as a mandatory prefix to express
    /// > distinct functionality.
    _66,
    /// The lock prefix.
    _F0,
    /// Operand size override and lock.
    _66F0,
    /// REPNE, but no specific meaning here -- is just an opcode extension.
    _F2,
    /// REP/REPE, but no specific meaning here -- is just an opcode extension.
    _F3,
    /// Operand size override and same effect as F3.
    _66F3,
}

impl LegacyPrefix {
    /// Report whether this prefix includes the `0x66` operand-size override,
    /// either alone or combined with another prefix.
    #[must_use]
    pub fn contains_66(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            LegacyPrefix::_66 | LegacyPrefix::_66F0 | LegacyPrefix::_66F3 => true,
            LegacyPrefix::NoPrefix | LegacyPrefix::_F0 | LegacyPrefix::_F2 | LegacyPrefix::_F3 => false,
        }
    }
}

/// Recognize a single prefix byte.
///
/// Only the one-byte prefixes `0x66`, `0xF0`, `0xF2`, and `0xF3` are
/// recognized; the combined variants ([`LegacyPrefix::_66F0`],
/// [`LegacyPrefix::_66F3`]) are never produced by this conversion. Any other
/// byte is handed back unchanged as the error value so that callers can
/// inspect it (e.g., to detect the `0x0f` escape byte).
impl TryFrom<u8> for LegacyPrefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        Ok(match byte {
            0x66 => LegacyPrefix::_66,
            0xF0 => LegacyPrefix::_F0,
            0xF2 => LegacyPrefix::_F2,
            0xF3 => LegacyPrefix::_F3,
            byte => return Err(byte),
        })
    }
}
414
/// The size of the immediate that follows an instruction's opcode bytes,
/// named after the reference manual's `ib`/`iw`/`id`/`io` notation.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate.
    None,
    /// A byte-sized (8-bit) immediate.
    ib,
    /// A word-sized (16-bit) immediate.
    iw,
    /// A doubleword-sized (32-bit) immediate.
    id,
    /// A quadword-sized (64-bit) immediate.
    io,
}

impl Imm {
    /// The width of the immediate in bits; `0` when there is no immediate.
    fn bits(&self) -> u8 {
        let bytes: u8 = match self {
            Imm::None => 0,
            Imm::ib => 1,
            Imm::iw => 2,
            Imm::id => 4,
            Imm::io => 8,
        };
        bytes * 8
    }
}

/// Print the reference-manual mnemonic; [`Imm::None`] prints as nothing.
impl fmt::Display for Imm {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mnemonic = match self {
            Imm::None => "",
            Imm::ib => "ib",
            Imm::iw => "iw",
            Imm::id => "id",
            Imm::io => "io",
        };
        f.write_str(mnemonic)
    }
}
448
/// A VEX encoding.
///
/// This is currently an empty placeholder: it has no fields and its
/// validation is not implemented yet.
pub struct Vex {}

impl Vex {
    /// Check that this encoding is valid.
    ///
    /// # Panics
    ///
    /// Always panics for now: the body is `todo!()`.
    fn validate(&self) {
        todo!()
    }
}