cranelift_assembler_x64_meta/dsl/encoding.rs
1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
13//! This module references the Intel® 64 and IA-32 Architectures Software
14//! Development Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24 Rex {
25 opcodes: opcode.into(),
26 w: false,
27 r: false,
28 digit: None,
29 imm: Imm::None,
30 }
31}
32
/// An abbreviated constructor for VEX-encoded instructions.
///
/// [`Vex`] currently has no fields, so the returned value carries no
/// configuration.
#[must_use]
pub fn vex() -> Vex {
    Vex {}
}
38
/// Enumerate the ways x64 encodes instructions.
pub enum Encoding {
    /// The traditional encoding; see [`Rex`].
    Rex(Rex),
    /// The VEX encoding; see [`Vex`].
    Vex(Vex),
}
44
45impl Encoding {
46 /// Check that the encoding is valid for the given operands; this can find
47 /// issues earlier, before generating any Rust code.
48 pub fn validate(&self, operands: &[Operand]) {
49 match self {
50 Encoding::Rex(rex) => rex.validate(operands),
51 Encoding::Vex(vex) => vex.validate(),
52 }
53 }
54}
55
56impl fmt::Display for Encoding {
57 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58 match self {
59 Encoding::Rex(rex) => write!(f, "{rex}"),
60 Encoding::Vex(_vex) => todo!(),
61 }
62 }
63}
64
/// The traditional x64 encoding.
///
/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name
/// for the optional _byte_ that, e.g., extends the number of available
/// registers, but we use it here to distinguish this encoding from other
/// encoding formats (e.g., VEX, EVEX). The "REX" _byte_ is still optional in
/// this encoding and only emitted when necessary.
pub struct Rex {
    /// The opcodes for this instruction.
    ///
    /// Multi-byte opcodes are handled by passing an array of opcodes (including
    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
    ///
    /// ```
    /// # use cranelift_assembler_x64_meta::dsl::rex;
    /// let enc = rex([0x66, 0x0f, 0x54]);
    /// ```
    pub opcodes: Opcodes,
    /// Indicates setting the REX.W bit.
    ///
    /// From the reference manual: "Indicates the use of a REX prefix that
    /// affects operand size or instruction semantics. The ordering of the REX
    /// prefix and other optional/mandatory instruction prefixes are discussed
    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
    /// 64-bit behavior are not listed explicitly in the opcode column."
    pub w: bool,
    /// Indicates a `/r` ModR/M byte.
    ///
    /// From the reference manual: "indicates that the ModR/M byte of the
    /// instruction contains a register operand and an r/m operand."
    pub r: bool,
    /// Indicates a `/digit` opcode extension, if any.
    ///
    /// From the reference manual: "a digit between 0 and 7 indicates that the
    /// ModR/M byte of the instruction uses only the r/m (register or memory)
    /// operand. The reg field contains the digit that provides an extension to
    /// the instruction's opcode."
    pub digit: Option<u8>,
    /// The size of the immediate operand to the instruction; [`Imm::None`]
    /// when the encoding has no immediate.
    pub imm: Imm,
}
103
104impl Rex {
105 /// Set the `REX.W` bit.
106 #[must_use]
107 pub fn w(self) -> Self {
108 Self { w: true, ..self }
109 }
110
111 /// Set the ModR/M byte to contain a register operand and an r/m operand;
112 /// equivalent to `/r` in the reference manual.
113 #[must_use]
114 pub fn r(self) -> Self {
115 Self { r: true, ..self }
116 }
117
118 /// Set the digit extending the opcode; equivalent to `/<digit>` in the
119 /// reference manual.
120 ///
121 /// # Panics
122 ///
123 /// Panics if `digit` is too large.
124 #[must_use]
125 pub fn digit(self, digit: u8) -> Self {
126 assert!(digit <= 0b111, "must fit in 3 bits");
127 Self {
128 digit: Some(digit),
129 ..self
130 }
131 }
132
133 /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
134 /// reference manual.
135 ///
136 /// # Panics
137 ///
138 /// Panics if an immediate operand is already set.
139 #[must_use]
140 pub fn ib(self) -> Self {
141 assert_eq!(self.imm, Imm::None);
142 Self {
143 imm: Imm::ib,
144 ..self
145 }
146 }
147
148 /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
149 /// the reference manual.
150 ///
151 /// # Panics
152 ///
153 /// Panics if an immediate operand is already set.
154 #[must_use]
155 pub fn iw(self) -> Self {
156 assert_eq!(self.imm, Imm::None);
157 Self {
158 imm: Imm::iw,
159 ..self
160 }
161 }
162
163 /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
164 /// in the reference manual.
165 ///
166 /// # Panics
167 ///
168 /// Panics if an immediate operand is already set.
169 #[must_use]
170 pub fn id(self) -> Self {
171 assert_eq!(self.imm, Imm::None);
172 Self {
173 imm: Imm::id,
174 ..self
175 }
176 }
177
178 /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
179 /// in the reference manual.
180 ///
181 /// # Panics
182 ///
183 /// Panics if an immediate operand is already set.
184 #[must_use]
185 pub fn io(self) -> Self {
186 assert_eq!(self.imm, Imm::None);
187 Self {
188 imm: Imm::io,
189 ..self
190 }
191 }
192
193 /// Check a subset of the rules for valid encodings outlined in chapter 2,
194 /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
195 /// Developer’s Manual, Volume 2A.
196 fn validate(&self, operands: &[Operand]) {
197 assert!(!(self.r && self.digit.is_some()));
198 assert!(!(self.r && self.imm != Imm::None));
199 assert!(
200 !(self.w && (self.opcodes.prefixes.has_operand_size_override())),
201 "though valid, if REX.W is set then the 66 prefix is ignored--avoid encoding this"
202 );
203
204 if self.opcodes.prefixes.has_operand_size_override() {
205 assert!(
206 operands.iter().all(|&op| matches!(
207 op.location.kind(),
208 OperandKind::Imm(_) | OperandKind::FixedReg(_)
209 ) || op.location.bits() == 16
210 || op.location.bits() == 128),
211 "when we encode the 66 prefix, we expect all operands to be 16-bit wide"
212 );
213 }
214
215 if let Some(OperandKind::Imm(op)) = operands
216 .iter()
217 .map(|o| o.location.kind())
218 .find(|k| matches!(k, OperandKind::Imm(_)))
219 {
220 assert_eq!(
221 op.bits(),
222 self.imm.bits(),
223 "for an immediate, the encoding width must match the declared operand width"
224 );
225 }
226 }
227}
228
229impl From<Rex> for Encoding {
230 fn from(rex: Rex) -> Encoding {
231 Encoding::Rex(rex)
232 }
233}
234
235impl fmt::Display for Rex {
236 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237 if let Some(group1) = &self.opcodes.prefixes.group1 {
238 write!(f, "{group1} + ")?;
239 }
240 if let Some(group2) = &self.opcodes.prefixes.group2 {
241 write!(f, "{group2} + ")?;
242 }
243 if let Some(group3) = &self.opcodes.prefixes.group3 {
244 write!(f, "{group3} + ")?;
245 }
246 if let Some(group4) = &self.opcodes.prefixes.group4 {
247 write!(f, "{group4} + ")?;
248 }
249 if self.w {
250 write!(f, "REX.W + ")?;
251 }
252 if self.opcodes.escape {
253 write!(f, "0x0F + ")?;
254 }
255 write!(f, "{:#04x}", self.opcodes.primary)?;
256 if let Some(secondary) = self.opcodes.secondary {
257 write!(f, " {secondary:#04x}")?;
258 }
259 if self.r {
260 write!(f, " /r")?;
261 }
262 if let Some(digit) = self.digit {
263 write!(f, " /{digit}")?;
264 }
265 if self.imm != Imm::None {
266 write!(f, " {}", self.imm)?;
267 }
268 Ok(())
269 }
270}
271
/// Describe an instruction's opcodes. From section 2.1.2 "Opcodes" in the
/// reference manual:
///
/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
/// > be defined within the primary opcode. Such fields define the direction of
/// > operation, size of displacements, register encoding, condition codes, or
/// > sign extension. Encoding fields used by an opcode vary depending on the
/// > class of operation.
/// >
/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
/// > of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
/// >   byte.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
/// >   a second opcode byte (same as previous bullet).
/// >
/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
/// > The first byte is a mandatory prefix (it is not considered as a repeat
/// > prefix).
/// >
/// > Three-byte opcode formats for general-purpose and SIMD instructions
/// > consist of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
/// >   opcode bytes.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
/// >   two additional opcode bytes (same as previous bullet).
/// >
/// > For example, `PHADDW` for XMM registers consists of the following
/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
pub struct Opcodes {
    /// The prefix bytes for this instruction.
    pub prefixes: Prefixes,
    /// Indicates the use of an escape opcode byte, `0x0f`.
    pub escape: bool,
    /// The primary opcode; when built from an array of bytes, this is the byte
    /// following any prefixes and the `0x0f` escape byte (if present).
    pub primary: u8,
    /// Some instructions (e.g., SIMD) may have a secondary opcode; when built
    /// from an array of bytes, this is the byte following `primary` in the
    /// escaped form.
    pub secondary: Option<u8>,
}
312
313impl From<u8> for Opcodes {
314 fn from(primary: u8) -> Opcodes {
315 Opcodes {
316 prefixes: Prefixes::default(),
317 escape: false,
318 primary,
319 secondary: None,
320 }
321 }
322}
323
324impl<const N: usize> From<[u8; N]> for Opcodes {
325 fn from(bytes: [u8; N]) -> Self {
326 let (prefixes, remaining) = Prefixes::parse(&bytes);
327 let (escape, primary, secondary) = match remaining {
328 [primary] => (false, *primary, None),
329 [0x0f, primary] => (true, *primary, None),
330 [0x0f, primary, secondary] => (true, *primary, Some(*secondary)),
331 _ => panic!(
332 "invalid opcodes after prefix; expected [opcode], [0x0f, opcode], or [0x0f, opcode, opcode], found {remaining:?}"
333 ),
334 };
335 Self {
336 prefixes,
337 escape,
338 primary,
339 secondary,
340 }
341 }
342}
343
/// The allowed prefixes for an instruction. From the reference manual (section
/// 2.1.1):
///
/// > Instruction prefixes are divided into four groups, each with a set of
/// > allowable prefix codes. For each instruction, it is only useful to include
/// > up to one prefix code from each of the four groups (Groups 1, 2, 3, 4).
/// > Groups 1 through 4 may be placed in any order relative to each other.
#[derive(Default)]
pub struct Prefixes {
    /// The group 1 prefix, if any: `LOCK` (`0xf0`) or a repeat prefix (`0xf2`,
    /// `0xf3`).
    pub group1: Option<Group1Prefix>,
    /// The group 2 prefix, if any: a segment override (or branch hint).
    pub group2: Option<Group2Prefix>,
    /// The group 3 prefix, if any: the operand-size override (`0x66`).
    pub group3: Option<Group3Prefix>,
    /// The group 4 prefix, if any: the address-size override (`0x67`).
    pub group4: Option<Group4Prefix>,
}
358
359impl Prefixes {
360 /// Parse a slice of `bytes` into a set of prefixes, returning both the
361 /// configured [`Prefixes`] as well as any remaining bytes.
362 fn parse(mut bytes: &[u8]) -> (Self, &[u8]) {
363 let mut prefixes = Self::default();
364 while !bytes.is_empty() && prefixes.try_assign(bytes[0]).is_ok() {
365 bytes = &bytes[1..];
366 }
367 (prefixes, bytes)
368 }
369
370 /// Attempt to parse a `byte` as a prefix and, if successful, assigns it to
371 /// the correct prefix group.
372 ///
373 /// # Panics
374 ///
375 /// This function panics if the prefix for a group is already set; this
376 /// disallows specifying multiple prefixes per group.
377 fn try_assign(&mut self, byte: u8) -> Result<(), ()> {
378 if let Ok(p) = Group1Prefix::try_from(byte) {
379 assert!(self.group1.is_none());
380 self.group1 = Some(p);
381 Ok(())
382 } else if let Ok(p) = Group2Prefix::try_from(byte) {
383 assert!(self.group2.is_none());
384 self.group2 = Some(p);
385 Ok(())
386 } else if let Ok(p) = Group3Prefix::try_from(byte) {
387 assert!(self.group3.is_none());
388 self.group3 = Some(p);
389 Ok(())
390 } else if let Ok(p) = Group4Prefix::try_from(byte) {
391 assert!(self.group4.is_none());
392 self.group4 = Some(p);
393 Ok(())
394 } else {
395 Err(())
396 }
397 }
398
399 /// Check if the `0x66` prefix is present.
400 fn has_operand_size_override(&self) -> bool {
401 matches!(self.group3, Some(Group3Prefix::OperandSizeOverride))
402 }
403
404 /// Check if any prefix is present.
405 pub fn is_empty(&self) -> bool {
406 self.group1.is_none()
407 && self.group2.is_none()
408 && self.group3.is_none()
409 && self.group4.is_none()
410 }
411}
412
/// Contains the group 1 prefixes: `LOCK` (`0xf0`) and the repeat prefixes
/// (`0xf2`, `0xf3`).
pub enum Group1Prefix {
    /// The LOCK prefix (`0xf0`). From the reference manual:
    ///
    /// > The LOCK prefix (F0H) forces an operation that ensures exclusive use
    /// > of shared memory in a multiprocessor environment. See "LOCK—Assert
    /// > LOCK# Signal Prefix" in Chapter 3, Instruction Set Reference, A-L, for
    /// > a description of this prefix.
    Lock,
    /// A REPNE/REPNZ prefix (`0xf2`) or a BND prefix under certain conditions.
    /// `REP*` prefixes apply only to string and input/output instructions but
    /// can be used as mandatory prefixes in other kinds of instructions (e.g.,
    /// SIMD). From the reference manual:
    ///
    /// > Repeat prefixes (F2H, F3H) cause an instruction to be repeated for
    /// > each element of a string. Use these prefixes only with string and I/O
    /// > instructions (MOVS, CMPS, SCAS, LODS, STOS, INS, and OUTS). Use of
    /// > repeat prefixes and/or undefined opcodes with other Intel 64 or IA-32
    /// > instructions is reserved; such use may cause unpredictable behavior.
    /// >
    /// > Some instructions may use F2H, F3H as a mandatory prefix to express
    /// > distinct functionality.
    REPNorBND,
    /// A REPE/REPZ prefix (`0xf3`); `REP*` prefixes apply only to string and
    /// input/output instructions but can be used as mandatory prefixes in other
    /// kinds of instructions (e.g., SIMD). See `REPNorBND` for more details.
    REP_,
}

/// Recognize a group 1 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group1Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0xF0 => Ok(Self::Lock),
            0xF2 => Ok(Self::REPNorBND),
            0xF3 => Ok(Self::REP_),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal.
impl fmt::Display for Group1Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::Lock => "0xF0",
            Self::REPNorBND => "0xF2",
            Self::REP_ => "0xF3",
        })
    }
}
462
/// Contains the segment override prefixes or a (deprecated) branch hint when
/// used on a `Jcc` instruction. Note that using the segment override prefixes
/// on a branch instruction is reserved. See section 2.1.1, "Instruction
/// Prefixes," in the reference manual.
pub enum Group2Prefix {
    /// The CS segment override prefix (`0x2e`); also the "branch not taken"
    /// hint.
    CSorBNT,
    /// The SS segment override prefix (`0x36`).
    SS,
    /// The DS segment override prefix (`0x3e`); also the "branch taken" hint.
    DSorBT,
    /// The ES segment override prefix (`0x26`).
    ES,
    /// The FS segment override prefix (`0x64`).
    FS,
    /// The GS segment override prefix (`0x65`).
    GS,
}

/// Recognize a group 2 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group2Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x2E => Ok(Self::CSorBNT),
            0x36 => Ok(Self::SS),
            0x3E => Ok(Self::DSorBT),
            0x26 => Ok(Self::ES),
            0x64 => Ok(Self::FS),
            0x65 => Ok(Self::GS),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal.
impl fmt::Display for Group2Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::CSorBNT => "0x2E",
            Self::SS => "0x36",
            Self::DSorBT => "0x3E",
            Self::ES => "0x26",
            Self::FS => "0x64",
            Self::GS => "0x65",
        })
    }
}
510
/// Contains the operand-size override prefix (`0x66`); also used as a SIMD
/// prefix. From the reference manual:
///
/// > The operand-size override prefix allows a program to switch between 16-
/// > and 32-bit operand sizes. Either size can be the default; use of the
/// > prefix selects the non-default size. Some SSE2/SSE3/SSSE3/SSE4
/// > instructions and instructions using a three-byte sequence of primary
/// > opcode bytes may use 66H as a mandatory prefix to express distinct
/// > functionality.
pub enum Group3Prefix {
    /// The operand-size override prefix (`0x66`).
    OperandSizeOverride,
}

/// Recognize the group 3 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group3Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x66 => Ok(Self::OperandSizeOverride),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal.
impl fmt::Display for Group3Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::OperandSizeOverride => "0x66",
        })
    }
}
541
/// Contains the address-size override prefix (`0x67`). From the reference
/// manual:
///
/// > The address-size override prefix (67H) allows programs to switch between
/// > 16- and 32-bit addressing. Either size can be the default; the prefix
/// > selects the non-default size.
pub enum Group4Prefix {
    /// The address-size override prefix (`0x67`).
    AddressSizeOverride,
}

/// Recognize the group 4 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group4Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x67 => Ok(Self::AddressSizeOverride),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal.
impl fmt::Display for Group4Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::AddressSizeOverride => "0x67",
        })
    }
}
569
/// Indicate the size of an immediate operand. From the reference manual:
///
/// > A 1-byte (ib), 2-byte (iw), 4-byte (id) or 8-byte (io) immediate operand
/// > to the instruction that follows the opcode, ModR/M bytes or scale-indexing
/// > bytes. The opcode determines if the operand is a signed value. All words,
/// > doublewords, and quadwords are given with the low-order byte first.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate operand.
    None,
    /// A 1-byte immediate.
    ib,
    /// A 2-byte immediate.
    iw,
    /// A 4-byte immediate.
    id,
    /// An 8-byte immediate.
    io,
}

impl Imm {
    /// The width of the immediate in bits; `0` when there is no immediate.
    fn bits(&self) -> u8 {
        match *self {
            Self::None => 0,
            Self::ib => 8,
            Self::iw => 16,
            Self::id => 32,
            Self::io => 64,
        }
    }
}

/// Print the immediate using the reference manual's abbreviation; `None`
/// prints as the empty string.
impl fmt::Display for Imm {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::None => "",
            Self::ib => "ib",
            Self::iw => "iw",
            Self::id => "id",
            Self::io => "io",
        })
    }
}
609
/// The VEX encoding; currently an empty placeholder carrying no data.
pub struct Vex {}

impl Vex {
    /// Validate a VEX encoding.
    ///
    /// # Panics
    ///
    /// Always panics for now: the body is `todo!()`.
    fn validate(&self) {
        todo!()
    }
}
616}