cranelift_assembler_x64_meta/dsl/encoding.rs
1//! A DSL for describing x64 encodings.
2//!
3//! Intended use:
4//! - construct an encoding using an abbreviated helper, e.g., [`rex`]
5//! - then, configure the encoding using builder methods, e.g., [`Rex::w`]
6//!
7//! ```
8//! # use cranelift_assembler_x64_meta::dsl::rex;
9//! let enc = rex(0x25).w().id();
10//! assert_eq!(enc.to_string(), "REX.W + 0x25 id")
11//! ```
12//!
//! This module references the Intel® 64 and IA-32 Architectures Software
//! Developer's Manual, Volume 2: [link].
15//!
16//! [link]: https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
17
18use super::{Operand, OperandKind};
19use core::fmt;
20
21/// An abbreviated constructor for REX-encoded instructions.
22#[must_use]
23pub fn rex(opcode: impl Into<Opcodes>) -> Rex {
24 Rex {
25 opcodes: opcode.into(),
26 w: false,
27 modrm: None,
28 imm: Imm::None,
29 opcode_mod: None,
30 }
31}
32
33/// An abbreviated constructor for VEX-encoded instructions.
34#[must_use]
35pub fn vex(length: VexLength) -> Vex {
36 Vex {
37 length,
38 pp: None,
39 mmmmm: None,
40 w: VexW::WIG,
41 opcode: u8::MAX,
42 modrm: None,
43 imm: Imm::None,
44 is4: false,
45 }
46}
47
48/// Enumerate the ways x64 encodes instructions.
49pub enum Encoding {
50 Rex(Rex),
51 Vex(Vex),
52}
53
54impl Encoding {
55 /// Check that the encoding is valid for the given operands; this can find
56 /// issues earlier, before generating any Rust code.
57 pub fn validate(&self, operands: &[Operand]) {
58 match self {
59 Encoding::Rex(rex) => rex.validate(operands),
60 Encoding::Vex(vex) => vex.validate(operands),
61 }
62 }
63
64 /// Return the opcode for this encoding.
65 pub fn opcode(&self) -> u8 {
66 match self {
67 Encoding::Rex(rex) => rex.opcodes.opcode(),
68 Encoding::Vex(vex) => vex.opcode,
69 }
70 }
71}
72
73impl fmt::Display for Encoding {
74 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75 match self {
76 Encoding::Rex(rex) => write!(f, "{rex}"),
77 Encoding::Vex(vex) => write!(f, "{vex}"),
78 }
79 }
80}
81
/// How an instruction uses the `reg` field of its ModR/M byte.
#[derive(Clone, Copy, PartialEq)]
pub enum ModRmKind {
    /// Models `/digit`.
    ///
    /// From the reference manual: "a digit between 0 and 7 indicates that the
    /// ModR/M byte of the instruction uses only the r/m (register or memory)
    /// operand. The reg field contains the digit that provides an extension to
    /// the instruction's opcode."
    Digit(u8),

    /// Models `/r`.
    ///
    /// From the reference manual: "indicates that the ModR/M byte of the
    /// instruction contains a register operand and an r/m operand."
    Reg,
}

impl ModRmKind {
    /// Return the opcode-extension digit, or `None` for the `/r` form.
    #[must_use]
    pub fn digit(&self) -> Option<u8> {
        match *self {
            Self::Digit(extension) => Some(extension),
            Self::Reg => None,
        }
    }

    /// Return the opcode-extension digit.
    ///
    /// # Panics
    ///
    /// Panics if no extension digit was defined (i.e., this is `/r`).
    pub fn unwrap_digit(&self) -> u8 {
        match self.digit() {
            Some(extension) => extension,
            None => panic!("expected an extension digit"),
        }
    }
}

/// Print the kind using the manual's notation: `/<digit>` or `/r`.
impl fmt::Display for ModRmKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Self::Digit(extension) => write!(f, "/{extension}"),
            Self::Reg => f.write_str("/r"),
        }
    }
}
127
/// The traditional x64 encoding.
///
/// We use the "REX" name here in a slightly unorthodox way: "REX" is the name
/// of the optional _byte_ that, e.g., extends the number of available
/// registers, but we use it here to distinguish this encoding from other
/// encoding formats (e.g., VEX, EVEX). The "REX" _byte_ is still optional in
/// this encoding and only emitted when necessary.
pub struct Rex {
    /// The opcodes for this instruction.
    ///
    /// Multi-byte opcodes are handled by passing an array of opcodes (including
    /// prefixes like `0x66` and escape bytes like `0x0f`) to the constructor.
    /// E.g., `66 0F 54` (`ANDPD`) is expressed as follows:
    ///
    /// ```
    /// # use cranelift_assembler_x64_meta::dsl::rex;
    /// let enc = rex([0x66, 0x0f, 0x54]);
    /// ```
    pub opcodes: Opcodes,
    /// Indicates setting the REX.W bit.
    ///
    /// From the reference manual: "Indicates the use of a REX prefix that
    /// affects operand size or instruction semantics. The ordering of the REX
    /// prefix and other optional/mandatory instruction prefixes are discussed
    /// in chapter 2. Note that REX prefixes that promote legacy instructions to
    /// 64-bit behavior are not listed explicitly in the opcode column."
    pub w: bool,
    /// Indicates modifications to the ModR/M byte (`/r` or `/digit`); `None`
    /// when no modifier was configured.
    pub modrm: Option<ModRmKind>,
    /// The size of the immediate operand appended to the instruction;
    /// [`Imm::None`] when there is no immediate.
    pub imm: Imm,
    /// Used for `+rb`, `+rw`, `+rd`, and `+ro` instructions, which encode `reg`
    /// bits in the opcode byte; if `Some`, this contains the expected bit width
    /// of `reg`.
    ///
    /// From the reference manual: "[...] the lower 3 bits of the opcode byte is
    /// used to encode the register operand without a modR/M byte. The
    /// instruction lists the corresponding hexadecimal value of the opcode byte
    /// with low 3 bits as 000b. In non-64-bit mode, a register code, from 0
    /// through 7, is added to the hexadecimal value of the opcode byte. In
    /// 64-bit mode, indicates the four bit field of REX.b and opcode[2:0] field
    /// encodes the register operand of the instruction. “+ro” is applicable
    /// only in 64-bit mode."
    pub opcode_mod: Option<OpcodeMod>,
}
173
174impl Rex {
175 /// Set the `REX.W` bit.
176 #[must_use]
177 pub fn w(self) -> Self {
178 Self { w: true, ..self }
179 }
180
181 /// Set the ModR/M byte to contain a register operand and an r/m operand;
182 /// equivalent to `/r` in the reference manual.
183 #[must_use]
184 pub fn r(self) -> Self {
185 Self {
186 modrm: Some(ModRmKind::Reg),
187 ..self
188 }
189 }
190
191 /// Set the digit extending the opcode; equivalent to `/<digit>` in the
192 /// reference manual.
193 ///
194 /// # Panics
195 ///
196 /// Panics if `extension` is too large.
197 #[must_use]
198 pub fn digit(self, extension: u8) -> Self {
199 assert!(extension <= 0b111, "must fit in 3 bits");
200 Self {
201 modrm: Some(ModRmKind::Digit(extension)),
202 ..self
203 }
204 }
205
206 /// Retrieve the digit extending the opcode, if available.
207 #[must_use]
208 pub fn unwrap_digit(&self) -> Option<u8> {
209 match self.modrm {
210 Some(ModRmKind::Digit(digit)) => Some(digit),
211 _ => None,
212 }
213 }
214
215 /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
216 /// reference manual.
217 ///
218 /// # Panics
219 ///
220 /// Panics if an immediate operand is already set.
221 #[must_use]
222 pub fn ib(self) -> Self {
223 assert_eq!(self.imm, Imm::None);
224 Self {
225 imm: Imm::ib,
226 ..self
227 }
228 }
229
230 /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
231 /// the reference manual.
232 ///
233 /// # Panics
234 ///
235 /// Panics if an immediate operand is already set.
236 #[must_use]
237 pub fn iw(self) -> Self {
238 assert_eq!(self.imm, Imm::None);
239 Self {
240 imm: Imm::iw,
241 ..self
242 }
243 }
244
245 /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
246 /// in the reference manual.
247 ///
248 /// # Panics
249 ///
250 /// Panics if an immediate operand is already set.
251 #[must_use]
252 pub fn id(self) -> Self {
253 assert_eq!(self.imm, Imm::None);
254 Self {
255 imm: Imm::id,
256 ..self
257 }
258 }
259
260 /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
261 /// in the reference manual.
262 ///
263 /// # Panics
264 ///
265 /// Panics if an immediate operand is already set.
266 #[must_use]
267 pub fn io(self) -> Self {
268 assert_eq!(self.imm, Imm::None);
269 Self {
270 imm: Imm::io,
271 ..self
272 }
273 }
274
275 /// Modify the opcode byte with bits from an 8-bit `reg`; equivalent to
276 /// `+rb` in the reference manual.
277 #[must_use]
278 pub fn rb(self) -> Self {
279 Self {
280 opcode_mod: Some(OpcodeMod::rb),
281 ..self
282 }
283 }
284
285 /// Modify the opcode byte with bits from a 16-bit `reg`; equivalent to
286 /// `+rw` in the reference manual.
287 #[must_use]
288 pub fn rw(self) -> Self {
289 Self {
290 opcode_mod: Some(OpcodeMod::rw),
291 ..self
292 }
293 }
294
295 /// Modify the opcode byte with bits from a 32-bit `reg`; equivalent to
296 /// `+rd` in the reference manual.
297 #[must_use]
298 pub fn rd(self) -> Self {
299 Self {
300 opcode_mod: Some(OpcodeMod::rd),
301 ..self
302 }
303 }
304
305 /// Modify the opcode byte with bits from a 64-bit `reg`; equivalent to
306 /// `+ro` in the reference manual.
307 #[must_use]
308 pub fn ro(self) -> Self {
309 Self {
310 opcode_mod: Some(OpcodeMod::ro),
311 ..self
312 }
313 }
314
315 /// Check a subset of the rules for valid encodings outlined in chapter 2,
316 /// _Instruction Format_, of the Intel® 64 and IA-32 Architectures Software
317 /// Developer’s Manual, Volume 2A.
318 fn validate(&self, operands: &[Operand]) {
319 if let Some(OperandKind::Imm(op)) = operands
320 .iter()
321 .map(|o| o.location.kind())
322 .find(|k| matches!(k, OperandKind::Imm(_)))
323 {
324 assert_eq!(
325 op.bits(),
326 self.imm.bits(),
327 "for an immediate, the encoding width must match the declared operand width"
328 );
329 }
330
331 if let Some(opcode_mod) = &self.opcode_mod {
332 assert!(
333 self.opcodes.primary & 0b111 == 0,
334 "the lower three bits of the opcode byte should be 0"
335 );
336 assert!(
337 operands
338 .iter()
339 .all(|o| o.location.bits() == opcode_mod.bits().into()),
340 "the opcode modifier width must match the operand widths"
341 );
342 }
343 }
344}
345
346impl From<Rex> for Encoding {
347 fn from(rex: Rex) -> Encoding {
348 Encoding::Rex(rex)
349 }
350}
351
352impl fmt::Display for Rex {
353 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
354 if let Some(group1) = &self.opcodes.prefixes.group1 {
355 write!(f, "{group1} + ")?;
356 }
357 if let Some(group2) = &self.opcodes.prefixes.group2 {
358 write!(f, "{group2} + ")?;
359 }
360 if let Some(group3) = &self.opcodes.prefixes.group3 {
361 write!(f, "{group3} + ")?;
362 }
363 if let Some(group4) = &self.opcodes.prefixes.group4 {
364 write!(f, "{group4} + ")?;
365 }
366 if self.w {
367 write!(f, "REX.W + ")?;
368 }
369 if self.opcodes.escape {
370 write!(f, "0x0F + ")?;
371 }
372 write!(f, "{:#04X}", self.opcodes.primary)?;
373 if let Some(secondary) = self.opcodes.secondary {
374 write!(f, " {secondary:#04X}")?;
375 }
376 if let Some(modrm) = self.modrm {
377 write!(f, " {modrm}")?;
378 }
379 if let Some(opcode_mod) = &self.opcode_mod {
380 write!(f, " {opcode_mod}")?;
381 }
382 if self.imm != Imm::None {
383 write!(f, " {}", self.imm)?;
384 }
385 Ok(())
386 }
387}
388
/// Describe an instruction's opcodes. From section 2.1.2 "Opcodes" in the
/// reference manual:
///
/// > A primary opcode can be 1, 2, or 3 bytes in length. An additional 3-bit
/// > opcode field is sometimes encoded in the ModR/M byte. Smaller fields can
/// > be defined within the primary opcode. Such fields define the direction of
/// > operation, size of displacements, register encoding, condition codes, or
/// > sign extension. Encoding fields used by an opcode vary depending on the
/// > class of operation.
/// >
/// > Two-byte opcode formats for general-purpose and SIMD instructions consist
/// > of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode and a second opcode
/// >   byte.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, and
/// >   a second opcode byte (same as previous bullet).
/// >
/// > For example, `CVTDQ2PD` consists of the following sequence: `F3 0F E6`.
/// > The first byte is a mandatory prefix (it is not considered as a repeat
/// > prefix).
/// >
/// > Three-byte opcode formats for general-purpose and SIMD instructions
/// > consist of one of the following:
/// > - An escape opcode byte `0FH` as the primary opcode, plus two additional
/// >   opcode bytes.
/// > - A mandatory prefix (`66H`, `F2H`, or `F3H`), an escape opcode byte, plus
/// >   two additional opcode bytes (same as previous bullet).
/// >
/// > For example, `PHADDW` for XMM registers consists of the following
/// > sequence: `66 0F 38 01`. The first byte is the mandatory prefix.
///
/// In this module an `Opcodes` value is built from a single `u8` (just the
/// primary opcode) or from a byte array that may start with prefixes and the
/// `0x0f` escape byte.
pub struct Opcodes {
    /// The prefix bytes for this instruction.
    pub prefixes: Prefixes,
    /// Indicates the use of an escape opcode byte, `0x0f`.
    pub escape: bool,
    /// The primary opcode.
    pub primary: u8,
    /// Some instructions (e.g., SIMD) may have a secondary opcode.
    pub secondary: Option<u8>,
}
429
430impl Opcodes {
431 /// Return the main opcode for this instruction.
432 ///
433 /// Note that [`Rex`]-encoded instructions have a complex opcode scheme (see
434 /// [`Opcodes`] documentation); the opcode one is usually looking for is the
435 /// last one. This returns the last opcode: the secondary opcode if one is
436 /// available and the primary otherwise.
437 fn opcode(&self) -> u8 {
438 if let Some(secondary) = self.secondary {
439 secondary
440 } else {
441 self.primary
442 }
443 }
444}
445
446impl From<u8> for Opcodes {
447 fn from(primary: u8) -> Opcodes {
448 Opcodes {
449 prefixes: Prefixes::default(),
450 escape: false,
451 primary,
452 secondary: None,
453 }
454 }
455}
456
457impl<const N: usize> From<[u8; N]> for Opcodes {
458 fn from(bytes: [u8; N]) -> Self {
459 let (prefixes, remaining) = Prefixes::parse(&bytes);
460 let (escape, primary, secondary) = match remaining {
461 [primary] => (false, *primary, None),
462 [0x0f, primary] => (true, *primary, None),
463 [0x0f, primary, secondary] => (true, *primary, Some(*secondary)),
464 _ => panic!(
465 "invalid opcodes after prefix; expected [opcode], [0x0f, opcode], or [0x0f, opcode, opcode], found {remaining:x?}"
466 ),
467 };
468 Self {
469 prefixes,
470 escape,
471 primary,
472 secondary,
473 }
474 }
475}
476
/// The allowed prefixes for an instruction. From the reference manual (section
/// 2.1.1):
///
/// > Instruction prefixes are divided into four groups, each with a set of
/// > allowable prefix codes. For each instruction, it is only useful to include
/// > up to one prefix code from each of the four groups (Groups 1, 2, 3, 4).
/// > Groups 1 through 4 may be placed in any order relative to each other.
///
/// Each field holds at most one prefix of its group; `None` means no prefix
/// of that group is used. The derived `Default` leaves all four unset.
#[derive(Default)]
pub struct Prefixes {
    /// The group 1 prefix (`0xf0`, `0xf2`, or `0xf3`), if any.
    pub group1: Option<Group1Prefix>,
    /// The group 2 prefix (a segment override or branch hint), if any.
    pub group2: Option<Group2Prefix>,
    /// The group 3 prefix (the operand-size override, `0x66`), if any.
    pub group3: Option<Group3Prefix>,
    /// The group 4 prefix (the address-size override, `0x67`), if any.
    pub group4: Option<Group4Prefix>,
}
491
492impl Prefixes {
493 /// Parse a slice of `bytes` into a set of prefixes, returning both the
494 /// configured [`Prefixes`] as well as any remaining bytes.
495 fn parse(mut bytes: &[u8]) -> (Self, &[u8]) {
496 let mut prefixes = Self::default();
497 while !bytes.is_empty() && prefixes.try_assign(bytes[0]).is_ok() {
498 bytes = &bytes[1..];
499 }
500 (prefixes, bytes)
501 }
502
503 /// Attempt to parse a `byte` as a prefix and, if successful, assigns it to
504 /// the correct prefix group.
505 ///
506 /// # Panics
507 ///
508 /// This function panics if the prefix for a group is already set; this
509 /// disallows specifying multiple prefixes per group.
510 fn try_assign(&mut self, byte: u8) -> Result<(), ()> {
511 if let Ok(p) = Group1Prefix::try_from(byte) {
512 assert!(self.group1.is_none());
513 self.group1 = Some(p);
514 Ok(())
515 } else if let Ok(p) = Group2Prefix::try_from(byte) {
516 assert!(self.group2.is_none());
517 self.group2 = Some(p);
518 Ok(())
519 } else if let Ok(p) = Group3Prefix::try_from(byte) {
520 assert!(self.group3.is_none());
521 self.group3 = Some(p);
522 Ok(())
523 } else if let Ok(p) = Group4Prefix::try_from(byte) {
524 assert!(self.group4.is_none());
525 self.group4 = Some(p);
526 Ok(())
527 } else {
528 Err(())
529 }
530 }
531
532 /// Check if any prefix is present.
533 pub fn is_empty(&self) -> bool {
534 self.group1.is_none()
535 && self.group2.is_none()
536 && self.group3.is_none()
537 && self.group4.is_none()
538 }
539}
540
/// The group 1 legacy prefixes: `LOCK` (`0xf0`) and the repeat prefixes
/// (`0xf2`, `0xf3`). See section 2.1.1, "Instruction Prefixes," in the
/// reference manual.
pub enum Group1Prefix {
    /// The LOCK prefix (`0xf0`). From the reference manual:
    ///
    /// > The LOCK prefix (F0H) forces an operation that ensures exclusive use
    /// > of shared memory in a multiprocessor environment. See "LOCK—Assert
    /// > LOCK# Signal Prefix" in Chapter 3, Instruction Set Reference, A-L, for
    /// > a description of this prefix.
    Lock,
    /// A REPNE/REPNZ prefix (`0xf2`) or a BND prefix under certain conditions.
    /// `REP*` prefixes apply only to string and input/output instructions but
    /// can be used as mandatory prefixes in other kinds of instructions (e.g.,
    /// SIMD). From the reference manual:
    ///
    /// > Repeat prefixes (F2H, F3H) cause an instruction to be repeated for
    /// > each element of a string. Use these prefixes only with string and I/O
    /// > instructions (MOVS, CMPS, SCAS, LODS, STOS, INS, and OUTS). Use of
    /// > repeat prefixes and/or undefined opcodes with other Intel 64 or IA-32
    /// > instructions is reserved; such use may cause unpredictable behavior.
    /// >
    /// > Some instructions may use F2H, F3H as a mandatory prefix to express
    /// > distinct functionality.
    REPNorBND,
    /// A REPE/REPZ prefix (`0xf3`); `REP*` prefixes apply only to string and
    /// input/output instructions but can be used as mandatory prefixes in other
    /// kinds of instructions (e.g., SIMD). See `REPNorBND` for more details.
    REP_,
}

/// Recognize a group 1 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group1Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0xF0 => Ok(Self::Lock),
            0xF2 => Ok(Self::REPNorBND),
            0xF3 => Ok(Self::REP_),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal (e.g., `0xF0`).
impl fmt::Display for Group1Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::Lock => "0xF0",
            Self::REPNorBND => "0xF2",
            Self::REP_ => "0xF3",
        })
    }
}
590
/// The group 2 legacy prefixes: the segment overrides, two of which double as
/// (deprecated) branch hints when used on a `Jcc` instruction. Note that using
/// the segment override prefixes on a branch instruction is reserved. See
/// section 2.1.1, "Instruction Prefixes," in the reference manual.
pub enum Group2Prefix {
    /// The CS segment override prefix (`0x2e`); also the "branch not taken"
    /// hint.
    CSorBNT,
    /// The SS segment override prefix (`0x36`).
    SS,
    /// The DS segment override prefix (`0x3e`); also the "branch taken" hint.
    DSorBT,
    /// The ES segment override prefix (`0x26`).
    ES,
    /// The FS segment override prefix (`0x64`).
    FS,
    /// The GS segment override prefix (`0x65`).
    GS,
}

/// Recognize a group 2 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group2Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x2E => Ok(Self::CSorBNT),
            0x36 => Ok(Self::SS),
            0x3E => Ok(Self::DSorBT),
            0x26 => Ok(Self::ES),
            0x64 => Ok(Self::FS),
            0x65 => Ok(Self::GS),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal (e.g., `0x2E`).
impl fmt::Display for Group2Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::CSorBNT => "0x2E",
            Self::SS => "0x36",
            Self::DSorBT => "0x3E",
            Self::ES => "0x26",
            Self::FS => "0x64",
            Self::GS => "0x65",
        })
    }
}
638
/// The group 3 legacy prefix: the operand-size override (`0x66`), which is also
/// used as a SIMD prefix. From the reference manual:
///
/// > The operand-size override prefix allows a program to switch between 16-
/// > and 32-bit operand sizes. Either size can be the default; use of the
/// > prefix selects the non-default size. Some SSE2/SSE3/SSSE3/SSE4
/// > instructions and instructions using a three-byte sequence of primary
/// > opcode bytes may use 66H as a mandatory prefix to express distinct
/// > functionality.
pub enum Group3Prefix {
    /// The operand-size override prefix (`0x66`).
    OperandSizeOverride,
}

/// Recognize the group 3 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group3Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x66 => Ok(Self::OperandSizeOverride),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal (`0x66`).
impl fmt::Display for Group3Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::OperandSizeOverride => "0x66",
        })
    }
}
669
/// The group 4 legacy prefix: the address-size override (`0x67`). From the
/// reference manual:
///
/// > The address-size override prefix (67H) allows programs to switch between
/// > 16- and 32-bit addressing. Either size can be the default; the prefix
/// > selects the non-default size.
pub enum Group4Prefix {
    /// The address-size override prefix (`0x67`).
    AddressSizeOverride,
}

/// Recognize the group 4 prefix byte; any other byte is handed back as the
/// error value.
impl TryFrom<u8> for Group4Prefix {
    type Error = u8;
    fn try_from(byte: u8) -> Result<Self, Self::Error> {
        match byte {
            0x67 => Ok(Self::AddressSizeOverride),
            other => Err(other),
        }
    }
}

/// Print the prefix as its byte value in hexadecimal (`0x67`).
impl fmt::Display for Group4Prefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Self::AddressSizeOverride => "0x67",
        })
    }
}
697
/// Indicate the size of an immediate operand. From the reference manual:
///
/// > A 1-byte (ib), 2-byte (iw), 4-byte (id) or 8-byte (io) immediate operand
/// > to the instruction that follows the opcode, ModR/M bytes or scale-indexing
/// > bytes. The opcode determines if the operand is a signed value. All words,
/// > doublewords, and quadwords are given with the low-order byte first.
#[derive(Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum Imm {
    /// No immediate operand.
    None,
    /// An 8-bit immediate.
    ib,
    /// A 16-bit immediate.
    iw,
    /// A 32-bit immediate.
    id,
    /// A 64-bit immediate.
    io,
}

impl Imm {
    /// Return the width of the immediate in bits; `0` when there is none.
    fn bits(&self) -> u16 {
        match *self {
            Imm::None => 0,
            Imm::ib => 8,
            Imm::iw => 16,
            Imm::id => 32,
            Imm::io => 64,
        }
    }
}

/// Print the manual's mnemonic for the immediate (`ib`, `iw`, `id`, `io`);
/// [`Imm::None`] prints nothing.
impl fmt::Display for Imm {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            Imm::None => "",
            Imm::ib => "ib",
            Imm::iw => "iw",
            Imm::id => "id",
            Imm::io => "io",
        })
    }
}
737
/// Indicate the size of the `reg` used when modifying the lower three bits of
/// the opcode byte; this corresponds to the `+rb`, `+rw`, `+rd`, and `+ro`
/// modifiers in the reference manual.
///
/// ```
/// # use cranelift_assembler_x64_meta::dsl::{rex};
/// // The `bswap` instruction extends the opcode byte:
/// let enc = rex([0x0F, 0xC8]).rd();
/// assert_eq!(enc.to_string(), "0x0F + 0xC8 +rd");
/// ```
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_camel_case_types, reason = "makes DSL definitions easier to read")]
pub enum OpcodeMod {
    /// An 8-bit register (`+rb`).
    rb,
    /// A 16-bit register (`+rw`).
    rw,
    /// A 32-bit register (`+rd`).
    rd,
    /// A 64-bit register (`+ro`).
    ro,
}

impl OpcodeMod {
    /// Return the width in bits of the register encoded in the opcode byte.
    fn bits(&self) -> u8 {
        match *self {
            OpcodeMod::rb => 8,
            OpcodeMod::rw => 16,
            OpcodeMod::rd => 32,
            OpcodeMod::ro => 64,
        }
    }
}

/// Print the modifier in the manual's notation (e.g., `+rd`).
impl fmt::Display for OpcodeMod {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            OpcodeMod::rb => "+rb",
            OpcodeMod::rw => "+rw",
            OpcodeMod::rd => "+rd",
            OpcodeMod::ro => "+ro",
        })
    }
}
778
/// Contains the legacy prefixes allowed for VEX-encoded instructions.
///
/// VEX encodes a subset of [`Group1Prefix`] and `0x66` (see [`Group3Prefix`])
/// as part of the `pp` bit field.
#[derive(Clone, Copy, PartialEq)]
pub enum VexPrefix {
    /// The `0x66` prefix.
    _66,
    /// The `0xF2` prefix.
    _F2,
    /// The `0xF3` prefix.
    _F3,
}

impl VexPrefix {
    /// Return the two-bit `pp` encoding of this prefix.
    #[inline(always)]
    pub(crate) fn bits(self) -> u8 {
        match self {
            VexPrefix::_66 => 0b01,
            VexPrefix::_F3 => 0b10,
            VexPrefix::_F2 => 0b11,
        }
    }
}

/// Print the prefix as it appears in the manual's VEX notation (e.g., `66`).
impl fmt::Display for VexPrefix {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match *self {
            VexPrefix::_66 => "66",
            VexPrefix::_F3 => "F3",
            VexPrefix::_F2 => "F2",
        })
    }
}
811
/// Contains the escape sequences allowed for VEX-encoded instructions.
///
/// VEX encodes these in the `m-mmmm` bit field.
#[derive(Clone, Copy, PartialEq)]
pub enum VexEscape {
    /// The `0F` escape sequence.
    _0F,
    /// The `0F 3A` escape sequence.
    _0F3A,
    /// The `0F 38` escape sequence.
    _0F38,
}

impl VexEscape {
    /// Return the `m-mmmm` encoding of this escape sequence.
    #[inline(always)]
    pub(crate) fn bits(&self) -> u8 {
        match *self {
            VexEscape::_0F => 0b01,
            VexEscape::_0F38 => 0b10,
            VexEscape::_0F3A => 0b11,
        }
    }
}

/// Print the escape sequence as it appears in the manual's VEX notation (e.g.,
/// `0F3A`).
impl fmt::Display for VexEscape {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match *self {
            VexEscape::_0F => "0F",
            VexEscape::_0F3A => "0F3A",
            VexEscape::_0F38 => "0F38",
        })
    }
}
843
/// Contains allowed VEX length definitions.
///
/// VEX encodes these in the `L` bit field, a single bit with `128-bit = 0` and
/// `256-bit = 1`. For convenience, we also include the `LIG` and `LZ`
/// syntax, used by the reference manual, and always set these to `0`.
pub enum VexLength {
    /// Set `VEX.L` to `0` (128-bit).
    L128,
    /// Set `VEX.L` to `1` (256-bit).
    L256,
    /// Set `VEX.L` to `0`, but not necessarily for 128-bit operation. From the
    /// reference manual: "The VEX.L must be encoded to be 0B, an #UD occurs if
    /// VEX.L is not zero."
    LZ,
    /// The `VEX.L` bit is ignored (e.g., for floating point scalar
    /// instructions). This assembler will emit `0`.
    LIG,
}

impl VexLength {
    /// Return the value of the `L` bit: `1` for 256-bit, `0` for everything
    /// else.
    pub fn bits(&self) -> u8 {
        match self {
            VexLength::L256 => 0b1,
            VexLength::L128 | VexLength::LZ | VexLength::LIG => 0b0,
        }
    }
}

/// Print the length as it appears in the manual's VEX notation (`128`, `256`,
/// `LIG`, or `LZ`).
impl fmt::Display for VexLength {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            VexLength::L128 => "128",
            VexLength::L256 => "256",
            VexLength::LIG => "LIG",
            VexLength::LZ => "LZ",
        })
    }
}
883
/// Model the `W` bit in VEX-encoded instructions.
pub enum VexW {
    /// The `W` bit is ignored; equivalent to `.WIG` in the manual.
    WIG,
    /// The `W` bit is set to `0`; equivalent to `.W0` in the manual.
    W0,
    /// The `W` bit is set to `1`; equivalent to `.W1` in the manual.
    W1,
}

impl VexW {
    /// Return `true` only for [`VexW::WIG`]; the DSL uses this to detect that
    /// the `W` bit is still in its default state.
    fn is_ignored(&self) -> bool {
        matches!(self, VexW::WIG)
    }

    /// Return `true` only for [`VexW::W1`], i.e., when the `W` bit is set.
    pub(crate) fn as_bool(&self) -> bool {
        matches!(self, VexW::W1)
    }
}

/// Print the `W` setting as it appears in the manual's VEX notation (`WIG`,
/// `W0`, or `W1`).
impl fmt::Display for VexW {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            VexW::WIG => "WIG",
            VexW::W0 => "W0",
            VexW::W1 => "W1",
        })
    }
}
922
/// The VEX encoding, introduced for AVX instructions.
///
/// ```
/// # use cranelift_assembler_x64_meta::dsl::{vex, VexLength::L128};
/// // To encode a BLENDPD instruction in the manual: VEX.128.66.0F3A.WIG 0D /r ib
/// let enc = vex(L128)._66()._0f3a().wig().op(0x0D).r().ib();
/// assert_eq!(enc.to_string(), "VEX.128.66.0F3A.WIG 0x0D /r ib");
/// ```
pub struct Vex {
    /// The length of the operand (e.g., 128-bit or 256-bit).
    pub length: VexLength,
    /// Map the `PP` field encodings.
    pub pp: Option<VexPrefix>,
    /// Map the `MMMMM` field encodings.
    pub mmmmm: Option<VexEscape>,
    /// The `W` bit.
    pub w: VexW,
    /// VEX-encoded instructions have a single-byte opcode. Other prefix-related
    /// bytes (see [`Opcodes`]) are encoded in the VEX prefixes (see `pp`,
    /// `mmmmm`). From the reference manual: "One (and only one) opcode byte
    /// follows the 2 or 3 byte VEX."
    pub opcode: u8,
    /// See [`Rex::modrm`].
    pub modrm: Option<ModRmKind>,
    /// See [`Rex::imm`].
    pub imm: Imm,
    /// Whether an `/is4` immediate byte is present; see the `is4` builder
    /// method on this type.
    pub is4: bool,
}
952
953impl Vex {
954 /// Set the `pp` field to use [`VexPrefix::_66`]; equivalent to `.66` in the
955 /// manual.
956 pub fn _66(self) -> Self {
957 assert!(self.pp.is_none());
958 Self {
959 pp: Some(VexPrefix::_66),
960 ..self
961 }
962 }
963
964 /// Set the `pp` field to use [`VexPrefix::_F2`]; equivalent to `.F2` in the
965 /// manual.
966 pub fn _f2(self) -> Self {
967 assert!(self.pp.is_none());
968 Self {
969 pp: Some(VexPrefix::_F2),
970 ..self
971 }
972 }
973
974 /// Set the `pp` field to use [`VexPrefix::_F3`]; equivalent to `.F3` in the
975 /// manual.
976 pub fn _f3(self) -> Self {
977 assert!(self.pp.is_none());
978 Self {
979 pp: Some(VexPrefix::_F3),
980 ..self
981 }
982 }
983
984 /// Set the `mmmmmm` field to use [`VexEscape::_0F`]; equivalent to `.0F` in
985 /// the manual.
986 pub fn _0f(self) -> Self {
987 assert!(self.mmmmm.is_none());
988 Self {
989 mmmmm: Some(VexEscape::_0F),
990 ..self
991 }
992 }
993
994 /// Set the `mmmmmm` field to use [`VexEscape::_0F3A`]; equivalent to
995 /// `.0F3A` in the manual.
996 pub fn _0f3a(self) -> Self {
997 assert!(self.mmmmm.is_none());
998 Self {
999 mmmmm: Some(VexEscape::_0F3A),
1000 ..self
1001 }
1002 }
1003
1004 /// Set the `mmmmmm` field to use [`VexEscape::_0F38`]; equivalent to
1005 /// `.0F38` in the manual.
1006 pub fn _0f38(self) -> Self {
1007 assert!(self.mmmmm.is_none());
1008 Self {
1009 mmmmm: Some(VexEscape::_0F38),
1010 ..self
1011 }
1012 }
1013
1014 /// Set the `W` bit to `0`; equivalent to `.W0` in the manual.
1015 pub fn w0(self) -> Self {
1016 assert!(self.w.is_ignored());
1017 Self {
1018 w: VexW::W0,
1019 ..self
1020 }
1021 }
1022
1023 /// Set the `W` bit to `1`; equivalent to `.W1` in the manual.
1024 pub fn w1(self) -> Self {
1025 assert!(self.w.is_ignored());
1026 Self {
1027 w: VexW::W1,
1028 ..self
1029 }
1030 }
1031
1032 /// Ignore the `W` bit; equivalent to `.WIG` in the manual.
1033 pub fn wig(self) -> Self {
1034 assert!(self.w.is_ignored());
1035 Self {
1036 w: VexW::WIG,
1037 ..self
1038 }
1039 }
1040
1041 /// Set the single opcode for this VEX-encoded instruction.
1042 pub fn op(self, opcode: u8) -> Self {
1043 assert_eq!(self.opcode, u8::MAX);
1044 Self { opcode, ..self }
1045 }
1046
1047 /// Set the ModR/M byte to contain a register operand; see [`Rex::r`].
1048 pub fn r(self) -> Self {
1049 assert!(self.modrm.is_none());
1050 Self {
1051 modrm: Some(ModRmKind::Reg),
1052 ..self
1053 }
1054 }
1055
1056 /// Append a byte-sized immediate operand (8-bit); equivalent to `ib` in the
1057 /// reference manual.
1058 ///
1059 /// # Panics
1060 ///
1061 /// Panics if an immediate operand is already set.
1062 #[must_use]
1063 pub fn ib(self) -> Self {
1064 assert_eq!(self.imm, Imm::None);
1065 Self {
1066 imm: Imm::ib,
1067 ..self
1068 }
1069 }
1070
1071 /// Append a word-sized immediate operand (16-bit); equivalent to `iw` in
1072 /// the reference manual.
1073 ///
1074 /// # Panics
1075 ///
1076 /// Panics if an immediate operand is already set.
1077 #[must_use]
1078 pub fn iw(self) -> Self {
1079 assert_eq!(self.imm, Imm::None);
1080 Self {
1081 imm: Imm::iw,
1082 ..self
1083 }
1084 }
1085
1086 /// Append a doubleword-sized immediate operand (32-bit); equivalent to `id`
1087 /// in the reference manual.
1088 ///
1089 /// # Panics
1090 ///
1091 /// Panics if an immediate operand is already set.
1092 #[must_use]
1093 pub fn id(self) -> Self {
1094 assert_eq!(self.imm, Imm::None);
1095 Self {
1096 imm: Imm::id,
1097 ..self
1098 }
1099 }
1100
1101 /// Append a quadword-sized immediate operand (64-bit); equivalent to `io`
1102 /// in the reference manual.
1103 ///
1104 /// # Panics
1105 ///
1106 /// Panics if an immediate operand is already set.
1107 #[must_use]
1108 pub fn io(self) -> Self {
1109 assert_eq!(self.imm, Imm::None);
1110 Self {
1111 imm: Imm::io,
1112 ..self
1113 }
1114 }
1115
1116 /// Set the digit extending the opcode; equivalent to `/<digit>` in the
1117 /// reference manual.
1118 ///
1119 /// # Panics
1120 ///
1121 /// Panics if `extension` is too large.
1122 #[must_use]
1123 pub fn digit(self, extension: u8) -> Self {
1124 assert!(extension <= 0b111, "must fit in 3 bits");
1125 Self {
1126 modrm: Some(ModRmKind::Digit(extension)),
1127 ..self
1128 }
1129 }
1130
1131 /// An 8-bit immediate byte is present containing a source register
1132 /// specifier in either imm8[7:4] (for 64-bit
1133 /// mode) or imm8[6:4] (for 32-bit mode), and instruction-specific payload
1134 /// in imm8[3:0].
1135 pub fn is4(self) -> Self {
1136 Self { is4: true, ..self }
1137 }
1138
1139 fn validate(&self, _operands: &[Operand]) {
1140 assert!(self.opcode != u8::MAX);
1141 assert!(self.mmmmm.is_some());
1142 }
1143
1144 /// Retrieve the digit extending the opcode, if available.
1145 #[must_use]
1146 pub fn unwrap_digit(&self) -> Option<u8> {
1147 match self.modrm {
1148 Some(ModRmKind::Digit(digit)) => Some(digit),
1149 _ => None,
1150 }
1151 }
1152}
1153
1154impl From<Vex> for Encoding {
1155 fn from(vex: Vex) -> Encoding {
1156 Encoding::Vex(vex)
1157 }
1158}
1159
1160impl fmt::Display for Vex {
1161 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1162 write!(f, "VEX.{}", self.length)?;
1163 if let Some(pp) = self.pp {
1164 write!(f, ".{pp}")?;
1165 }
1166 if let Some(mmmmm) = self.mmmmm {
1167 write!(f, ".{mmmmm}")?;
1168 }
1169 write!(f, ".{} {:#04X}", self.w, self.opcode)?;
1170 if let Some(modrm) = self.modrm {
1171 write!(f, " {modrm}")?;
1172 }
1173 if self.imm != Imm::None {
1174 write!(f, " {}", self.imm)?;
1175 }
1176 Ok(())
1177 }
1178}