cranelift_assembler_x64/
mem.rs

1//! Memory operands to instructions.
2
3use crate::api::{AsReg, CodeSink, Constant, KnownOffset, KnownOffsetTable, Label, TrapCode};
4use crate::gpr::{self, NonRspGpr, Size};
5use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};
6
/// x64 memory addressing modes.
///
/// Each variant lists the pieces of an effective address; the translation of
/// those pieces into ModRM/SIB/displacement bytes is performed by
/// [`emit_modrm_sib_disp`].
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Amode<R: AsReg> {
    /// A base register plus a displacement; pretty-printed as `simm32(base)`.
    ImmReg {
        /// The base register of the address.
        base: R,
        /// The displacement, which may include a [`KnownOffset`] that is only
        /// resolved at emission time.
        simm32: AmodeOffsetPlusKnownOffset,
        /// The trap code reported by [`Amode::trap_code`], if any.
        trap: Option<TrapCode>,
    },
    /// A base register, a scaled index register, and a displacement;
    /// pretty-printed as `simm32(base, index, scale)`.
    ImmRegRegShift {
        /// The base register of the address.
        base: R,
        /// The index register; [`emit_modrm_sib_disp`] asserts that its
        /// encoding is not that of `rsp`.
        index: NonRspGpr<R>,
        /// The factor applied to the index register.
        scale: Scale,
        /// The displacement added to the base and scaled index.
        simm32: AmodeOffset,
        /// The trap code reported by [`Amode::trap_code`], if any.
        trap: Option<TrapCode>,
    },
    /// An address relative to the instruction pointer; pretty-printed as
    /// `(%rip)`.
    RipRelative {
        /// The label or constant being addressed; it is turned into a label
        /// use on the [`CodeSink`] at emission time.
        target: DeferredTarget,
    },
}
27
28impl<R: AsReg> Amode<R> {
29    /// Return the [`TrapCode`] associated with this [`Amode`], if any.
30    pub fn trap_code(&self) -> Option<TrapCode> {
31        match self {
32            Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,
33            Amode::RipRelative { .. } => None,
34        }
35    }
36
37    /// Return the [`RexPrefix`] for each variant of this [`Amode`].
38    #[must_use]
39    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
40        match self {
41            Amode::ImmReg { base, .. } => {
42                RexPrefix::two_op(enc_reg, base.enc(), has_w_bit, uses_8bit)
43            }
44            Amode::ImmRegRegShift { base, index, .. } => {
45                RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)
46            }
47            Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),
48        }
49    }
50
51    /// Emit the ModR/M, SIB, and displacement suffixes as needed for this
52    /// `Amode`.
53    pub(crate) fn encode_rex_suffixes(
54        &self,
55        sink: &mut impl CodeSink,
56        offsets: &impl KnownOffsetTable,
57        enc_reg: u8,
58        bytes_at_end: u8,
59    ) {
60        emit_modrm_sib_disp(sink, offsets, enc_reg, self, bytes_at_end, None);
61    }
62
63    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
64    /// prefix).
65    ///
66    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
67    /// of either the `reg` register or, if this is a memory address, the `base`
68    /// register. The `x` bit is set by the `index` register, when used.
69    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
70        match self {
71            Amode::ImmReg { base, .. } => (Some(base.enc()), None),
72            Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),
73            Amode::RipRelative { .. } => (None, None),
74        }
75    }
76}
77
/// A 32-bit immediate for address offsets.
///
/// This is a thin wrapper around a signed `i32` displacement. Its
/// `LowerHex` implementation prints the value the way `capstone` prints
/// memory immediates.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct AmodeOffset(i32);
82
83impl AmodeOffset {
84    #[must_use]
85    pub fn new(value: i32) -> Self {
86        Self(value)
87    }
88
89    #[must_use]
90    pub fn value(self) -> i32 {
91        self.0
92    }
93}
94
95impl From<i32> for AmodeOffset {
96    fn from(value: i32) -> Self {
97        Self(value)
98    }
99}
100
101impl std::fmt::LowerHex for AmodeOffset {
102    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
103        // This rather complex implementation is necessary to match how
104        // `capstone` pretty-prints memory immediates.
105        if self.0 == 0 {
106            return Ok(());
107        }
108        if self.0 < 0 {
109            write!(f, "-")?;
110        }
111        if self.0 > 9 || self.0 < -9 {
112            write!(f, "0x")?;
113        }
114        let abs = match self.0.checked_abs() {
115            Some(i) => i,
116            None => -2_147_483_648,
117        };
118        std::fmt::LowerHex::fmt(&abs, f)
119    }
120}
121
/// An [`AmodeOffset`] immediate with an optional known offset.
///
/// Cranelift does not know certain offsets until emission time. To accommodate
/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
/// happens immediately before emission:
/// - the [`KnownOffset`] is looked up, mapping it to an offset value
/// - the [`AmodeOffset`] value is added to the offset value
#[derive(Copy, Clone, Debug)]
pub struct AmodeOffsetPlusKnownOffset {
    /// The immediate part of the displacement, known up front.
    pub simm32: AmodeOffset,
    /// The identifier of an offset to look up at emission time; `None` means
    /// nothing is added to `simm32`.
    pub offset: Option<KnownOffset>,
}
134
135impl AmodeOffsetPlusKnownOffset {
136    /// # Panics
137    ///
138    /// Panics if the sum of the immediate and the known offset value overflows.
139    #[must_use]
140    pub fn value(&self, offsets: &impl KnownOffsetTable) -> i32 {
141        let known_offset = match self.offset {
142            Some(offset) => offsets[usize::from(offset)],
143            None => 0,
144        };
145        known_offset
146            .checked_add(self.simm32.value())
147            .expect("no wrapping")
148    }
149}
150
151impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
152    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
153        if let Some(offset) = self.offset {
154            write!(f, "<offset:{offset}>+")?;
155        }
156        std::fmt::LowerHex::fmt(&self.simm32, f)
157    }
158}
159
/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum DeferredTarget {
    /// The address of a [`Label`], used as-is at emission time.
    Label(Label),
    /// The address of a [`Constant`]; at emission time the [`CodeSink`] is
    /// asked for the label that stands for this constant.
    Constant(Constant),
}
167
168impl<R: AsReg> std::fmt::Display for Amode<R> {
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        let pointer_width = Size::Quadword;
171        match self {
172            Amode::ImmReg { simm32, base, .. } => {
173                // Note: size is always 8; the address is 64 bits,
174                // even if the addressed operand is smaller.
175                let base = base.to_string(Some(pointer_width));
176                write!(f, "{simm32:x}({base})")
177            }
178            Amode::ImmRegRegShift {
179                simm32,
180                base,
181                index,
182                scale,
183                ..
184            } => {
185                let base = base.to_string(Some(pointer_width));
186                let index = index.to_string(pointer_width);
187                let shift = scale.shift();
188                if shift > 1 {
189                    write!(f, "{simm32:x}({base}, {index}, {shift})")
190                } else {
191                    write!(f, "{simm32:x}({base}, {index})")
192                }
193            }
194            Amode::RipRelative { .. } => write!(f, "(%rip)"),
195        }
196    }
197}
198
/// The scaling factor for the index register in certain [`Amode`]s.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Scale {
    /// Multiply the index by 1; hardware encoding `0b00`.
    One,
    /// Multiply the index by 2; hardware encoding `0b01`.
    Two,
    /// Multiply the index by 4; hardware encoding `0b10`.
    Four,
    /// Multiply the index by 8; hardware encoding `0b11`.
    Eight,
}
208
209impl Scale {
210    /// Create a new [`Scale`] from its hardware encoding.
211    ///
212    /// # Panics
213    ///
214    /// Panics if `enc` is not a valid encoding for a scale (0-3).
215    #[must_use]
216    pub fn new(enc: u8) -> Self {
217        match enc {
218            0b00 => Scale::One,
219            0b01 => Scale::Two,
220            0b10 => Scale::Four,
221            0b11 => Scale::Eight,
222            _ => panic!("invalid scale encoding: {enc}"),
223        }
224    }
225
226    /// Return the hardware encoding of this [`Scale`].
227    fn enc(&self) -> u8 {
228        match self {
229            Scale::One => 0b00,
230            Scale::Two => 0b01,
231            Scale::Four => 0b10,
232            Scale::Eight => 0b11,
233        }
234    }
235
236    /// Return how much this [`Scale`] will shift the value in the index
237    /// register of the SIB byte.
238    ///
239    /// This is useful for pretty-printing; when encoding, one usually needs
240    /// [`Scale::enc`].
241    fn shift(&self) -> u8 {
242        1 << self.enc()
243    }
244}
245
/// A general-purpose register or memory operand.
///
/// `R` is the register type of the register variant; `M` is the register
/// type used inside the [`Amode`] of the memory variant.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'GprMem' indicates this has GPR and memory variants"
)]
pub enum GprMem<R: AsReg, M: AsReg> {
    /// The operand is a register.
    Gpr(R),
    /// The operand is a memory location described by an [`Amode`].
    Mem(Amode<M>),
}
257
258impl<R: AsReg, M: AsReg> GprMem<R, M> {
259    /// Pretty-print the operand.
260    pub fn to_string(&self, size: Size) -> String {
261        match self {
262            GprMem::Gpr(gpr) => gpr.to_string(Some(size)),
263            GprMem::Mem(amode) => amode.to_string(),
264        }
265    }
266
267    /// Return the [`RexPrefix`] for each variant of this [`GprMem`].
268    #[must_use]
269    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
270        match self {
271            GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
272            GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
273        }
274    }
275
276    /// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].
277    pub(crate) fn encode_rex_suffixes(
278        &self,
279        sink: &mut impl CodeSink,
280        offsets: &impl KnownOffsetTable,
281        enc_reg: u8,
282        bytes_at_end: u8,
283    ) {
284        match self {
285            GprMem::Gpr(gpr) => {
286                sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));
287            }
288            GprMem::Mem(amode) => {
289                amode.encode_rex_suffixes(sink, offsets, enc_reg, bytes_at_end);
290            }
291        }
292    }
293
294    /// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.
295    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
296        match self {
297            GprMem::Gpr(reg) => (Some(reg.enc()), None),
298            GprMem::Mem(amode) => amode.encode_bx_regs(),
299        }
300    }
301}
302
303impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {
304    fn from(reg: R) -> GprMem<R, M> {
305        GprMem::Gpr(reg)
306    }
307}
308
309impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {
310    fn from(amode: Amode<M>) -> GprMem<R, M> {
311        GprMem::Mem(amode)
312    }
313}
314
/// An XMM register or memory operand.
///
/// `R` is the register type of the register variant; `M` is the register
/// type used inside the [`Amode`] of the memory variant.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'XmmMem' indicates this has Xmm and memory variants"
)]
pub enum XmmMem<R: AsReg, M: AsReg> {
    /// The operand is a register.
    Xmm(R),
    /// The operand is a memory location described by an [`Amode`].
    Mem(Amode<M>),
}
326
327impl<R: AsReg, M: AsReg> XmmMem<R, M> {
328    /// Pretty-print the operand.
329    pub fn to_string(&self) -> String {
330        match self {
331            XmmMem::Xmm(xmm) => xmm.to_string(None),
332            XmmMem::Mem(amode) => amode.to_string(),
333        }
334    }
335
336    /// Return the [`RexPrefix`] for each variant of this [`XmmMem`].
337    #[must_use]
338    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
339        match self {
340            XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
341            XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
342        }
343    }
344
345    /// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].
346    pub(crate) fn encode_rex_suffixes(
347        &self,
348        sink: &mut impl CodeSink,
349        offsets: &impl KnownOffsetTable,
350        enc_reg: u8,
351        bytes_at_end: u8,
352    ) {
353        match self {
354            XmmMem::Xmm(xmm) => {
355                sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));
356            }
357            XmmMem::Mem(amode) => {
358                amode.encode_rex_suffixes(sink, offsets, enc_reg, bytes_at_end);
359            }
360        }
361    }
362
363    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
364    /// prefix).
365    ///
366    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
367    /// of either the `reg` register or, if this is a memory address, the `base`
368    /// register. The `x` bit is set by the `index` register, when used.
369    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
370        match self {
371            XmmMem::Xmm(reg) => (Some(reg.enc()), None),
372            XmmMem::Mem(amode) => amode.encode_bx_regs(),
373        }
374    }
375}
376
377impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {
378    fn from(reg: R) -> XmmMem<R, M> {
379        XmmMem::Xmm(reg)
380    }
381}
382
383impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {
384    fn from(amode: Amode<M>) -> XmmMem<R, M> {
385        XmmMem::Mem(amode)
386    }
387}
388
389/// Emit the ModRM/SIB/displacement sequence for a memory operand.
390pub fn emit_modrm_sib_disp<R: AsReg>(
391    sink: &mut impl CodeSink,
392    offsets: &impl KnownOffsetTable,
393    enc_g: u8,
394    mem_e: &Amode<R>,
395    bytes_at_end: u8,
396    evex_scaling: Option<i8>,
397) {
398    match *mem_e {
399        Amode::ImmReg { simm32, base, .. } => {
400            let enc_e = base.enc();
401            let mut imm = Disp::new(simm32.value(offsets), evex_scaling);
402
403            // Most base registers allow for a single ModRM byte plus an
404            // optional immediate. If rsp is the base register, however, then a
405            // SIB byte must be used.
406            let enc_e_low3 = enc_e & 7;
407            if enc_e_low3 == gpr::enc::RSP {
408                // Displacement from RSP is encoded with a SIB byte where
409                // the index and base are both encoded as RSP's encoding of
410                // 0b100. This special encoding means that the index register
411                // isn't used and the base is 0b100 with or without a
412                // REX-encoded 4th bit (e.g. rsp or r12)
413                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
414                sink.put1(0b00_100_100);
415                imm.emit(sink);
416            } else {
417                // If the base register is rbp and there's no offset then force
418                // a 1-byte zero offset since otherwise the encoding would be
419                // invalid.
420                if enc_e_low3 == gpr::enc::RBP {
421                    imm.force_immediate();
422                }
423                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
424                imm.emit(sink);
425            }
426        }
427
428        Amode::ImmRegRegShift {
429            simm32,
430            base,
431            index,
432            scale,
433            ..
434        } => {
435            let enc_base = base.enc();
436            let enc_index = index.enc();
437
438            // Encoding of ModRM/SIB bytes don't allow the index register to
439            // ever be rsp. Note, though, that the encoding of r12, whose three
440            // lower bits match the encoding of rsp, is explicitly allowed with
441            // REX bytes so only rsp is disallowed.
442            assert!(enc_index != gpr::enc::RSP);
443
444            // If the offset is zero then there is no immediate. Note, though,
445            // that if the base register's lower three bits are `101` then an
446            // offset must be present. This is a special case in the encoding of
447            // the SIB byte and requires an explicit displacement with rbp/r13.
448            let mut imm = Disp::new(simm32.value(), evex_scaling);
449            if enc_base & 7 == gpr::enc::RBP {
450                imm.force_immediate();
451            }
452
453            // With the above determined encode the ModRM byte, then the SIB
454            // byte, then any immediate as necessary.
455            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
456            sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));
457            imm.emit(sink);
458        }
459
460        Amode::RipRelative { target } => {
461            // RIP-relative is mod=00, rm=101.
462            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
463
464            let offset = sink.current_offset();
465            let target = match target {
466                DeferredTarget::Label(label) => label,
467                DeferredTarget::Constant(constant) => sink.get_label_for_constant(constant),
468            };
469            sink.use_label_at_offset(offset, target);
470
471            // N.B.: some instructions (XmmRmRImm format for example)
472            // have bytes *after* the RIP-relative offset. The
473            // addressed location is relative to the end of the
474            // instruction, but the relocation is nominally relative
475            // to the end of the u32 field. So, to compensate for
476            // this, we emit a negative extra offset in the u32 field
477            // initially, and the relocation will add to it.
478            sink.put4(-(i32::from(bytes_at_end)) as u32);
479        }
480    }
481}