cranelift_assembler_x64/
mem.rs

1//! Memory operands to instructions.
2
3use crate::api::{AsReg, CodeSink, Constant, KnownOffset, Label, TrapCode};
4use crate::gpr::{self, NonRspGpr, Size};
5use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};
6
7/// x64 memory addressing modes.
8#[derive(Copy, Clone, Debug, PartialEq)]
9#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
10pub enum Amode<R: AsReg> {
11    ImmReg {
12        base: R,
13        simm32: AmodeOffsetPlusKnownOffset,
14        trap: Option<TrapCode>,
15    },
16    ImmRegRegShift {
17        base: R,
18        index: NonRspGpr<R>,
19        scale: Scale,
20        simm32: AmodeOffset,
21        trap: Option<TrapCode>,
22    },
23    RipRelative {
24        target: DeferredTarget,
25    },
26}
27
28impl<R: AsReg> Amode<R> {
29    /// Return the [`TrapCode`] associated with this [`Amode`], if any.
30    pub fn trap_code(&self) -> Option<TrapCode> {
31        match self {
32            Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,
33            Amode::RipRelative { .. } => None,
34        }
35    }
36
37    /// Return the [`RexPrefix`] for each variant of this [`Amode`].
38    #[must_use]
39    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
40        match self {
41            Amode::ImmReg { base, .. } => {
42                RexPrefix::mem_op(enc_reg, base.enc(), has_w_bit, uses_8bit)
43            }
44            Amode::ImmRegRegShift { base, index, .. } => {
45                RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)
46            }
47            Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),
48        }
49    }
50
51    /// Emit the ModR/M, SIB, and displacement suffixes as needed for this
52    /// `Amode`.
53    pub(crate) fn encode_rex_suffixes(
54        &self,
55        sink: &mut impl CodeSink,
56        enc_reg: u8,
57        bytes_at_end: u8,
58    ) {
59        emit_modrm_sib_disp(sink, enc_reg, self, bytes_at_end, None);
60    }
61
62    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
63    /// prefix).
64    ///
65    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
66    /// of either the `reg` register or, if this is a memory address, the `base`
67    /// register. The `x` bit is set by the `index` register, when used.
68    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
69        match self {
70            Amode::ImmReg { base, .. } => (Some(base.enc()), None),
71            Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),
72            Amode::RipRelative { .. } => (None, None),
73        }
74    }
75}
76
/// A 32-bit immediate for address offsets.
///
/// This is a thin wrapper around a signed `i32` displacement.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct AmodeOffset(i32);
81
82impl AmodeOffset {
83    pub const ZERO: AmodeOffset = AmodeOffset::new(0);
84
85    #[must_use]
86    pub const fn new(value: i32) -> Self {
87        Self(value)
88    }
89
90    #[must_use]
91    pub fn value(self) -> i32 {
92        self.0
93    }
94}
95
96impl From<i32> for AmodeOffset {
97    fn from(value: i32) -> Self {
98        Self(value)
99    }
100}
101
102impl std::fmt::LowerHex for AmodeOffset {
103    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
104        // This rather complex implementation is necessary to match how
105        // `capstone` pretty-prints memory immediates.
106        if self.0 == 0 {
107            return Ok(());
108        }
109        if self.0 < 0 {
110            write!(f, "-")?;
111        }
112        if self.0 > 9 || self.0 < -9 {
113            write!(f, "0x")?;
114        }
115        let abs = match self.0.checked_abs() {
116            Some(i) => i,
117            None => -2_147_483_648,
118        };
119        std::fmt::LowerHex::fmt(&abs, f)
120    }
121}
122
123/// An [`AmodeOffset`] immediate with an optional known offset.
124///
125/// Cranelift does not know certain offsets until emission time. To accommodate
126/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
127/// happens immediately before emission:
128/// - the [`KnownOffset`] is looked up, mapping it to an offset value
129/// - the [`Simm32`] value is added to the offset value
130#[derive(Copy, Clone, Debug, PartialEq)]
131pub struct AmodeOffsetPlusKnownOffset {
132    pub simm32: AmodeOffset,
133    pub offset: Option<KnownOffset>,
134}
135
136impl AmodeOffsetPlusKnownOffset {
137    pub const ZERO: AmodeOffsetPlusKnownOffset = AmodeOffsetPlusKnownOffset {
138        simm32: AmodeOffset::ZERO,
139        offset: None,
140    };
141
142    /// # Panics
143    ///
144    /// Panics if the sum of the immediate and the known offset value overflows.
145    #[must_use]
146    pub fn value(&self, sink: &impl CodeSink) -> i32 {
147        let known_offset = match self.offset {
148            Some(offset) => sink.known_offset(offset),
149            None => 0,
150        };
151        known_offset
152            .checked_add(self.simm32.value())
153            .expect("no wrapping")
154    }
155}
156
157impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
158    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
159        if let Some(offset) = self.offset {
160            write!(f, "<offset:{offset}>+")?;
161        }
162        std::fmt::LowerHex::fmt(&self.simm32, f)
163    }
164}
165
166/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
167#[derive(Copy, Clone, Debug, PartialEq)]
168#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
169pub enum DeferredTarget {
170    Label(Label),
171    Constant(Constant),
172    None,
173}
174
175impl<R: AsReg> std::fmt::Display for Amode<R> {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        let pointer_width = Size::Quadword;
178        match self {
179            Amode::ImmReg { simm32, base, .. } => {
180                // Note: size is always 8; the address is 64 bits,
181                // even if the addressed operand is smaller.
182                let base = base.to_string(Some(pointer_width));
183                write!(f, "{simm32:x}({base})")
184            }
185            Amode::ImmRegRegShift {
186                simm32,
187                base,
188                index,
189                scale,
190                ..
191            } => {
192                let base = base.to_string(Some(pointer_width));
193                let index = index.to_string(pointer_width);
194                let shift = scale.shift();
195                if shift > 1 {
196                    write!(f, "{simm32:x}({base}, {index}, {shift})")
197                } else {
198                    write!(f, "{simm32:x}({base}, {index})")
199                }
200            }
201            Amode::RipRelative { .. } => write!(f, "(%rip)"),
202        }
203    }
204}
205
/// The scaling factor for the index register in certain [`Amode`]s.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Scale {
    /// The index register is multiplied by 1.
    One,
    /// The index register is multiplied by 2.
    Two,
    /// The index register is multiplied by 4.
    Four,
    /// The index register is multiplied by 8.
    Eight,
}
215
216impl Scale {
217    /// Create a new [`Scale`] from its hardware encoding.
218    ///
219    /// # Panics
220    ///
221    /// Panics if `enc` is not a valid encoding for a scale (0-3).
222    #[must_use]
223    pub fn new(enc: u8) -> Self {
224        match enc {
225            0b00 => Scale::One,
226            0b01 => Scale::Two,
227            0b10 => Scale::Four,
228            0b11 => Scale::Eight,
229            _ => panic!("invalid scale encoding: {enc}"),
230        }
231    }
232
233    /// Return the hardware encoding of this [`Scale`].
234    fn enc(&self) -> u8 {
235        match self {
236            Scale::One => 0b00,
237            Scale::Two => 0b01,
238            Scale::Four => 0b10,
239            Scale::Eight => 0b11,
240        }
241    }
242
243    /// Return how much this [`Scale`] will shift the value in the index
244    /// register of the SIB byte.
245    ///
246    /// This is useful for pretty-printing; when encoding, one usually needs
247    /// [`Scale::enc`].
248    fn shift(&self) -> u8 {
249        1 << self.enc()
250    }
251}
252
253/// A general-purpose register or memory operand.
254#[derive(Copy, Clone, Debug, PartialEq)]
255#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
256#[allow(
257    clippy::module_name_repetitions,
258    reason = "'GprMem' indicates this has GPR and memory variants"
259)]
260pub enum GprMem<R: AsReg, M: AsReg> {
261    Gpr(R),
262    Mem(Amode<M>),
263}
264
265impl<R: AsReg, M: AsReg> GprMem<R, M> {
266    /// Pretty-print the operand.
267    pub fn to_string(&self, size: Size) -> String {
268        match self {
269            GprMem::Gpr(gpr) => gpr.to_string(Some(size)),
270            GprMem::Mem(amode) => amode.to_string(),
271        }
272    }
273
274    /// Return the [`RexPrefix`] for each variant of this [`GprMem`].
275    #[must_use]
276    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
277        match self {
278            GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
279            GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
280        }
281    }
282
283    /// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].
284    pub(crate) fn encode_rex_suffixes(
285        &self,
286        sink: &mut impl CodeSink,
287        enc_reg: u8,
288        bytes_at_end: u8,
289    ) {
290        match self {
291            GprMem::Gpr(gpr) => {
292                sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));
293            }
294            GprMem::Mem(amode) => {
295                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end);
296            }
297        }
298    }
299
300    /// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.
301    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
302        match self {
303            GprMem::Gpr(reg) => (Some(reg.enc()), None),
304            GprMem::Mem(amode) => amode.encode_bx_regs(),
305        }
306    }
307}
308
309impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {
310    fn from(reg: R) -> GprMem<R, M> {
311        GprMem::Gpr(reg)
312    }
313}
314
315impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {
316    fn from(amode: Amode<M>) -> GprMem<R, M> {
317        GprMem::Mem(amode)
318    }
319}
320
321/// An XMM register or memory operand.
322#[derive(Copy, Clone, Debug)]
323#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
324#[allow(
325    clippy::module_name_repetitions,
326    reason = "'XmmMem' indicates this has Xmm and memory variants"
327)]
328pub enum XmmMem<R: AsReg, M: AsReg> {
329    Xmm(R),
330    Mem(Amode<M>),
331}
332
333impl<R: AsReg, M: AsReg> XmmMem<R, M> {
334    /// Pretty-print the operand.
335    pub fn to_string(&self) -> String {
336        match self {
337            XmmMem::Xmm(xmm) => xmm.to_string(None),
338            XmmMem::Mem(amode) => amode.to_string(),
339        }
340    }
341
342    /// Return the [`RexPrefix`] for each variant of this [`XmmMem`].
343    #[must_use]
344    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
345        match self {
346            XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
347            XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
348        }
349    }
350
351    /// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].
352    pub(crate) fn encode_rex_suffixes(
353        &self,
354        sink: &mut impl CodeSink,
355        enc_reg: u8,
356        bytes_at_end: u8,
357    ) {
358        match self {
359            XmmMem::Xmm(xmm) => {
360                sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));
361            }
362            XmmMem::Mem(amode) => {
363                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end);
364            }
365        }
366    }
367
368    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
369    /// prefix).
370    ///
371    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
372    /// of either the `reg` register or, if this is a memory address, the `base`
373    /// register. The `x` bit is set by the `index` register, when used.
374    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
375        match self {
376            XmmMem::Xmm(reg) => (Some(reg.enc()), None),
377            XmmMem::Mem(amode) => amode.encode_bx_regs(),
378        }
379    }
380}
381
382impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {
383    fn from(reg: R) -> XmmMem<R, M> {
384        XmmMem::Xmm(reg)
385    }
386}
387
388impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {
389    fn from(amode: Amode<M>) -> XmmMem<R, M> {
390        XmmMem::Mem(amode)
391    }
392}
393
394/// Emit the ModRM/SIB/displacement sequence for a memory operand.
395pub fn emit_modrm_sib_disp<R: AsReg>(
396    sink: &mut impl CodeSink,
397    enc_g: u8,
398    mem_e: &Amode<R>,
399    bytes_at_end: u8,
400    evex_scaling: Option<i8>,
401) {
402    match *mem_e {
403        Amode::ImmReg { simm32, base, .. } => {
404            let enc_e = base.enc();
405            let mut imm = Disp::new(simm32.value(sink), evex_scaling);
406
407            // Most base registers allow for a single ModRM byte plus an
408            // optional immediate. If rsp is the base register, however, then a
409            // SIB byte must be used.
410            let enc_e_low3 = enc_e & 7;
411            if enc_e_low3 == gpr::enc::RSP {
412                // Displacement from RSP is encoded with a SIB byte where
413                // the index and base are both encoded as RSP's encoding of
414                // 0b100. This special encoding means that the index register
415                // isn't used and the base is 0b100 with or without a
416                // REX-encoded 4th bit (e.g. rsp or r12)
417                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
418                sink.put1(0b00_100_100);
419                imm.emit(sink);
420            } else {
421                // If the base register is rbp and there's no offset then force
422                // a 1-byte zero offset since otherwise the encoding would be
423                // invalid.
424                if enc_e_low3 == gpr::enc::RBP {
425                    imm.force_immediate();
426                }
427                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
428                imm.emit(sink);
429            }
430        }
431
432        Amode::ImmRegRegShift {
433            simm32,
434            base,
435            index,
436            scale,
437            ..
438        } => {
439            let enc_base = base.enc();
440            let enc_index = index.enc();
441
442            // Encoding of ModRM/SIB bytes don't allow the index register to
443            // ever be rsp. Note, though, that the encoding of r12, whose three
444            // lower bits match the encoding of rsp, is explicitly allowed with
445            // REX bytes so only rsp is disallowed.
446            assert!(enc_index != gpr::enc::RSP);
447
448            // If the offset is zero then there is no immediate. Note, though,
449            // that if the base register's lower three bits are `101` then an
450            // offset must be present. This is a special case in the encoding of
451            // the SIB byte and requires an explicit displacement with rbp/r13.
452            let mut imm = Disp::new(simm32.value(), evex_scaling);
453            if enc_base & 7 == gpr::enc::RBP {
454                imm.force_immediate();
455            }
456
457            // With the above determined encode the ModRM byte, then the SIB
458            // byte, then any immediate as necessary.
459            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
460            sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));
461            imm.emit(sink);
462        }
463
464        Amode::RipRelative { target } => {
465            // RIP-relative is mod=00, rm=101.
466            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
467
468            // Inform the code sink about the RIP-relative `target` at the
469            // current offset, emitting a `LabelUse`, a relocation, or etc as
470            // appropriate.
471            sink.use_target(target);
472
473            // N.B.: some instructions (XmmRmRImm format for example)
474            // have bytes *after* the RIP-relative offset. The
475            // addressed location is relative to the end of the
476            // instruction, but the relocation is nominally relative
477            // to the end of the u32 field. So, to compensate for
478            // this, we emit a negative extra offset in the u32 field
479            // initially, and the relocation will add to it.
480            sink.put4(-(i32::from(bytes_at_end)) as u32);
481        }
482    }
483}