Skip to main content

cranelift_assembler_x64/
mem.rs

1//! Memory operands to instructions.
2
3use alloc::string::{String, ToString};
4
5use crate::api::{AsReg, CodeSink, Constant, KnownOffset, Label, TrapCode};
6use crate::gpr::{self, NonRspGpr, Size};
7use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};
8
/// x64 memory addressing modes.
///
/// Each variant carries the pieces needed to encode one form of memory
/// operand; `R` is the register type used for the base (and index) register.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Amode<R: AsReg> {
    /// A base register plus an offset; pretty-printed as `simm32(base)`.
    ImmReg {
        /// The base register.
        base: R,
        /// The offset: an immediate plus an optional [`KnownOffset`] that is
        /// resolved through the [`CodeSink`] at emission time.
        simm32: AmodeOffsetPlusKnownOffset,
        /// The trap code associated with this address, if any.
        trap: Option<TrapCode>,
    },
    /// A base register, a scaled index register, and an offset;
    /// pretty-printed as `simm32(base, index, scale)`.
    ImmRegRegShift {
        /// The base register.
        base: R,
        /// The index register; the type rules out `rsp`.
        index: NonRspGpr<R>,
        /// The scaling factor applied to the index register.
        scale: Scale,
        /// The immediate offset.
        simm32: AmodeOffset,
        /// The trap code associated with this address, if any.
        trap: Option<TrapCode>,
    },
    /// A RIP-relative address; pretty-printed as `(%rip)`.
    RipRelative {
        /// The target that the [`CodeSink`] is told about during emission.
        target: DeferredTarget,
    },
}
29
30impl<R: AsReg> Amode<R> {
31    /// Return the [`TrapCode`] associated with this [`Amode`], if any.
32    pub fn trap_code(&self) -> Option<TrapCode> {
33        match self {
34            Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,
35            Amode::RipRelative { .. } => None,
36        }
37    }
38
39    /// Return the [`RexPrefix`] for each variant of this [`Amode`].
40    #[must_use]
41    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
42        match self {
43            Amode::ImmReg { base, .. } => {
44                RexPrefix::mem_op(enc_reg, base.enc(), has_w_bit, uses_8bit)
45            }
46            Amode::ImmRegRegShift { base, index, .. } => {
47                RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)
48            }
49            Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),
50        }
51    }
52
53    /// Emit the ModR/M, SIB, and displacement suffixes as needed for this
54    /// `Amode`.
55    pub(crate) fn encode_rex_suffixes(
56        &self,
57        sink: &mut impl CodeSink,
58        enc_reg: u8,
59        bytes_at_end: u8,
60        evex_scaling: Option<i8>,
61    ) {
62        emit_modrm_sib_disp(sink, enc_reg, self, bytes_at_end, evex_scaling);
63    }
64
65    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
66    /// prefix).
67    ///
68    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
69    /// of either the `reg` register or, if this is a memory address, the `base`
70    /// register. The `x` bit is set by the `index` register, when used.
71    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
72        match self {
73            Amode::ImmReg { base, .. } => (Some(base.enc()), None),
74            Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),
75            Amode::RipRelative { .. } => (None, None),
76        }
77    }
78}
79
/// A signed 32-bit immediate used as the offset of a memory address.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AmodeOffset(i32);

impl AmodeOffset {
    /// The offset `0`.
    pub const ZERO: AmodeOffset = AmodeOffset::new(0);

    /// Wrap `value` as an address offset.
    #[must_use]
    pub const fn new(value: i32) -> Self {
        AmodeOffset(value)
    }

    /// Unwrap the raw offset.
    #[must_use]
    pub fn value(self) -> i32 {
        let AmodeOffset(raw) = self;
        raw
    }
}

impl From<i32> for AmodeOffset {
    fn from(value: i32) -> Self {
        AmodeOffset::new(value)
    }
}

impl core::fmt::LowerHex for AmodeOffset {
    /// Print the offset the way `capstone` pretty-prints memory immediates:
    /// nothing at all for zero, a leading `-` for negative values, and a `0x`
    /// prefix only when the magnitude needs more than one decimal digit.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let AmodeOffset(raw) = *self;
        if raw == 0 {
            return Ok(());
        }
        if raw.is_negative() {
            f.write_str("-")?;
        }
        // `unsigned_abs` is total: the magnitude of `i32::MIN` is
        // representable as a `u32`, so no special case is required.
        let magnitude = raw.unsigned_abs();
        if magnitude > 9 {
            f.write_str("0x")?;
        }
        core::fmt::LowerHex::fmt(&magnitude, f)
    }
}
124
/// An [`AmodeOffset`] immediate with an optional known offset.
///
/// Cranelift does not know certain offsets until emission time. To accommodate
/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
/// happens immediately before emission:
/// - the [`KnownOffset`] is looked up, mapping it to an offset value
/// - the [`AmodeOffset`] value is added to the offset value
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct AmodeOffsetPlusKnownOffset {
    /// The immediate part of the offset, known up front.
    pub simm32: AmodeOffset,
    /// The part of the offset that is only resolved at emission time, if any;
    /// `None` contributes zero to the final value.
    pub offset: Option<KnownOffset>,
}
137
138impl AmodeOffsetPlusKnownOffset {
139    pub const ZERO: AmodeOffsetPlusKnownOffset = AmodeOffsetPlusKnownOffset {
140        simm32: AmodeOffset::ZERO,
141        offset: None,
142    };
143
144    /// # Panics
145    ///
146    /// Panics if the sum of the immediate and the known offset value overflows.
147    #[must_use]
148    pub fn value(&self, sink: &impl CodeSink) -> i32 {
149        let known_offset = match self.offset {
150            Some(offset) => sink.known_offset(offset),
151            None => 0,
152        };
153        known_offset
154            .checked_add(self.simm32.value())
155            .expect("no wrapping")
156    }
157}
158
159impl core::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
160    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
161        if let Some(offset) = self.offset {
162            write!(f, "<offset:{offset}>+")?;
163        }
164        core::fmt::LowerHex::fmt(&self.simm32, f)
165    }
166}
167
/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
///
/// The target is handed to the [`CodeSink`] during emission; how each variant
/// is resolved is up to the sink implementation.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum DeferredTarget {
    /// The address refers to a [`Label`].
    Label(Label),
    /// The address refers to a [`Constant`].
    Constant(Constant),
    /// No target is recorded.
    None,
}
176
177impl<R: AsReg> core::fmt::Display for Amode<R> {
178    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
179        let pointer_width = Size::Quadword;
180        match self {
181            Amode::ImmReg { simm32, base, .. } => {
182                // Note: size is always 8; the address is 64 bits,
183                // even if the addressed operand is smaller.
184                let base = base.to_string(Some(pointer_width));
185                write!(f, "{simm32:x}({base})")
186            }
187            Amode::ImmRegRegShift {
188                simm32,
189                base,
190                index,
191                scale,
192                ..
193            } => {
194                let base = base.to_string(Some(pointer_width));
195                let index = index.to_string(pointer_width);
196                let shift = scale.shift();
197                if shift > 1 {
198                    write!(f, "{simm32:x}({base}, {index}, {shift})")
199                } else {
200                    write!(f, "{simm32:x}({base}, {index})")
201                }
202            }
203            Amode::RipRelative { .. } => write!(f, "(%rip)"),
204        }
205    }
206}
207
/// The scaling factor for the index register in certain [`Amode`]s.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Scale {
    /// Factor of 1; hardware encoding `0b00`.
    One,
    /// Factor of 2; hardware encoding `0b01`.
    Two,
    /// Factor of 4; hardware encoding `0b10`.
    Four,
    /// Factor of 8; hardware encoding `0b11`.
    Eight,
}
217
218impl Scale {
219    /// Create a new [`Scale`] from its hardware encoding.
220    ///
221    /// # Panics
222    ///
223    /// Panics if `enc` is not a valid encoding for a scale (0-3).
224    #[must_use]
225    pub fn new(enc: u8) -> Self {
226        match enc {
227            0b00 => Scale::One,
228            0b01 => Scale::Two,
229            0b10 => Scale::Four,
230            0b11 => Scale::Eight,
231            _ => panic!("invalid scale encoding: {enc}"),
232        }
233    }
234
235    /// Return the hardware encoding of this [`Scale`].
236    fn enc(&self) -> u8 {
237        match self {
238            Scale::One => 0b00,
239            Scale::Two => 0b01,
240            Scale::Four => 0b10,
241            Scale::Eight => 0b11,
242        }
243    }
244
245    /// Return how much this [`Scale`] will shift the value in the index
246    /// register of the SIB byte.
247    ///
248    /// This is useful for pretty-printing; when encoding, one usually needs
249    /// [`Scale::enc`].
250    fn shift(&self) -> u8 {
251        1 << self.enc()
252    }
253}
254
/// A general-purpose register or memory operand.
///
/// `R` is the register type of the [`GprMem::Gpr`] variant; `M` is the
/// register type used inside the [`Amode`] of the [`GprMem::Mem`] variant.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'GprMem' indicates this has GPR and memory variants"
)]
pub enum GprMem<R: AsReg, M: AsReg> {
    /// The operand is a register.
    Gpr(R),
    /// The operand is in memory, at the address described by the [`Amode`].
    Mem(Amode<M>),
}
266
267impl<R: AsReg, M: AsReg> GprMem<R, M> {
268    /// Pretty-print the operand.
269    pub fn to_string(&self, size: Size) -> String {
270        match self {
271            GprMem::Gpr(gpr) => gpr.to_string(Some(size)),
272            GprMem::Mem(amode) => amode.to_string(),
273        }
274    }
275
276    /// Return the [`RexPrefix`] for each variant of this [`GprMem`].
277    #[must_use]
278    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
279        match self {
280            GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
281            GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
282        }
283    }
284
285    /// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].
286    pub(crate) fn encode_rex_suffixes(
287        &self,
288        sink: &mut impl CodeSink,
289        enc_reg: u8,
290        bytes_at_end: u8,
291        evex_scaling: Option<i8>,
292    ) {
293        match self {
294            GprMem::Gpr(gpr) => {
295                sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));
296            }
297            GprMem::Mem(amode) => {
298                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);
299            }
300        }
301    }
302
303    /// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.
304    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
305        match self {
306            GprMem::Gpr(reg) => (Some(reg.enc()), None),
307            GprMem::Mem(amode) => amode.encode_bx_regs(),
308        }
309    }
310}
311
312impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {
313    fn from(reg: R) -> GprMem<R, M> {
314        GprMem::Gpr(reg)
315    }
316}
317
318impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {
319    fn from(amode: Amode<M>) -> GprMem<R, M> {
320        GprMem::Mem(amode)
321    }
322}
323
/// An XMM register or memory operand.
///
/// `R` is the register type of the [`XmmMem::Xmm`] variant; `M` is the
/// register type used inside the [`Amode`] of the [`XmmMem::Mem`] variant.
#[derive(Copy, Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'XmmMem' indicates this has Xmm and memory variants"
)]
pub enum XmmMem<R: AsReg, M: AsReg> {
    /// The operand is a register.
    Xmm(R),
    /// The operand is in memory, at the address described by the [`Amode`].
    Mem(Amode<M>),
}
335
336impl<R: AsReg, M: AsReg> XmmMem<R, M> {
337    /// Pretty-print the operand.
338    pub fn to_string(&self) -> String {
339        match self {
340            XmmMem::Xmm(xmm) => xmm.to_string(None),
341            XmmMem::Mem(amode) => amode.to_string(),
342        }
343    }
344
345    /// Return the [`RexPrefix`] for each variant of this [`XmmMem`].
346    #[must_use]
347    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
348        match self {
349            XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
350            XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
351        }
352    }
353
354    /// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].
355    pub(crate) fn encode_rex_suffixes(
356        &self,
357        sink: &mut impl CodeSink,
358        enc_reg: u8,
359        bytes_at_end: u8,
360        evex_scaling: Option<i8>,
361    ) {
362        match self {
363            XmmMem::Xmm(xmm) => {
364                sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));
365            }
366            XmmMem::Mem(amode) => {
367                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);
368            }
369        }
370    }
371
372    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
373    /// prefix).
374    ///
375    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
376    /// of either the `reg` register or, if this is a memory address, the `base`
377    /// register. The `x` bit is set by the `index` register, when used.
378    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
379        match self {
380            XmmMem::Xmm(reg) => (Some(reg.enc()), None),
381            XmmMem::Mem(amode) => amode.encode_bx_regs(),
382        }
383    }
384}
385
386impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {
387    fn from(reg: R) -> XmmMem<R, M> {
388        XmmMem::Xmm(reg)
389    }
390}
391
392impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {
393    fn from(amode: Amode<M>) -> XmmMem<R, M> {
394        XmmMem::Mem(amode)
395    }
396}
397
/// Emit the ModRM/SIB/displacement sequence for a memory operand.
///
/// - `sink`: receives the emitted bytes and resolves known offsets and
///   RIP-relative targets.
/// - `enc_g`: encoding of the register placed in the ModRM `reg` field; only
///   its low three bits are written here.
/// - `mem_e`: the addressing mode to encode.
/// - `bytes_at_end`: the number of instruction bytes that follow the
///   displacement; only consulted for [`Amode::RipRelative`].
/// - `evex_scaling`: forwarded to `Disp::new` for the register-based modes
///   (presumably the EVEX displacement scaling factor; confirm against
///   `Disp`).
///
/// # Panics
///
/// Panics if the index register of an [`Amode::ImmRegRegShift`] is encoded as
/// `rsp`, or if resolving the offset of an [`Amode::ImmReg`] overflows.
pub fn emit_modrm_sib_disp<R: AsReg>(
    sink: &mut impl CodeSink,
    enc_g: u8,
    mem_e: &Amode<R>,
    bytes_at_end: u8,
    evex_scaling: Option<i8>,
) {
    match *mem_e {
        Amode::ImmReg { simm32, base, .. } => {
            let enc_e = base.enc();
            // The known offset (if any) is resolved through the sink here.
            let mut imm = Disp::new(simm32.value(sink), evex_scaling);

            // Most base registers allow for a single ModRM byte plus an
            // optional immediate. If rsp is the base register, however, then a
            // SIB byte must be used.
            let enc_e_low3 = enc_e & 7;
            if enc_e_low3 == gpr::enc::RSP {
                // Displacement from RSP is encoded with a SIB byte where
                // the index and base are both encoded as RSP's encoding of
                // 0b100. This special encoding means that the index register
                // isn't used and the base is 0b100 with or without a
                // REX-encoded 4th bit (e.g. rsp or r12)
                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
                sink.put1(0b00_100_100);
                imm.emit(sink);
            } else {
                // If the base register is rbp and there's no offset then force
                // a 1-byte zero offset since otherwise the encoding would be
                // invalid.
                if enc_e_low3 == gpr::enc::RBP {
                    imm.force_immediate();
                }
                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
                imm.emit(sink);
            }
        }

        Amode::ImmRegRegShift {
            simm32,
            base,
            index,
            scale,
            ..
        } => {
            let enc_base = base.enc();
            let enc_index = index.enc();

            // Encoding of ModRM/SIB bytes don't allow the index register to
            // ever be rsp. Note, though, that the encoding of r12, whose three
            // lower bits match the encoding of rsp, is explicitly allowed with
            // REX bytes so only rsp is disallowed.
            assert!(enc_index != gpr::enc::RSP);

            // If the offset is zero then there is no immediate. Note, though,
            // that if the base register's lower three bits are `101` then an
            // offset must be present. This is a special case in the encoding of
            // the SIB byte and requires an explicit displacement with rbp/r13.
            let mut imm = Disp::new(simm32.value(), evex_scaling);
            if enc_base & 7 == gpr::enc::RBP {
                imm.force_immediate();
            }

            // With the above determined encode the ModRM byte, then the SIB
            // byte, then any immediate as necessary.
            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
            sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));
            imm.emit(sink);
        }

        Amode::RipRelative { target } => {
            // RIP-relative is mod=00, rm=101.
            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));

            // Inform the code sink about the RIP-relative `target` at the
            // current offset, emitting a `LabelUse`, a relocation, or etc as
            // appropriate.
            sink.use_target(target);

            // N.B.: some instructions (XmmRmRImm format for example)
            // have bytes *after* the RIP-relative offset. The
            // addressed location is relative to the end of the
            // instruction, but the relocation is nominally relative
            // to the end of the u32 field. So, to compensate for
            // this, we emit a negative extra offset in the u32 field
            // initially, and the relocation will add to it.
            sink.put4(-(i32::from(bytes_at_end)) as u32);
        }
    }
}