cranelift_assembler_x64/
mem.rs

1//! Memory operands to instructions.
2
3use crate::api::{AsReg, CodeSink, Constant, KnownOffset, Label, TrapCode};
4use crate::gpr::{self, NonRspGpr, Size};
5use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};
6
7/// x64 memory addressing modes.
8#[derive(Copy, Clone, Debug, PartialEq)]
9#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
10pub enum Amode<R: AsReg> {
11    ImmReg {
12        base: R,
13        simm32: AmodeOffsetPlusKnownOffset,
14        trap: Option<TrapCode>,
15    },
16    ImmRegRegShift {
17        base: R,
18        index: NonRspGpr<R>,
19        scale: Scale,
20        simm32: AmodeOffset,
21        trap: Option<TrapCode>,
22    },
23    RipRelative {
24        target: DeferredTarget,
25    },
26}
27
28impl<R: AsReg> Amode<R> {
29    /// Return the [`TrapCode`] associated with this [`Amode`], if any.
30    pub fn trap_code(&self) -> Option<TrapCode> {
31        match self {
32            Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,
33            Amode::RipRelative { .. } => None,
34        }
35    }
36
37    /// Return the [`RexPrefix`] for each variant of this [`Amode`].
38    #[must_use]
39    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
40        match self {
41            Amode::ImmReg { base, .. } => {
42                RexPrefix::mem_op(enc_reg, base.enc(), has_w_bit, uses_8bit)
43            }
44            Amode::ImmRegRegShift { base, index, .. } => {
45                RexPrefix::three_op(enc_reg, index.enc(), base.enc(), has_w_bit, uses_8bit)
46            }
47            Amode::RipRelative { .. } => RexPrefix::two_op(enc_reg, 0, has_w_bit, uses_8bit),
48        }
49    }
50
51    /// Emit the ModR/M, SIB, and displacement suffixes as needed for this
52    /// `Amode`.
53    pub(crate) fn encode_rex_suffixes(
54        &self,
55        sink: &mut impl CodeSink,
56        enc_reg: u8,
57        bytes_at_end: u8,
58        evex_scaling: Option<i8>,
59    ) {
60        emit_modrm_sib_disp(sink, enc_reg, self, bytes_at_end, evex_scaling);
61    }
62
63    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
64    /// prefix).
65    ///
66    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
67    /// of either the `reg` register or, if this is a memory address, the `base`
68    /// register. The `x` bit is set by the `index` register, when used.
69    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
70        match self {
71            Amode::ImmReg { base, .. } => (Some(base.enc()), None),
72            Amode::ImmRegRegShift { base, index, .. } => (Some(base.enc()), Some(index.enc())),
73            Amode::RipRelative { .. } => (None, None),
74        }
75    }
76}
77
/// A signed 32-bit immediate used as the offset of a memory address.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AmodeOffset(i32);

impl AmodeOffset {
    /// An offset of zero.
    pub const ZERO: AmodeOffset = AmodeOffset::new(0);

    /// Wrap `value` as an address offset.
    #[must_use]
    pub const fn new(value: i32) -> Self {
        Self(value)
    }

    /// Return the raw signed offset.
    #[must_use]
    pub const fn value(self) -> i32 {
        self.0
    }
}

impl From<i32> for AmodeOffset {
    fn from(value: i32) -> Self {
        Self::new(value)
    }
}

impl std::fmt::LowerHex for AmodeOffset {
    /// Print the offset the way `capstone` pretty-prints memory immediates.
    ///
    /// - zero prints nothing at all;
    /// - negative values print a leading `-` followed by their magnitude;
    /// - magnitudes above 9 are prefixed with `0x`; smaller ones are a bare
    ///   digit.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        if self.0 == 0 {
            return Ok(());
        }
        if self.0 < 0 {
            write!(f, "-")?;
        }
        // `unsigned_abs` is total: `i32::MIN` has no positive `i32`
        // counterpart, but its magnitude (0x8000_0000) fits in a `u32`.
        let magnitude = self.0.unsigned_abs();
        if magnitude > 9 {
            write!(f, "0x")?;
        }
        std::fmt::LowerHex::fmt(&magnitude, f)
    }
}
122
123/// An [`AmodeOffset`] immediate with an optional known offset.
124///
125/// Cranelift does not know certain offsets until emission time. To accommodate
126/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
127/// happens immediately before emission:
128/// - the [`KnownOffset`] is looked up, mapping it to an offset value
129/// - the [`Simm32`] value is added to the offset value
130#[derive(Copy, Clone, Debug, PartialEq)]
131pub struct AmodeOffsetPlusKnownOffset {
132    pub simm32: AmodeOffset,
133    pub offset: Option<KnownOffset>,
134}
135
136impl AmodeOffsetPlusKnownOffset {
137    pub const ZERO: AmodeOffsetPlusKnownOffset = AmodeOffsetPlusKnownOffset {
138        simm32: AmodeOffset::ZERO,
139        offset: None,
140    };
141
142    /// # Panics
143    ///
144    /// Panics if the sum of the immediate and the known offset value overflows.
145    #[must_use]
146    pub fn value(&self, sink: &impl CodeSink) -> i32 {
147        let known_offset = match self.offset {
148            Some(offset) => sink.known_offset(offset),
149            None => 0,
150        };
151        known_offset
152            .checked_add(self.simm32.value())
153            .expect("no wrapping")
154    }
155}
156
157impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
158    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
159        if let Some(offset) = self.offset {
160            write!(f, "<offset:{offset}>+")?;
161        }
162        std::fmt::LowerHex::fmt(&self.simm32, f)
163    }
164}
165
166/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
167#[derive(Copy, Clone, Debug, PartialEq)]
168#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
169pub enum DeferredTarget {
170    Label(Label),
171    Constant(Constant),
172    None,
173}
174
175impl<R: AsReg> std::fmt::Display for Amode<R> {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        let pointer_width = Size::Quadword;
178        match self {
179            Amode::ImmReg { simm32, base, .. } => {
180                // Note: size is always 8; the address is 64 bits,
181                // even if the addressed operand is smaller.
182                let base = base.to_string(Some(pointer_width));
183                write!(f, "{simm32:x}({base})")
184            }
185            Amode::ImmRegRegShift {
186                simm32,
187                base,
188                index,
189                scale,
190                ..
191            } => {
192                let base = base.to_string(Some(pointer_width));
193                let index = index.to_string(pointer_width);
194                let shift = scale.shift();
195                if shift > 1 {
196                    write!(f, "{simm32:x}({base}, {index}, {shift})")
197                } else {
198                    write!(f, "{simm32:x}({base}, {index})")
199                }
200            }
201            Amode::RipRelative { .. } => write!(f, "(%rip)"),
202        }
203    }
204}
205
/// The factor applied to the index register in the [`Amode`]s that have one.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Scale {
    /// Factor of 1 (hardware encoding `0b00`).
    One,
    /// Factor of 2 (hardware encoding `0b01`).
    Two,
    /// Factor of 4 (hardware encoding `0b10`).
    Four,
    /// Factor of 8 (hardware encoding `0b11`).
    Eight,
}
215
216impl Scale {
217    /// Create a new [`Scale`] from its hardware encoding.
218    ///
219    /// # Panics
220    ///
221    /// Panics if `enc` is not a valid encoding for a scale (0-3).
222    #[must_use]
223    pub fn new(enc: u8) -> Self {
224        match enc {
225            0b00 => Scale::One,
226            0b01 => Scale::Two,
227            0b10 => Scale::Four,
228            0b11 => Scale::Eight,
229            _ => panic!("invalid scale encoding: {enc}"),
230        }
231    }
232
233    /// Return the hardware encoding of this [`Scale`].
234    fn enc(&self) -> u8 {
235        match self {
236            Scale::One => 0b00,
237            Scale::Two => 0b01,
238            Scale::Four => 0b10,
239            Scale::Eight => 0b11,
240        }
241    }
242
243    /// Return how much this [`Scale`] will shift the value in the index
244    /// register of the SIB byte.
245    ///
246    /// This is useful for pretty-printing; when encoding, one usually needs
247    /// [`Scale::enc`].
248    fn shift(&self) -> u8 {
249        1 << self.enc()
250    }
251}
252
253/// A general-purpose register or memory operand.
254#[derive(Copy, Clone, Debug, PartialEq)]
255#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
256#[allow(
257    clippy::module_name_repetitions,
258    reason = "'GprMem' indicates this has GPR and memory variants"
259)]
260pub enum GprMem<R: AsReg, M: AsReg> {
261    Gpr(R),
262    Mem(Amode<M>),
263}
264
265impl<R: AsReg, M: AsReg> GprMem<R, M> {
266    /// Pretty-print the operand.
267    pub fn to_string(&self, size: Size) -> String {
268        match self {
269            GprMem::Gpr(gpr) => gpr.to_string(Some(size)),
270            GprMem::Mem(amode) => amode.to_string(),
271        }
272    }
273
274    /// Return the [`RexPrefix`] for each variant of this [`GprMem`].
275    #[must_use]
276    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
277        match self {
278            GprMem::Gpr(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
279            GprMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
280        }
281    }
282
283    /// Emit the ModR/M, SIB, and displacement suffixes for this [`GprMem`].
284    pub(crate) fn encode_rex_suffixes(
285        &self,
286        sink: &mut impl CodeSink,
287        enc_reg: u8,
288        bytes_at_end: u8,
289        evex_scaling: Option<i8>,
290    ) {
291        match self {
292            GprMem::Gpr(gpr) => {
293                sink.put1(encode_modrm(0b11, enc_reg & 0b111, gpr.enc() & 0b111));
294            }
295            GprMem::Mem(amode) => {
296                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);
297            }
298        }
299    }
300
301    /// Same as `XmmMem::encode_bx_regs`, but for `GprMem`.
302    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
303        match self {
304            GprMem::Gpr(reg) => (Some(reg.enc()), None),
305            GprMem::Mem(amode) => amode.encode_bx_regs(),
306        }
307    }
308}
309
310impl<R: AsReg, M: AsReg> From<R> for GprMem<R, M> {
311    fn from(reg: R) -> GprMem<R, M> {
312        GprMem::Gpr(reg)
313    }
314}
315
316impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {
317    fn from(amode: Amode<M>) -> GprMem<R, M> {
318        GprMem::Mem(amode)
319    }
320}
321
322/// An XMM register or memory operand.
323#[derive(Copy, Clone, Debug)]
324#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
325#[allow(
326    clippy::module_name_repetitions,
327    reason = "'XmmMem' indicates this has Xmm and memory variants"
328)]
329pub enum XmmMem<R: AsReg, M: AsReg> {
330    Xmm(R),
331    Mem(Amode<M>),
332}
333
334impl<R: AsReg, M: AsReg> XmmMem<R, M> {
335    /// Pretty-print the operand.
336    pub fn to_string(&self) -> String {
337        match self {
338            XmmMem::Xmm(xmm) => xmm.to_string(None),
339            XmmMem::Mem(amode) => amode.to_string(),
340        }
341    }
342
343    /// Return the [`RexPrefix`] for each variant of this [`XmmMem`].
344    #[must_use]
345    pub(crate) fn as_rex_prefix(&self, enc_reg: u8, has_w_bit: bool, uses_8bit: bool) -> RexPrefix {
346        match self {
347            XmmMem::Xmm(rm) => RexPrefix::two_op(enc_reg, rm.enc(), has_w_bit, uses_8bit),
348            XmmMem::Mem(amode) => amode.as_rex_prefix(enc_reg, has_w_bit, uses_8bit),
349        }
350    }
351
352    /// Emit the ModR/M, SIB, and displacement suffixes for this [`XmmMem`].
353    pub(crate) fn encode_rex_suffixes(
354        &self,
355        sink: &mut impl CodeSink,
356        enc_reg: u8,
357        bytes_at_end: u8,
358        evex_scaling: Option<i8>,
359    ) {
360        match self {
361            XmmMem::Xmm(xmm) => {
362                sink.put1(encode_modrm(0b11, enc_reg & 0b111, xmm.enc() & 0b111));
363            }
364            XmmMem::Mem(amode) => {
365                amode.encode_rex_suffixes(sink, enc_reg, bytes_at_end, evex_scaling);
366            }
367        }
368    }
369
370    /// Return the registers for encoding the `b` and `x` bits (e.g., in a VEX
371    /// prefix).
372    ///
373    /// During encoding, the `b` bit is set by the topmost bit (the fourth bit)
374    /// of either the `reg` register or, if this is a memory address, the `base`
375    /// register. The `x` bit is set by the `index` register, when used.
376    pub(crate) fn encode_bx_regs(&self) -> (Option<u8>, Option<u8>) {
377        match self {
378            XmmMem::Xmm(reg) => (Some(reg.enc()), None),
379            XmmMem::Mem(amode) => amode.encode_bx_regs(),
380        }
381    }
382}
383
384impl<R: AsReg, M: AsReg> From<R> for XmmMem<R, M> {
385    fn from(reg: R) -> XmmMem<R, M> {
386        XmmMem::Xmm(reg)
387    }
388}
389
390impl<R: AsReg, M: AsReg> From<Amode<M>> for XmmMem<R, M> {
391    fn from(amode: Amode<M>) -> XmmMem<R, M> {
392        XmmMem::Mem(amode)
393    }
394}
395
396/// Emit the ModRM/SIB/displacement sequence for a memory operand.
397pub fn emit_modrm_sib_disp<R: AsReg>(
398    sink: &mut impl CodeSink,
399    enc_g: u8,
400    mem_e: &Amode<R>,
401    bytes_at_end: u8,
402    evex_scaling: Option<i8>,
403) {
404    match *mem_e {
405        Amode::ImmReg { simm32, base, .. } => {
406            let enc_e = base.enc();
407            let mut imm = Disp::new(simm32.value(sink), evex_scaling);
408
409            // Most base registers allow for a single ModRM byte plus an
410            // optional immediate. If rsp is the base register, however, then a
411            // SIB byte must be used.
412            let enc_e_low3 = enc_e & 7;
413            if enc_e_low3 == gpr::enc::RSP {
414                // Displacement from RSP is encoded with a SIB byte where
415                // the index and base are both encoded as RSP's encoding of
416                // 0b100. This special encoding means that the index register
417                // isn't used and the base is 0b100 with or without a
418                // REX-encoded 4th bit (e.g. rsp or r12)
419                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
420                sink.put1(0b00_100_100);
421                imm.emit(sink);
422            } else {
423                // If the base register is rbp and there's no offset then force
424                // a 1-byte zero offset since otherwise the encoding would be
425                // invalid.
426                if enc_e_low3 == gpr::enc::RBP {
427                    imm.force_immediate();
428                }
429                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
430                imm.emit(sink);
431            }
432        }
433
434        Amode::ImmRegRegShift {
435            simm32,
436            base,
437            index,
438            scale,
439            ..
440        } => {
441            let enc_base = base.enc();
442            let enc_index = index.enc();
443
444            // Encoding of ModRM/SIB bytes don't allow the index register to
445            // ever be rsp. Note, though, that the encoding of r12, whose three
446            // lower bits match the encoding of rsp, is explicitly allowed with
447            // REX bytes so only rsp is disallowed.
448            assert!(enc_index != gpr::enc::RSP);
449
450            // If the offset is zero then there is no immediate. Note, though,
451            // that if the base register's lower three bits are `101` then an
452            // offset must be present. This is a special case in the encoding of
453            // the SIB byte and requires an explicit displacement with rbp/r13.
454            let mut imm = Disp::new(simm32.value(), evex_scaling);
455            if enc_base & 7 == gpr::enc::RBP {
456                imm.force_immediate();
457            }
458
459            // With the above determined encode the ModRM byte, then the SIB
460            // byte, then any immediate as necessary.
461            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
462            sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));
463            imm.emit(sink);
464        }
465
466        Amode::RipRelative { target } => {
467            // RIP-relative is mod=00, rm=101.
468            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
469
470            // Inform the code sink about the RIP-relative `target` at the
471            // current offset, emitting a `LabelUse`, a relocation, or etc as
472            // appropriate.
473            sink.use_target(target);
474
475            // N.B.: some instructions (XmmRmRImm format for example)
476            // have bytes *after* the RIP-relative offset. The
477            // addressed location is relative to the end of the
478            // instruction, but the relocation is nominally relative
479            // to the end of the u32 field. So, to compensate for
480            // this, we emit a negative extra offset in the u32 field
481            // initially, and the relocation will add to it.
482            sink.put4(-(i32::from(bytes_at_end)) as u32);
483        }
484    }
485}