cranelift_assembler_x64/
mem.rs

//! Memory operands to instructions.

use crate::api::{AsReg, CodeSink, Constant, KnownOffset, KnownOffsetTable, Label, TrapCode};
use crate::gpr::{self, NonRspGpr, Size};
use crate::rex::{encode_modrm, encode_sib, Imm, RexFlags};
use crate::xmm;

/// x64 memory addressing modes.
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Amode<R: AsReg> {
    /// A base register plus a 32-bit displacement (with an optional
    /// [`KnownOffset`]).
    ImmReg {
        base: R,
        simm32: AmodeOffsetPlusKnownOffset,
        trap: Option<TrapCode>,
    },
    /// A base register plus a scaled index register plus a 32-bit
    /// displacement.
    ImmRegRegShift {
        base: R,
        index: NonRspGpr<R>,
        scale: Scale,
        simm32: AmodeOffset,
        trap: Option<TrapCode>,
    },
    /// A location relative to the instruction pointer (RIP).
    RipRelative {
        target: DeferredTarget,
    },
}

impl<R: AsReg> Amode<R> {
    /// Return the [`TrapCode`] associated with this [`Amode`], if any.
    pub fn trap_code(&self) -> Option<TrapCode> {
        match self {
            Amode::ImmReg { trap, .. } | Amode::ImmRegRegShift { trap, .. } => *trap,
            Amode::RipRelative { .. } => None,
        }
    }

    /// Emit the REX prefix for an instruction whose register operand is
    /// encoded as `enc_g` and whose memory operand is this [`Amode`].
    pub fn emit_rex_prefix(&self, rex: RexFlags, enc_g: u8, sink: &mut impl CodeSink) {
        match self {
            Amode::ImmReg { base, .. } => {
                let enc_e = base.enc();
                rex.emit_two_op(sink, enc_g, enc_e);
            }
            Amode::ImmRegRegShift { base, index, .. } => {
                let enc_base = base.enc();
                let enc_index = index.enc();
                rex.emit_three_op(sink, enc_g, enc_index, enc_base);
            }
            Amode::RipRelative { .. } => {
                // Note: REX.B = 0.
                rex.emit_two_op(sink, enc_g, 0);
            }
        }
    }

    /// Return mutable references to the registers used by this [`Amode`].
    ///
    /// This is useful in generated code to allow access by a
    /// [`RegisterVisitor`](crate::RegisterVisitor).
    pub fn registers_mut(&mut self) -> Vec<&mut R> {
        match self {
            Amode::ImmReg { base, .. } => {
                vec![base]
            }
            Amode::ImmRegRegShift { base, index, .. } => {
                vec![base, index.as_mut()]
            }
            Amode::RipRelative { .. } => {
                vec![]
            }
        }
    }
}
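// A minimal construction sketch (illustrative, not from the original source):
// building the address `[rax + 0x10]` as an `Amode::ImmReg`, assuming `rax`
// is some register value whose type implements `AsReg`:
//
//     let addr = Amode::ImmReg {
//         base: rax,
//         simm32: AmodeOffsetPlusKnownOffset {
//             simm32: AmodeOffset::new(0x10),
//             offset: None,
//         },
//         trap: None,
//     };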

/// A 32-bit immediate for address offsets.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct AmodeOffset(i32);

impl AmodeOffset {
    #[must_use]
    pub fn new(value: i32) -> Self {
        Self(value)
    }

    #[must_use]
    pub fn value(self) -> i32 {
        self.0
    }
}

impl From<i32> for AmodeOffset {
    fn from(value: i32) -> Self {
        Self(value)
    }
}

impl std::fmt::LowerHex for AmodeOffset {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // This rather complex implementation is necessary to match how
        // `capstone` pretty-prints memory immediates.
        if self.0 == 0 {
            return Ok(());
        }
        if self.0 < 0 {
            write!(f, "-")?;
        }
        if self.0 > 9 || self.0 < -9 {
            write!(f, "0x")?;
        }
        let abs = match self.0.checked_abs() {
            Some(i) => i,
            // `i32::MIN` has no positive counterpart; its `LowerHex`
            // representation (`80000000`) is already the magnitude we want.
            None => -2_147_483_648,
        };
        std::fmt::LowerHex::fmt(&abs, f)
    }
}
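// Illustrative sketch of the resulting formatting (derived from the code
// above): zero prints nothing, values in -9..=9 print without a `0x` prefix,
// and larger magnitudes are prefixed with `0x`.
//
//     assert_eq!(format!("{:x}", AmodeOffset::new(0)), "");
//     assert_eq!(format!("{:x}", AmodeOffset::new(5)), "5");
//     assert_eq!(format!("{:x}", AmodeOffset::new(-3)), "-3");
//     assert_eq!(format!("{:x}", AmodeOffset::new(16)), "0x10");
//     assert_eq!(format!("{:x}", AmodeOffset::new(-16)), "-0x10");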

/// An [`AmodeOffset`] immediate with an optional known offset.
///
/// Cranelift does not know certain offsets until emission time. To accommodate
/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
/// happens immediately before emission:
/// - the [`KnownOffset`] is looked up, mapping it to an offset value
/// - the [`AmodeOffset`] value is added to the offset value
#[derive(Clone, Debug)]
pub struct AmodeOffsetPlusKnownOffset {
    pub simm32: AmodeOffset,
    pub offset: Option<KnownOffset>,
}

impl AmodeOffsetPlusKnownOffset {
    /// # Panics
    ///
    /// Panics if the sum of the immediate and the known offset value overflows.
    #[must_use]
    pub fn value(&self, offsets: &impl KnownOffsetTable) -> i32 {
        let known_offset = match self.offset {
            Some(offset) => offsets[offset],
            None => 0,
        };
        known_offset
            .checked_add(self.simm32.value())
            .expect("no wrapping")
    }
}
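// For example (illustrative): with `simm32 = AmodeOffset::new(8)` and a known
// offset that the table resolves to 16, `value(..)` returns 24; with
// `offset: None`, it returns just the immediate, 8.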

impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        if let Some(offset) = self.offset {
            write!(f, "<offset:{offset}>+")?;
        }
        std::fmt::LowerHex::fmt(&self.simm32, f)
    }
}

/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum DeferredTarget {
    Label(Label),
    Constant(Constant),
}

impl<R: AsReg> std::fmt::Display for Amode<R> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Amode::ImmReg { simm32, base, .. } => {
                // Note: size is always 8; the address is 64 bits,
                // even if the addressed operand is smaller.
                let base = gpr::enc::to_string(base.enc(), Size::Quadword);
                write!(f, "{simm32:x}({base})")
            }
            Amode::ImmRegRegShift {
                simm32,
                base,
                index,
                scale,
                ..
            } => {
                let base = gpr::enc::to_string(base.enc(), Size::Quadword);
                let index = gpr::enc::to_string(index.enc(), Size::Quadword);
                let shift = scale.shift();
                if shift > 1 {
                    write!(f, "{simm32:x}({base}, {index}, {shift})")
                } else {
                    write!(f, "{simm32:x}({base}, {index})")
                }
            }
            Amode::RipRelative { .. } => write!(f, "(%rip)"),
        }
    }
}
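// Illustrative output of the `Display` implementation above, assuming
// `gpr::enc::to_string` yields AT&T-style `%`-prefixed names: a
// base-plus-offset address renders as `0x10(%rax)`, a base-index-scale
// address as `0x10(%rax, %rbx, 4)`, and a RIP-relative address as `(%rip)`.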

/// The scaling factor for the index register in certain [`Amode`]s.
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub enum Scale {
    One,
    Two,
    Four,
    Eight,
}

impl Scale {
    /// Create a new [`Scale`] from its hardware encoding.
    ///
    /// # Panics
    ///
    /// Panics if `enc` is not a valid encoding for a scale (0-3).
    #[must_use]
    pub fn new(enc: u8) -> Self {
        match enc {
            0b00 => Scale::One,
            0b01 => Scale::Two,
            0b10 => Scale::Four,
            0b11 => Scale::Eight,
            _ => panic!("invalid scale encoding: {enc}"),
        }
    }

    /// Return the hardware encoding of this [`Scale`].
    fn enc(&self) -> u8 {
        match self {
            Scale::One => 0b00,
            Scale::Two => 0b01,
            Scale::Four => 0b10,
            Scale::Eight => 0b11,
        }
    }

    /// Return how much this [`Scale`] will shift the value in the index
    /// register of the SIB byte.
    ///
    /// This is useful for pretty-printing; when encoding, one usually needs
    /// [`Scale::enc`].
    fn shift(&self) -> u8 {
        1 << self.enc()
    }
}
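// For example (a sketch derived from the table above): `Scale::Four` encodes
// as `0b10` in the SIB byte's scale field, and `shift()` returns
// `1 << 2 == 4`, i.e. the index register's value is multiplied by four.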

/// A general-purpose register or memory operand.
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'GprMem' indicates this has GPR and memory variants"
)]
pub enum GprMem<R: AsReg, M: AsReg> {
    Gpr(R),
    Mem(Amode<M>),
}

impl<R: AsReg, M: AsReg> GprMem<R, M> {
    /// Pretty-print the operand.
    pub fn to_string(&self, size: Size) -> String {
        match self {
            GprMem::Gpr(gpr) => gpr.to_string(Some(size)),
            GprMem::Mem(amode) => amode.to_string(),
        }
    }

    /// Proxy on the 8-bit REX flag emission; helpful for simplifying generated
    /// code.
    pub(crate) fn always_emit_if_8bit_needed(&self, rex: &mut RexFlags) {
        match self {
            GprMem::Gpr(gpr) => {
                rex.always_emit_if_8bit_needed(gpr.enc());
            }
            GprMem::Mem(_) => {}
        }
    }
}

/// An XMM register or memory operand.
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
#[allow(
    clippy::module_name_repetitions,
    reason = "'XmmMem' indicates this has Xmm and memory variants"
)]
pub enum XmmMem<R: AsReg, M: AsReg> {
    Xmm(R),
    Mem(Amode<M>),
}

impl<R: AsReg, M: AsReg> XmmMem<R, M> {
    /// Pretty-print the operand.
    pub fn to_string(&self) -> String {
        match self {
            XmmMem::Xmm(xmm) => xmm::enc::to_string(xmm.enc()).to_owned(),
            XmmMem::Mem(amode) => amode.to_string(),
        }
    }
}

/// Emit the ModRM/SIB/displacement sequence for a memory operand.
pub fn emit_modrm_sib_disp<R: AsReg>(
    sink: &mut impl CodeSink,
    offsets: &impl KnownOffsetTable,
    enc_g: u8,
    mem_e: &Amode<R>,
    bytes_at_end: u8,
    evex_scaling: Option<i8>,
) {
    match mem_e.clone() {
        Amode::ImmReg { simm32, base, .. } => {
            let enc_e = base.enc();
            let mut imm = Imm::new(simm32.value(offsets), evex_scaling);

            // Most base registers allow for a single ModRM byte plus an
            // optional immediate. If rsp is the base register, however, then a
            // SIB byte must be used.
            let enc_e_low3 = enc_e & 7;
            if enc_e_low3 == gpr::enc::RSP {
                // Displacement from RSP is encoded with a SIB byte where
                // the index and base are both encoded as RSP's encoding of
                // 0b100. This special encoding means that the index register
                // isn't used and the base is 0b100 with or without a
                // REX-encoded 4th bit (e.g. rsp or r12).
                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
                sink.put1(0b00_100_100);
                imm.emit(sink);
            } else {
                // If the base register is rbp and there's no offset then force
                // a 1-byte zero offset since otherwise the encoding would be
                // invalid.
                if enc_e_low3 == gpr::enc::RBP {
                    imm.force_immediate();
                }
                sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
                imm.emit(sink);
            }
        }

        Amode::ImmRegRegShift {
            simm32,
            base,
            index,
            scale,
            ..
        } => {
            let enc_base = base.enc();
            let enc_index = index.enc();

            // The ModRM/SIB encoding doesn't allow the index register to ever
            // be rsp. Note, though, that the encoding of r12, whose three
            // lower bits match the encoding of rsp, is explicitly allowed with
            // REX bytes, so only rsp is disallowed.
            assert!(enc_index != gpr::enc::RSP);

            // If the offset is zero then there is no immediate. Note, though,
            // that if the base register's lower three bits are `101` then an
            // offset must be present. This is a special case in the encoding of
            // the SIB byte and requires an explicit displacement with rbp/r13.
            let mut imm = Imm::new(simm32.value(), evex_scaling);
            if enc_base & 7 == gpr::enc::RBP {
                imm.force_immediate();
            }

            // With the above determined, encode the ModRM byte, then the SIB
            // byte, then any immediate as necessary.
            sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
            sink.put1(encode_sib(scale.enc(), enc_index & 7, enc_base & 7));
            imm.emit(sink);
        }

        Amode::RipRelative { target } => {
            // RIP-relative is mod=00, rm=101.
            sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));

            let offset = sink.current_offset();
            let target = match target {
                DeferredTarget::Label(label) => label,
                DeferredTarget::Constant(constant) => sink.get_label_for_constant(constant),
            };
            sink.use_label_at_offset(offset, target);

            // N.B.: some instructions (XmmRmRImm format for example)
            // have bytes *after* the RIP-relative offset. The
            // addressed location is relative to the end of the
            // instruction, but the relocation is nominally relative
            // to the end of the u32 field. So, to compensate for
            // this, we emit a negative extra offset in the u32 field
            // initially, and the relocation will add to it.
            sink.put4(-(i32::from(bytes_at_end)) as u32);
        }
    }
}
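// Worked example (an illustrative sketch, not from the original source):
// encoding the memory operand `[rax + 0x10]` with a register operand encoded
// as 1 (rcx) takes the `Amode::ImmReg` arm above. rax encodes as 0b000 and is
// neither rsp (no SIB byte needed) nor rbp (no forced immediate), so,
// assuming `Imm` selects the 1-byte displacement form for values that fit in
// 8 bits, the emitted bytes are:
//
//     ModRM = 0b01_001_000 (0x48)   // mod = 01 (disp8), reg = rcx, rm = rax
//     disp8 = 0x10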