cranelift_assembler_x64/
fuzz.rs

1//! A fuzz testing oracle for roundtrip assembly-disassembly.
2//!
3//! This contains manual implementations of the `Arbitrary` trait for types
4//! throughout this crate to avoid depending on the `arbitrary` crate
5//! unconditionally (use the `fuzz` feature instead).
6
7use crate::{
8    AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Fixed, Gpr, Inst, NonRspGpr, Registers, Xmm,
9};
10use arbitrary::{Arbitrary, Result, Unstructured};
11use capstone::{arch::x86, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, Capstone};
12
13/// Take a random assembly instruction and check its encoding and
14/// pretty-printing against a known-good disassembler.
15///
16/// # Panics
17///
18/// This function panics to express failure as expected by the `arbitrary`
19/// fuzzer infrastructure. It may fail during assembly, disassembly, or when
20/// comparing the disassembled strings.
21pub fn roundtrip(inst: &Inst<FuzzRegs>) {
22    // Check that we can actually assemble this instruction.
23    let assembled = assemble(inst);
24    let expected = disassemble(&assembled, inst);
25
26    // Check that our pretty-printed output matches the known-good output. Trim
27    // off the instruction offset first.
28    let expected = expected.split_once(' ').unwrap().1;
29    let actual = inst.to_string();
30    if expected != actual && expected != replace_signed_immediates(&actual) {
31        println!("> {inst}");
32        println!("  debug: {inst:x?}");
33        println!("  assembled: {}", pretty_print_hexadecimal(&assembled));
34        println!("  expected (capstone): {expected}");
35        println!("  actual (to_string):  {actual}");
36        assert_eq!(expected, &actual);
37    }
38}
39
40/// Use this assembler to emit machine code into a byte buffer.
41///
42/// This will skip any traps or label registrations, but this is fine for the
43/// single-instruction disassembly we're doing here.
44fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
45    let mut buffer = Vec::new();
46    let offsets: Vec<i32> = Vec::new();
47    inst.encode(&mut buffer, &offsets);
48    buffer
49}
50
51/// Building a new `Capstone` each time is suboptimal (TODO).
52fn disassemble(assembled: &[u8], original: &Inst<FuzzRegs>) -> String {
53    let cs = Capstone::new()
54        .x86()
55        .mode(x86::ArchMode::Mode64)
56        .syntax(x86::ArchSyntax::Att)
57        .detail(true)
58        .build()
59        .expect("failed to create Capstone object");
60    let insts = cs
61        .disasm_all(assembled, 0x0)
62        .expect("failed to disassemble");
63
64    if insts.len() != 1 {
65        println!("> {original}");
66        println!("  debug: {original:x?}");
67        println!("  assembled: {}", pretty_print_hexadecimal(&assembled));
68        assert_eq!(insts.len(), 1, "not a single instruction");
69    }
70
71    let inst = insts.first().expect("at least one instruction");
72    if assembled.len() != inst.len() {
73        println!("> {original}");
74        println!("  debug: {original:x?}");
75        println!("  assembled: {}", pretty_print_hexadecimal(&assembled));
76        println!(
77            "  capstone-assembled: {}",
78            pretty_print_hexadecimal(inst.bytes())
79        );
80        assert_eq!(assembled.len(), inst.len(), "extra bytes not disassembled");
81    }
82
83    inst.to_string()
84}
85
/// Render `hex` as a contiguous string of two-digit, uppercase hexadecimal
/// bytes with no separators (e.g. `[0x0f, 0xa0]` becomes `"0FA0"`).
fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use std::fmt::Write;
    // Two output characters per input byte, so the capacity is known up front.
    hex.iter()
        .fold(String::with_capacity(hex.len() * 2), |mut out, byte| {
            write!(out, "{byte:02X}").unwrap();
            out
        })
}
94
/// Parse `$hex` as a hexadecimal `$from`, reinterpret it as the signed type
/// `$to`, and format it as signed hexadecimal: a leading `-` for negative
/// values and a `0x` prefix unless the magnitude is below 10. See
/// `replace_signed_immediates`.
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        let value = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        let sign = if value < 0 { "-" } else { "" };
        // `unsigned_abs` is well-defined for `MIN`, whose magnitude does not
        // fit in the signed type.
        let magnitude = value.unsigned_abs();
        if magnitude < 10 {
            format!("{}{:x}", sign, magnitude)
        } else {
            format!("{}0x{:x}", sign, magnitude)
        }
    }};
}
115
116/// Replace signed immediates in the disassembly with their unsigned hexadecimal
117/// equivalent. This is only necessary to match `capstone`'s complex
118/// pretty-printing rules; e.g. `capstone` will:
119/// - omit the `0x` prefix when printing `0x0` as `0`.
120/// - omit the `0x` prefix when print small values (less than 10)
121/// - print negative values as `-0x...` (signed hex) instead of `0xff...`
122///   (normal hex)
123fn replace_signed_immediates(dis: &str) -> std::borrow::Cow<str> {
124    match dis.find('$') {
125        None => dis.into(),
126        Some(idx) => {
127            let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'.
128            let (_, rest) = chomp("-", rest); // Skip the '-' if it's there.
129            let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there.
130            let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
131            let (hex, rest) = rest.split_at(n); // Split at next non-hex character.
132            let simm = match hex.len() {
133                1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
134                4 => hex_print_signed_imm!(hex, u16 => i16),
135                8 => hex_print_signed_imm!(hex, u32 => i32),
136                16 => hex_print_signed_imm!(hex, u64 => i64),
137                _ => panic!("unexpected length for hex: {hex}"),
138            };
139            format!("{prefix}{simm}{rest}").into()
140        }
141    }
142}
143
// Split `pat` off the front of `s` when `s` starts with it, returning
// `(matched prefix, remainder)`; otherwise return `("", s)`. See
// `replace_signed_immediates`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    match s.strip_prefix(pat) {
        Some(tail) => (&s[..pat.len()], tail),
        None => ("", s),
    }
}
152
/// Check `replace_signed_immediates` on 32-bit and 64-bit negative immediates
/// and on a positive immediate that must come back unchanged.
#[test]
fn replace() {
    // Each entry is `(input, expected output)`.
    let cases = [
        ("andl $0xffffff9a, %r11d", "andl $-0x66, %r11d"),
        (
            "xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)",
            "xorq $-0x44, 0x7f139ecc(%r9)",
        ),
        (
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
        ),
    ];
    for &(input, expected) in &cases {
        assert_eq!(replace_signed_immediates(input), expected);
    }
}
168
/// Fuzz-specific registers.
///
/// A unit marker type: its [`Registers`] implementation uses [`FuzzReg`] for
/// every register kind, because the fuzzer does not need any fancy register
/// types.
#[derive(Arbitrary, Debug)]
pub struct FuzzRegs;
174
// Every register kind (read-only or read-write, general-purpose or XMM) is
// represented by the same plain `FuzzReg` wrapper.
impl Registers for FuzzRegs {
    type ReadGpr = FuzzReg;
    type ReadWriteGpr = FuzzReg;
    type ReadXmm = FuzzReg;
    type ReadWriteXmm = FuzzReg;
}
181
/// A simple `u8` register type for fuzzing only.
///
/// The wrapped byte is the value handed to `AsReg::new` and returned by
/// `AsReg::enc`; the `Arbitrary` implementation only generates values in
/// `0..=15`.
#[derive(Clone, Copy, Debug)]
pub struct FuzzReg(u8);
185
186impl<'a> Arbitrary<'a> for FuzzReg {
187    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
188        Ok(Self(u.int_in_range(0..=15)?))
189    }
190}
191
192impl AsReg for FuzzReg {
193    fn new(enc: u8) -> Self {
194        Self(enc)
195    }
196    fn enc(&self) -> u8 {
197        self.0
198    }
199}
200
201impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
202    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
203        // For now, we don't generate offsets (TODO).
204        Ok(Self {
205            simm32: AmodeOffset::arbitrary(u)?,
206            offset: None,
207        })
208    }
209}
210
impl<R: AsReg, const E: u8> Arbitrary<'_> for Fixed<R, E> {
    /// Always build the value from the const parameter `E`; the fuzz input is
    /// ignored, so there is only one possible result per instantiation.
    fn arbitrary(_: &mut Unstructured<'_>) -> Result<Self> {
        // NOTE(review): presumably `E` is the fixed register's encoding —
        // confirm against `Fixed::new`.
        Ok(Self::new(E))
    }
}
216
217impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
218    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
219        use crate::gpr::enc::*;
220        let gpr = u.choose(&[
221            RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
222        ])?;
223        Ok(Self::new(R::new(*gpr)))
224    }
225}
226impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
227    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
228        Ok(Self(R::new(u.int_in_range(0..=15)?)))
229    }
230}
231impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
232    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
233        Ok(Self(R::new(u.int_in_range(0..=15)?)))
234    }
235}
236
/// Helper trait equivalent to `Registers`, except that every associated
/// register type must additionally implement `for<'a> Arbitrary<'a>`.
///
/// It has no items of its own; it only bundles the extra bounds under a
/// single name.
pub trait RegistersArbitrary:
    Registers<
    ReadGpr: for<'a> Arbitrary<'a>,
    ReadWriteGpr: for<'a> Arbitrary<'a>,
    ReadXmm: for<'a> Arbitrary<'a>,
    ReadWriteXmm: for<'a> Arbitrary<'a>,
>
{
}
248
// Blanket implementation: any `Registers` type whose four associated register
// types all implement `Arbitrary` is automatically `RegistersArbitrary`.
impl<R> RegistersArbitrary for R
where
    R: Registers,
    R::ReadGpr: for<'a> Arbitrary<'a>,
    R::ReadWriteGpr: for<'a> Arbitrary<'a>,
    R::ReadXmm: for<'a> Arbitrary<'a>,
    R::ReadWriteXmm: for<'a> Arbitrary<'a>,
{
}
258
#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Run the `roundtrip` fuzzer for one second.
    ///
    /// To repeatably test a single input, append `.seed(0x<failing seed>)` to
    /// the `arbtest` call.
    #[test]
    fn smoke() {
        // Number of inputs that passed `roundtrip`; used to label the output.
        let passed = AtomicUsize::new(0);
        arbtest(|u| {
            let inst: Inst<FuzzRegs> = u.arbitrary()?;
            roundtrip(&inst);
            let index = passed.fetch_add(1, Ordering::SeqCst);
            println!("#{index}: {inst}");
            Ok(())
        })
        .budget_ms(1_000);
    }
}
279}