cranelift_assembler_x64/
fuzz.rs

1//! A fuzz testing oracle for roundtrip assembly-disassembly.
2//!
3//! This contains manual implementations of the `Arbitrary` trait for types
4//! throughout this crate to avoid depending on the `arbitrary` crate
5//! unconditionally (use the `fuzz` feature instead).
6
7use crate::{AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Gpr, Inst, NonRspGpr, Registers, Xmm};
8use arbitrary::{Arbitrary, Result, Unstructured};
9use capstone::{arch::x86, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, Capstone};
10
11/// Take a random assembly instruction and check its encoding and
12/// pretty-printing against a known-good disassembler.
13///
14/// # Panics
15///
16/// This function panics to express failure as expected by the `arbitrary`
17/// fuzzer infrastructure. It may fail during assembly, disassembly, or when
18/// comparing the disassembled strings.
19pub fn roundtrip(inst: &Inst<FuzzRegs>) {
20    // Check that we can actually assemble this instruction.
21    let assembled = assemble(inst);
22    let expected = disassemble(&assembled);
23
24    // Check that our pretty-printed output matches the known-good output. Trim
25    // off the instruction offset first.
26    let expected = expected.split_once(' ').unwrap().1;
27    let actual = inst.to_string();
28    if expected != actual && expected != replace_signed_immediates(&actual) {
29        println!("> {inst}");
30        println!("  debug: {inst:x?}");
31        println!("  assembled: {}", pretty_print_hexadecimal(&assembled));
32        println!("  expected (capstone): {expected}");
33        println!("  actual (to_string):  {actual}");
34        assert_eq!(expected, &actual);
35    }
36}
37
38/// Use this assembler to emit machine code into a byte buffer.
39///
40/// This will skip any traps or label registrations, but this is fine for the
41/// single-instruction disassembly we're doing here.
42fn assemble(insn: &Inst<FuzzRegs>) -> Vec<u8> {
43    let mut buffer = Vec::new();
44    let offsets: Vec<i32> = Vec::new();
45    insn.encode(&mut buffer, &offsets);
46    buffer
47}
48
49/// Building a new `Capstone` each time is suboptimal (TODO).
50fn disassemble(assembled: &[u8]) -> String {
51    let cs = Capstone::new()
52        .x86()
53        .mode(x86::ArchMode::Mode64)
54        .syntax(x86::ArchSyntax::Att)
55        .detail(true)
56        .build()
57        .expect("failed to create Capstone object");
58    let insns = cs
59        .disasm_all(assembled, 0x0)
60        .expect("failed to disassemble");
61    assert_eq!(insns.len(), 1, "not a single instruction: {assembled:02x?}");
62    let insn = insns.first().expect("at least one instruction");
63    assert_eq!(
64        assembled.len(),
65        insn.len(),
66        "\ncranelift generated {} bytes: {assembled:02x?}\n\
67         capstone  generated {} bytes: {:02x?}",
68        assembled.len(),
69        insn.len(),
70        insn.bytes(),
71    );
72    insn.to_string()
73}
74
/// Format `hex` as one contiguous string of two-digit, upper-case hexadecimal
/// bytes (no separators, no prefix).
fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use std::fmt::Write as _;
    // Two output characters per input byte, so reserve exactly that much.
    let buffer = String::with_capacity(hex.len() * 2);
    hex.iter().fold(buffer, |mut out, b| {
        write!(out, "{b:02X}").unwrap();
        out
    })
}
83
/// Render a string of hexadecimal digits as a signed immediate; see
/// `replace_signed_immediates`.
///
/// `$hex` is parsed in base 16 as the unsigned type `$from` and then cast to
/// the signed type `$to`. The result is a `String` holding an optional
/// leading `-`, then the magnitude in lower-case hexadecimal; the `0x` prefix
/// is emitted only when the value is not strictly between -10 and 10.
///
/// Panics if `$hex` cannot be parsed as a `$from`.
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        let value = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        // `unsigned_abs` also covers the minimum value, whose magnitude does
        // not fit in the signed type.
        let magnitude = value.unsigned_abs();
        let mut out = String::from(if value < 0 { "-" } else { "" });
        let digits = if (-9..=9).contains(&value) {
            format!("{:x}", magnitude)
        } else {
            format!("0x{:x}", magnitude)
        };
        out.push_str(&digits);
        out
    }};
}
104
105/// Replace signed immediates in the disassembly with their unsigned hexadecimal
106/// equivalent. This is only necessary to match `capstone`'s complex
107/// pretty-printing rules; e.g. `capstone` will:
108/// - omit the `0x` prefix when printing `0x0` as `0`.
109/// - omit the `0x` prefix when print small values (less than 10)
110/// - print negative values as `-0x...` (signed hex) instead of `0xff...`
111///   (normal hex)
112fn replace_signed_immediates(dis: &str) -> std::borrow::Cow<str> {
113    match dis.find('$') {
114        None => dis.into(),
115        Some(idx) => {
116            let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'.
117            let (_, rest) = chomp("-", rest); // Skip the '-' if it's there.
118            let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there.
119            let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
120            let (hex, rest) = rest.split_at(n); // Split at next non-hex character.
121            let simm = match hex.len() {
122                1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
123                4 => hex_print_signed_imm!(hex, u16 => i16),
124                8 => hex_print_signed_imm!(hex, u32 => i32),
125                16 => hex_print_signed_imm!(hex, u64 => i64),
126                _ => panic!("unexpected length for hex: {hex}"),
127            };
128            format!("{prefix}{simm}{rest}").into()
129        }
130    }
131}
132
/// Split `pat` off the front of `s`; see `replace_signed_immediates`.
///
/// Returns `(matched, remainder)`. When `s` does not start with `pat`, the
/// first element is the empty string and the second is all of `s`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    match s.strip_prefix(pat) {
        Some(tail) => (&s[..pat.len()], tail),
        None => ("", s),
    }
}
141
/// Checks `replace_signed_immediates` on a negative 32-bit immediate, a
/// negative 64-bit immediate, and a positive immediate that must come back
/// unchanged (including the untouched negative displacement after it).
#[test]
fn replace() {
    let cases = [
        ("andl $0xffffff9a, %r11d", "andl $-0x66, %r11d"),
        (
            "xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)",
            "xorq $-0x44, 0x7f139ecc(%r9)",
        ),
        (
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(replace_signed_immediates(input), expected);
    }
}
157
/// Fuzz-specific registers.
///
/// This is a unit type used only as the register-set parameter of
/// `Inst<FuzzRegs>`. For the fuzzer, we do not need any fancy register
/// types: its `Registers` implementation uses [`FuzzReg`] for every kind of
/// register.
#[derive(Arbitrary, Debug)]
pub struct FuzzRegs;
163
/// All four register kinds (read-only and read-write, GPR and XMM) use the
/// same `FuzzReg` type when fuzzing.
impl Registers for FuzzRegs {
    type ReadGpr = FuzzReg;
    type ReadWriteGpr = FuzzReg;
    type ReadXmm = FuzzReg;
    type ReadWriteXmm = FuzzReg;
}
170
/// A simple `u8` register type for fuzzing only.
///
/// The wrapped byte is the value given to `AsReg::new` and returned by
/// `AsReg::enc`. The `Arbitrary` implementation for this type only produces
/// values in `0..=15`.
#[derive(Clone, Copy, Debug)]
pub struct FuzzReg(u8);
174
175impl<'a> Arbitrary<'a> for FuzzReg {
176    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
177        Ok(Self::new(u.int_in_range(0..=15)?))
178    }
179}
180
/// Lets `FuzzReg` be used where the assembler expects a register type: the
/// `u8` it is built from is stored and handed back unchanged.
impl AsReg for FuzzReg {
    // Wrap `enc` as-is; no range check happens here.
    fn new(enc: u8) -> Self {
        Self(enc)
    }
    // Return the wrapped byte.
    fn enc(&self) -> u8 {
        self.0
    }
}
189
190impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
191    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
192        // For now, we don't generate offsets (TODO).
193        Ok(Self {
194            simm32: AmodeOffset::arbitrary(u)?,
195            offset: None,
196        })
197    }
198}
199impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
200    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
201        use crate::gpr::enc::*;
202        let gpr = u.choose(&[
203            RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
204        ])?;
205        Ok(Self::new(R::new(*gpr)))
206    }
207}
208impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
209    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
210        Ok(Self(R::new(u.int_in_range(0..=15)?)))
211    }
212}
213impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
214    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
215        Ok(Self(R::new(u.int_in_range(0..=15)?)))
216    }
217}
218
/// Helper trait that is the same as `Registers` except with an extra
/// `for<'a> Arbitrary<'a>` bound on all of the associated types.
///
/// A bound of `R: RegistersArbitrary` therefore stands in for `R: Registers`
/// plus an `Arbitrary` bound on each of the four associated register types.
/// The trait has no items of its own.
pub trait RegistersArbitrary:
    Registers<
    ReadGpr: for<'a> Arbitrary<'a>,
    ReadWriteGpr: for<'a> Arbitrary<'a>,
    ReadXmm: for<'a> Arbitrary<'a>,
    ReadWriteXmm: for<'a> Arbitrary<'a>,
>
{
}
230
/// Blanket implementation: every `Registers` type whose four associated
/// register types implement `Arbitrary` for all lifetimes is a
/// `RegistersArbitrary`, with nothing further to implement.
impl<R> RegistersArbitrary for R
where
    R: Registers,
    R::ReadGpr: for<'a> Arbitrary<'a>,
    R::ReadWriteGpr: for<'a> Arbitrary<'a>,
    R::ReadXmm: for<'a> Arbitrary<'a>,
    R::ReadWriteXmm: for<'a> Arbitrary<'a>,
{
}
240
#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Runs the `roundtrip` fuzzer for one second, printing each instruction
    /// that passed along with a running index.
    ///
    /// To repeatably test a single input, append `.seed(0x<failing seed>)`
    /// to the `arbtest(..)` builder chain.
    #[test]
    fn smoke() {
        let iteration = AtomicUsize::new(0);
        arbtest(|u| {
            let inst = u.arbitrary::<Inst<FuzzRegs>>()?;
            roundtrip(&inst);
            // Only reached when `roundtrip` did not panic.
            println!("#{}: {inst}", iteration.fetch_add(1, Ordering::SeqCst));
            Ok(())
        })
        .budget_ms(1_000);
    }
}