1use crate::{
8 AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, CodeSink, DeferredTarget, Fixed, Gpr, Inst,
9 KnownOffset, NonRspGpr, Registers, TrapCode, Xmm,
10};
11use arbitrary::{Arbitrary, Result, Unstructured};
12use capstone::{Capstone, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, arch::x86};
13
14pub fn roundtrip(inst: &Inst<FuzzRegs>) {
23 let assembled = assemble(inst);
25 let expected = disassemble(&assembled, inst);
26
27 let expected = expected.split_once(' ').unwrap().1;
30 let actual = inst.to_string();
31 if expected != actual && expected.trim() != fix_up(&actual) {
32 println!("> {inst}");
33 println!(" debug: {inst:x?}");
34 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
35 println!(" expected (capstone): {expected}");
36 println!(" actual (to_string): {actual}");
37 assert_eq!(expected, &actual);
38 }
39}
40
41fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
46 let mut sink = TestCodeSink::default();
47 inst.encode(&mut sink);
48 sink.patch_labels_as_if_they_referred_to_end();
49 sink.buf
50}
51
/// In-memory code sink used to collect the bytes of a single encoded
/// instruction.
#[derive(Default)]
struct TestCodeSink {
    /// Bytes emitted so far.
    buf: Vec<u8>,
    /// Offsets into `buf` at which a label use was recorded; each one marks
    /// the start of a 4-byte field that is patched later.
    offsets_using_label: Vec<usize>,
}

impl TestCodeSink {
    /// Patch every recorded label use as if the label pointed at the end of
    /// `buf`.
    ///
    /// For each recorded offset, the four bytes starting there are read as a
    /// little-endian `i32`, incremented by the distance from the end of that
    /// 4-byte field to the end of the buffer, and written back.
    ///
    /// # Panics
    ///
    /// Panics if the buffer length or an offset does not fit in `i32`, or if
    /// fewer than four bytes follow a recorded offset.
    fn patch_labels_as_if_they_referred_to_end(&mut self) {
        let len = i32::try_from(self.buf.len()).expect("buffer length fits in i32");
        for &offset in &self.offsets_using_label {
            let field = self.buf[offset..]
                .first_chunk_mut::<4>()
                .expect("four bytes are available at a recorded label use");
            // The distance is measured from the end of the 4-byte field.
            let field_end = i32::try_from(offset).expect("label offset fits in i32") + 4;
            let rel_distance = len - field_end;
            *field = (i32::from_le_bytes(*field) + rel_distance).to_le_bytes();
        }
    }
}
83
84impl CodeSink for TestCodeSink {
85 fn put1(&mut self, v: u8) {
86 self.buf.extend_from_slice(&[v]);
87 }
88
89 fn put2(&mut self, v: u16) {
90 self.buf.extend_from_slice(&v.to_le_bytes());
91 }
92
93 fn put4(&mut self, v: u32) {
94 self.buf.extend_from_slice(&v.to_le_bytes());
95 }
96
97 fn put8(&mut self, v: u64) {
98 self.buf.extend_from_slice(&v.to_le_bytes());
99 }
100
101 fn add_trap(&mut self, _: TrapCode) {}
102
103 fn use_target(&mut self, _: DeferredTarget) {
104 let offset = self.buf.len();
105 self.offsets_using_label.push(offset);
106 }
107
108 fn known_offset(&self, target: KnownOffset) -> i32 {
109 panic!("unsupported known target {target:?}")
110 }
111}
112
113fn disassemble(assembled: &[u8], original: &Inst<FuzzRegs>) -> String {
115 let cs = Capstone::new()
116 .x86()
117 .mode(x86::ArchMode::Mode64)
118 .syntax(x86::ArchSyntax::Att)
119 .detail(true)
120 .build()
121 .expect("failed to create Capstone object");
122 let insts = cs
123 .disasm_all(assembled, 0x0)
124 .expect("failed to disassemble");
125
126 if insts.len() != 1 {
127 println!("> {original}");
128 println!(" debug: {original:x?}");
129 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
130 assert_eq!(insts.len(), 1, "not a single instruction");
131 }
132
133 let inst = insts.first().expect("at least one instruction");
134 if assembled.len() != inst.len() {
135 println!("> {original}");
136 println!(" debug: {original:x?}");
137 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
138 println!(
139 " capstone-assembled: {}",
140 pretty_print_hexadecimal(inst.bytes())
141 );
142 assert_eq!(assembled.len(), inst.len(), "extra bytes not disassembled");
143 }
144
145 inst.to_string()
146}
147
/// Render `hex` as a contiguous string of two-digit, upper-case hexadecimal
/// bytes (no separators, no prefix).
fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use std::fmt::Write;

    // Two output characters per input byte.
    let mut s = String::with_capacity(hex.len() * 2);
    for b in hex {
        write!(&mut s, "{b:02X}").expect("writing to a String cannot fail");
    }
    s
}
156
/// Parse `$hex` (hexadecimal digits without a `0x` prefix) as the unsigned
/// type `$from`, reinterpret it as the signed type `$to`, and format it as a
/// signed hexadecimal `String`.
///
/// - Negative values get a leading `-`.
/// - Magnitudes below 10 are printed without the `0x` prefix.
/// - All other magnitudes are printed as `0x…` in lower-case hex.
///
/// Panics if `$hex` is not valid hexadecimal for `$from`.
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        // The `as` cast is an intentional same-width bit reinterpretation.
        let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        let sign = if imm < 0 { "-" } else { "" };
        // `unsigned_abs` also handles the minimum value, whose magnitude does
        // not fit in the signed type.
        let abs = imm.unsigned_abs();
        if abs < 10 {
            format!("{sign}{abs:x}")
        } else {
            format!("{sign}0x{abs:x}")
        }
    }};
}
177
178fn replace_signed_immediates(dis: &str) -> std::borrow::Cow<'_, str> {
187 match dis.find('$') {
188 None => dis.into(),
189 Some(idx) => {
190 let (prefix, rest) = dis.split_at(idx + 1); let (_, rest) = chomp("-", rest); let (_, rest) = chomp("0x", rest); let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
194 let (hex, rest) = rest.split_at(n); let simm = if dis.starts_with("mov") {
196 u64::from_str_radix(hex, 16).unwrap().to_string()
197 } else {
198 match hex.len() {
199 1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
200 4 => hex_print_signed_imm!(hex, u16 => i16),
201 8 => hex_print_signed_imm!(hex, u32 => i32),
202 16 => hex_print_signed_imm!(hex, u64 => i64),
203 _ => panic!("unexpected length for hex: {hex}"),
204 }
205 };
206 format!("{prefix}{simm}{rest}").into()
207 }
208 }
209}
210
/// Split the prefix `pat` off the front of `s`.
///
/// Returns `(pat-sized prefix, remainder)` when `s` starts with `pat`, and
/// `("", s)` otherwise.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    match s.strip_prefix(pat) {
        Some(rest) => (&s[..pat.len()], rest),
        None => ("", s),
    }
}
219
/// Checks `replace_signed_immediates` on negative, positive and `mov`
/// immediates.
#[test]
fn replace() {
    let cases = [
        ("andl $0xffffff9a, %r11d", "andl $-0x66, %r11d"),
        (
            "xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)",
            "xorq $-0x44, 0x7f139ecc(%r9)",
        ),
        (
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
        ),
        (
            "movq $0xffffffff864ae103, %rsi",
            "movq $18446744071667638531, %rsi",
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(replace_signed_immediates(input), expected, "input: {input}");
    }
}
239
/// Drop everything from the first `;` onwards.
///
/// When `dis` contains a `;`, the text before it is returned with
/// surrounding whitespace trimmed. When it does not, `dis` is returned as-is
/// (not trimmed).
fn remove_after_semicolon(dis: &str) -> &str {
    match dis.split_once(';') {
        Some((prefix, _)) => prefix.trim(),
        None => dis,
    }
}
252
/// Checks that `remove_after_semicolon` drops a trailing `;; …` annotation.
// NOTE(review): despite its name, this test exercises
// `remove_after_semicolon`; the name is kept so existing test filters still
// match.
#[test]
fn remove_after_parenthesis_test() {
    assert_eq!(
        remove_after_semicolon("imulb 0x7658eddd(%rcx) ;; implicit: %ax"),
        "imulb 0x7658eddd(%rcx)"
    );
}
260
261fn fix_up(dis: &str) -> std::borrow::Cow<'_, str> {
263 let dis = remove_after_semicolon(dis);
264 replace_signed_immediates(&dis)
265}
266
267#[derive(Arbitrary, Debug)]
271pub struct FuzzRegs;
272
273impl Registers for FuzzRegs {
274 type ReadGpr = FuzzReg;
275 type ReadWriteGpr = FuzzReg;
276 type WriteGpr = FuzzReg;
277 type ReadXmm = FuzzReg;
278 type ReadWriteXmm = FuzzReg;
279 type WriteXmm = FuzzReg;
280}
281
/// A register identified only by its raw encoding byte.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FuzzReg(u8);
285
286impl<'a> Arbitrary<'a> for FuzzReg {
287 fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
288 Ok(Self(u.int_in_range(0..=15)?))
289 }
290}
291
292impl AsReg for FuzzReg {
293 fn new(enc: u8) -> Self {
294 Self(enc)
295 }
296 fn enc(&self) -> u8 {
297 self.0
298 }
299}
300
301impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
302 fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
303 Ok(Self {
305 simm32: AmodeOffset::arbitrary(u)?,
306 offset: None,
307 })
308 }
309}
310
311impl<R: AsReg, const E: u8> Arbitrary<'_> for Fixed<R, E> {
312 fn arbitrary(_: &mut Unstructured<'_>) -> Result<Self> {
313 Ok(Self::new(E))
314 }
315}
316
317impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
318 fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
319 use crate::gpr::enc::*;
320 let gpr = u.choose(&[
321 RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
322 ])?;
323 Ok(Self::new(R::new(*gpr)))
324 }
325}
326impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
327 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
328 Ok(Self(R::new(u.int_in_range(0..=15)?)))
329 }
330}
331impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
332 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
333 Ok(Self(R::new(u.int_in_range(0..=15)?)))
334 }
335}
336
337pub trait RegistersArbitrary:
340 Registers<
341 ReadGpr: for<'a> Arbitrary<'a>,
342 ReadWriteGpr: for<'a> Arbitrary<'a>,
343 WriteGpr: for<'a> Arbitrary<'a>,
344 ReadXmm: for<'a> Arbitrary<'a>,
345 ReadWriteXmm: for<'a> Arbitrary<'a>,
346 WriteXmm: for<'a> Arbitrary<'a>,
347 >
348{
349}
350
351impl<R> RegistersArbitrary for R
352where
353 R: Registers,
354 R::ReadGpr: for<'a> Arbitrary<'a>,
355 R::ReadWriteGpr: for<'a> Arbitrary<'a>,
356 R::WriteGpr: for<'a> Arbitrary<'a>,
357 R::ReadXmm: for<'a> Arbitrary<'a>,
358 R::ReadWriteXmm: for<'a> Arbitrary<'a>,
359 R::WriteXmm: for<'a> Arbitrary<'a>,
360{
361}
362
#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Roundtrip arbitrarily generated instructions, printing each one with a
    /// running index after it passes. `arbtest` is given a budget of 1,000 ms.
    #[test]
    fn smoke() {
        let count = AtomicUsize::new(0);
        arbtest(|u| {
            let inst: Inst<FuzzRegs> = u.arbitrary()?;
            roundtrip(&inst);
            println!("#{}: {inst}", count.fetch_add(1, Ordering::SeqCst));
            Ok(())
        })
        .budget_ms(1_000);
    }

    /// Roundtrip `callq_d` for every immediate in `-500..500`.
    #[test]
    fn callq() {
        for i in -500..500 {
            println!("immediate: {i}");
            let inst = crate::inst::callq_d::new(i);
            roundtrip(&inst.into());
        }
    }
}