1use std::string::{String, ToString};
8use std::vec::Vec;
9use std::{format, println};
10
11use crate::{
12 AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, CodeSink, DeferredTarget, Fixed, Gpr, Inst,
13 KnownOffset, NonRspGpr, Registers, TrapCode, Xmm,
14};
15use arbitrary::{Arbitrary, Result, Unstructured};
16use capstone::{Capstone, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, arch::x86};
17
18pub fn roundtrip(inst: &Inst<FuzzRegs>) {
27 let assembled = assemble(inst);
29 let expected = disassemble(&assembled, inst);
30
31 let expected = expected.split_once(' ').unwrap().1;
34 let actual = inst.to_string();
35 if expected != actual && expected.trim() != fix_up(&actual) {
36 println!("> {inst}");
37 println!(" debug: {inst:x?}");
38 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
39 println!(" expected (capstone): {expected}");
40 println!(" actual (to_string): {actual}");
41 assert_eq!(expected, &actual);
42 }
43}
44
45fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
50 let mut sink = TestCodeSink::default();
51 inst.encode(&mut sink);
52 sink.patch_labels_as_if_they_referred_to_end();
53 sink.buf
54}
55
/// In-memory code sink used by `assemble`; starts out empty via `Default`.
#[derive(Default)]
struct TestCodeSink {
    // Bytes emitted so far, in emission order.
    buf: Vec<u8>,
    // Offsets into `buf` that `use_target` recorded; each one is later
    // patched as a 4-byte field by `patch_labels_as_if_they_referred_to_end`.
    offsets_using_label: Vec<usize>,
}
61
62impl TestCodeSink {
63 fn patch_labels_as_if_they_referred_to_end(&mut self) {
78 let len = i32::try_from(self.buf.len()).unwrap();
79 for offset in self.offsets_using_label.iter() {
80 let range = self.buf[*offset..].first_chunk_mut::<4>().unwrap();
81 let offset = i32::try_from(*offset).unwrap() + 4;
82 let rel_distance = len - offset;
83 *range = (i32::from_le_bytes(*range) + rel_distance).to_le_bytes();
84 }
85 }
86}
87
88impl CodeSink for TestCodeSink {
89 fn put1(&mut self, v: u8) {
90 self.buf.extend_from_slice(&[v]);
91 }
92
93 fn put2(&mut self, v: u16) {
94 self.buf.extend_from_slice(&v.to_le_bytes());
95 }
96
97 fn put4(&mut self, v: u32) {
98 self.buf.extend_from_slice(&v.to_le_bytes());
99 }
100
101 fn put8(&mut self, v: u64) {
102 self.buf.extend_from_slice(&v.to_le_bytes());
103 }
104
105 fn add_trap(&mut self, _: TrapCode) {}
106
107 fn use_target(&mut self, _: DeferredTarget) {
108 let offset = self.buf.len();
109 self.offsets_using_label.push(offset);
110 }
111
112 fn known_offset(&self, target: KnownOffset) -> i32 {
113 panic!("unsupported known target {target:?}")
114 }
115}
116
117fn disassemble(assembled: &[u8], original: &Inst<FuzzRegs>) -> String {
119 let cs = Capstone::new()
120 .x86()
121 .mode(x86::ArchMode::Mode64)
122 .syntax(x86::ArchSyntax::Att)
123 .detail(true)
124 .build()
125 .expect("failed to create Capstone object");
126 let insts = cs
127 .disasm_all(assembled, 0x0)
128 .expect("failed to disassemble");
129
130 if insts.len() != 1 {
131 println!("> {original}");
132 println!(" debug: {original:x?}");
133 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
134 assert_eq!(insts.len(), 1, "not a single instruction");
135 }
136
137 let inst = insts.first().expect("at least one instruction");
138 if assembled.len() != inst.len() {
139 println!("> {original}");
140 println!(" debug: {original:x?}");
141 println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
142 println!(
143 " capstone-assembled: {}",
144 pretty_print_hexadecimal(inst.bytes())
145 );
146 assert_eq!(assembled.len(), inst.len(), "extra bytes not disassembled");
147 }
148
149 inst.to_string()
150}
151
/// Renders `hex` as a contiguous string of two-digit, upper-case hexadecimal
/// byte values (no separators, no prefix).
fn pretty_print_hexadecimal(hex: &[u8]) -> String {
    use core::fmt::Write;

    // Two output characters per input byte.
    let buffer = String::with_capacity(hex.len() * 2);
    hex.iter().fold(buffer, |mut out, byte| {
        write!(out, "{byte:02X}").unwrap();
        out
    })
}
160
/// Helper for `replace_signed_immediates`: parses the hex digits in `$hex` as
/// the unsigned type `$from`, reinterprets the bits as the signed type `$to`,
/// and renders the result as a `String`.
///
/// Rendering rules:
/// - negative values get a leading `-` followed by their magnitude;
/// - values strictly between -10 and 10 are printed without a `0x` prefix,
///   all others with one;
/// - the magnitude of `<$to>::MIN` cannot be represented, so `MIN` itself is
///   formatted (its hex form is the two's-complement bit pattern).
///
/// Panics if `$hex` is not valid hexadecimal for `$from`.
macro_rules! hex_print_signed_imm {
    ($hex:expr, $from:ty => $to:ty) => {{
        let value = <$from>::from_str_radix($hex, 16).unwrap() as $to;
        let sign = if value < 0 { "-" } else { "" };
        let magnitude = value.checked_abs().unwrap_or(<$to>::MIN);
        let radix_prefix = if value > -10 && value < 10 { "" } else { "0x" };
        format!("{}{}{:x}", sign, radix_prefix, magnitude)
    }};
}
181
182fn replace_signed_immediates(dis: &str) -> alloc::borrow::Cow<'_, str> {
191 match dis.find('$') {
192 None => dis.into(),
193 Some(idx) => {
194 let (prefix, rest) = dis.split_at(idx + 1); let (_, rest) = chomp("-", rest); let (_, rest) = chomp("0x", rest); let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
198 let (hex, rest) = rest.split_at(n); let simm = if dis.starts_with("mov") {
200 u64::from_str_radix(hex, 16).unwrap().to_string()
201 } else {
202 match hex.len() {
203 1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
204 4 => hex_print_signed_imm!(hex, u16 => i16),
205 8 => hex_print_signed_imm!(hex, u32 => i32),
206 16 => hex_print_signed_imm!(hex, u64 => i64),
207 _ => panic!("unexpected length for hex: {hex}"),
208 }
209 };
210 format!("{prefix}{simm}{rest}").into()
211 }
212 }
213}
214
/// Splits `pat` off the front of `s` when `s` starts with it.
///
/// Returns `(matched_prefix, remainder)`; when `s` does not start with `pat`
/// the prefix is empty and the remainder is all of `s`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
    match s.strip_prefix(pat) {
        Some(tail) => (&s[..pat.len()], tail),
        None => ("", s),
    }
}
223
/// Checks `replace_signed_immediates` on negative, positive and `mov`
/// immediates.
#[test]
fn replace() {
    // (input, expected output)
    let cases = [
        ("andl $0xffffff9a, %r11d", "andl $-0x66, %r11d"),
        (
            "xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)",
            "xorq $-0x44, 0x7f139ecc(%r9)",
        ),
        (
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
            "subl $0x3ca77a19, -0x1a030f40(%r14)",
        ),
        (
            "movq $0xffffffff864ae103, %rsi",
            "movq $18446744071667638531, %rsi",
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(replace_signed_immediates(input), expected);
    }
}
243
/// Returns the part of `dis` that precedes its first `;`, with surrounding
/// whitespace trimmed.
///
/// When `dis` contains no `;` it is returned unchanged (and untrimmed).
fn remove_after_semicolon(dis: &str) -> &str {
    dis.split_once(';')
        .map_or(dis, |(before, _)| before.trim())
}
256
/// Despite its name, this exercises `remove_after_semicolon`: everything from
/// the first `;` onwards is dropped and the remainder is trimmed.
#[test]
fn remove_after_parenthesis_test() {
    let input = "imulb 0x7658eddd(%rcx) ;; implicit: %ax";
    let stripped = remove_after_semicolon(input);
    assert_eq!(stripped, "imulb 0x7658eddd(%rcx)");
}
264
265fn fix_up(dis: &str) -> alloc::borrow::Cow<'_, str> {
267 let dis = remove_after_semicolon(dis);
268 replace_signed_immediates(&dis)
269}
270
/// Unit type naming the register set used by this harness; its `Registers`
/// implementation maps every register class to `FuzzReg`.
#[derive(Clone, Arbitrary, Debug)]
pub struct FuzzRegs;
276
// All six register classes (read / read-write / write, for both GPRs and XMM
// registers) share the single `FuzzReg` type.
impl Registers for FuzzRegs {
    type ReadGpr = FuzzReg;
    type ReadWriteGpr = FuzzReg;
    type WriteGpr = FuzzReg;
    type ReadXmm = FuzzReg;
    type ReadWriteXmm = FuzzReg;
    type WriteXmm = FuzzReg;
}
285
/// Register stand-in that wraps a single `u8`: the value handed to
/// `AsReg::new` and returned by `AsReg::enc`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct FuzzReg(u8);
289
290impl<'a> Arbitrary<'a> for FuzzReg {
291 fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
292 Ok(Self(u.int_in_range(0..=15)?))
293 }
294}
295
296impl AsReg for FuzzReg {
297 fn new(enc: u8) -> Self {
298 Self(enc)
299 }
300 fn enc(&self) -> u8 {
301 self.0
302 }
303}
304
305impl Arbitrary<'_> for AmodeOffset {
306 fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
307 let base = if u.arbitrary()? {
313 i32::from(u.arbitrary::<i8>()?)
314 } else {
315 u.arbitrary::<i32>()?
316 };
317 Ok(match u.int_in_range(0..=5)? {
318 0 => AmodeOffset::ZERO,
319 n => AmodeOffset::new(base << (n - 1)),
320 })
321 }
322}
323
324impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
325 fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
326 Ok(Self {
328 simm32: AmodeOffset::arbitrary(u)?,
329 offset: None,
330 })
331 }
332}
333
334impl<R: AsReg, const E: u8> Arbitrary<'_> for Fixed<R, E> {
335 fn arbitrary(_: &mut Unstructured<'_>) -> Result<Self> {
336 Ok(Self::new(E))
337 }
338}
339
340impl<R: AsReg> Arbitrary<'_> for NonRspGpr<R> {
341 fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
342 use crate::gpr::enc::*;
343 let gpr = u.choose(&[
344 RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
345 ])?;
346 Ok(Self::new(R::new(*gpr)))
347 }
348}
349impl<'a, R: AsReg> Arbitrary<'a> for Gpr<R> {
350 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
351 Ok(Self(R::new(u.int_in_range(0..=15)?)))
352 }
353}
354impl<'a, R: AsReg> Arbitrary<'a> for Xmm<R> {
355 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
356 Ok(Self(R::new(u.int_in_range(0..=15)?)))
357 }
358}
359
/// Marker trait (no methods) for a `Registers` implementation whose six
/// associated register types all implement `Arbitrary` for every lifetime.
///
/// The bounds are written on the supertrait's associated types, so naming
/// `RegistersArbitrary` as a bound carries all of them along.
pub trait RegistersArbitrary:
    Registers<
        ReadGpr: for<'a> Arbitrary<'a>,
        ReadWriteGpr: for<'a> Arbitrary<'a>,
        WriteGpr: for<'a> Arbitrary<'a>,
        ReadXmm: for<'a> Arbitrary<'a>,
        ReadWriteXmm: for<'a> Arbitrary<'a>,
        WriteXmm: for<'a> Arbitrary<'a>,
    >
{
}
373
374impl<R> RegistersArbitrary for R
375where
376 R: Registers,
377 R::ReadGpr: for<'a> Arbitrary<'a>,
378 R::ReadWriteGpr: for<'a> Arbitrary<'a>,
379 R::WriteGpr: for<'a> Arbitrary<'a>,
380 R::ReadXmm: for<'a> Arbitrary<'a>,
381 R::ReadWriteXmm: for<'a> Arbitrary<'a>,
382 R::WriteXmm: for<'a> Arbitrary<'a>,
383{
384}
385
#[cfg(test)]
mod test {
    use super::*;
    use arbtest::arbtest;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Round-trips arbitrary instructions through `roundtrip`, printing a
    /// running index and the instruction after each successful check. The
    /// run is configured with `budget_ms(1_000)`.
    #[test]
    fn smoke() {
        let checked = AtomicUsize::new(0);
        arbtest(|u| {
            let inst: Inst<FuzzRegs> = u.arbitrary()?;
            roundtrip(&inst);
            // The counter is bumped only after the round trip has passed.
            let index = checked.fetch_add(1, Ordering::SeqCst);
            println!("#{}: {inst}", index);
            Ok(())
        })
        .budget_ms(1_000);
    }

    /// Round-trips `callq_d` for every immediate in `-500..500`.
    #[test]
    fn callq() {
        for i in -500..500 {
            println!("immediate: {i}");
            let call = crate::inst::callq_d::new(i);
            let inst: Inst<FuzzRegs> = call.into();
            roundtrip(&inst);
        }
    }
}