pulley_interpreter/
disas.rs

1//! Disassembly support for pulley bytecode.
2
3use crate::decode::*;
4use crate::imms::*;
5use crate::regs::*;
6use alloc::string::String;
7use core::fmt::Write;
8
9/// A Pulley bytecode disassembler.
10///
11/// This is implemented as an `OpVisitor`, where you pass a `Disassembler` to a
12/// `Decoder` in order to disassemble instructions from a bytecode stream.
13///
14/// Alternatively, you can use the `Disassembler::disassemble_all` method to
15/// disassemble a complete bytecode stream.
16pub struct Disassembler<'a> {
17    raw_bytecode: &'a [u8],
18    bytecode: SafeBytecodeStream<'a>,
19    disas: String,
20    start_offset: usize,
21    start: usize,
22    temp: String,
23    offsets: bool,
24    hexdump: bool,
25    br_tables: bool,
26}
27
28impl<'a> Disassembler<'a> {
29    /// Disassemble every instruction in the given bytecode stream.
30    pub fn disassemble_all(bytecode: &'a [u8]) -> Result<String> {
31        let mut disas = Self::new(bytecode);
32        Decoder::decode_all(&mut disas)?;
33        Ok(disas.disas)
34    }
35
36    /// Create a new `Disassembler` that can be used to incrementally
37    /// disassemble instructions from the given bytecode stream.
38    pub fn new(bytecode: &'a [u8]) -> Self {
39        Self {
40            raw_bytecode: bytecode,
41            bytecode: SafeBytecodeStream::new(bytecode),
42            disas: String::new(),
43            start: 0,
44            start_offset: 0,
45            temp: String::new(),
46            offsets: true,
47            hexdump: true,
48            br_tables: true,
49        }
50    }
51
52    /// Whether to prefix each instruction's disassembly with its offset.
53    ///
54    /// True by default.
55    pub fn offsets(&mut self, offsets: bool) -> &mut Self {
56        self.offsets = offsets;
57        self
58    }
59
60    /// Whether to include a hexdump of the bytecode in the disassembly.
61    ///
62    /// True by default.
63    pub fn hexdump(&mut self, hexdump: bool) -> &mut Self {
64        self.hexdump = hexdump;
65        self
66    }
67
68    /// Whether to include branche tables in the disassembly.
69    ///
70    /// True by default.
71    pub fn br_tables(&mut self, enable: bool) -> &mut Self {
72        self.br_tables = enable;
73        self
74    }
75
76    /// Configures the offset that this function starts from, if it doesn't
77    /// start from 0.
78    ///
79    /// This can possibly be useful when a single function at a time is being
80    /// disassembled.
81    pub fn start_offset(&mut self, offset: usize) -> &mut Self {
82        self.start_offset = offset;
83        self
84    }
85
86    /// Get the disassembly thus far.
87    pub fn disas(&self) -> &str {
88        &self.disas
89    }
90
91    fn disas_op(&mut self, mnemonic: &str, operands: &[&dyn Disas]) {
92        write!(&mut self.temp, "{mnemonic}").unwrap();
93        for (i, val) in operands.iter().enumerate() {
94            if i > 0 {
95                write!(&mut self.temp, ",").unwrap();
96            }
97            write!(&mut self.temp, " ").unwrap();
98            val.disas(self.start + self.start_offset, &mut self.temp);
99        }
100    }
101
102    fn disas_br_table32(&mut self, reg: XReg, amt: u32) {
103        self.disas_op("br_table32", &[&reg, &amt]);
104        for _ in 0..amt {
105            self.after_visit();
106            self.start = self.bytecode.position();
107            if let Ok(offset) = PcRelOffset::decode(self.bytecode()) {
108                if self.br_tables {
109                    offset.disas(self.start + self.start_offset, &mut self.temp);
110                }
111            }
112        }
113    }
114}
115
116/// Anything inside an instruction that can be disassembled: registers,
117/// immediates, etc...
118trait Disas {
119    fn disas(&self, position: usize, disas: &mut String);
120}
121
122impl Disas for XReg {
123    fn disas(&self, _position: usize, disas: &mut String) {
124        write!(disas, "{self}").unwrap();
125    }
126}
127
128impl Disas for FReg {
129    fn disas(&self, _position: usize, disas: &mut String) {
130        write!(disas, "{self}").unwrap();
131    }
132}
133
134impl Disas for VReg {
135    fn disas(&self, _position: usize, disas: &mut String) {
136        write!(disas, "{self}").unwrap();
137    }
138}
139
140impl Disas for i8 {
141    fn disas(&self, _position: usize, disas: &mut String) {
142        write!(disas, "{self}").unwrap();
143    }
144}
145
146impl Disas for i16 {
147    fn disas(&self, _position: usize, disas: &mut String) {
148        write!(disas, "{self}").unwrap();
149    }
150}
151
152impl Disas for i32 {
153    fn disas(&self, _position: usize, disas: &mut String) {
154        write!(disas, "{self}").unwrap();
155    }
156}
157
158impl Disas for i64 {
159    fn disas(&self, _position: usize, disas: &mut String) {
160        write!(disas, "{self}").unwrap();
161    }
162}
163
164impl Disas for i128 {
165    fn disas(&self, _position: usize, disas: &mut String) {
166        write!(disas, "{self}").unwrap();
167    }
168}
169
170impl Disas for u8 {
171    fn disas(&self, _position: usize, disas: &mut String) {
172        write!(disas, "{self}").unwrap();
173    }
174}
175
176impl Disas for u16 {
177    fn disas(&self, _position: usize, disas: &mut String) {
178        write!(disas, "{self}").unwrap();
179    }
180}
181
182impl Disas for u32 {
183    fn disas(&self, _position: usize, disas: &mut String) {
184        write!(disas, "{self}").unwrap();
185    }
186}
187
188impl Disas for u64 {
189    fn disas(&self, _position: usize, disas: &mut String) {
190        write!(disas, "{self}").unwrap();
191    }
192}
193
194impl Disas for u128 {
195    fn disas(&self, _position: usize, disas: &mut String) {
196        write!(disas, "{self}").unwrap();
197    }
198}
199
200impl Disas for PcRelOffset {
201    fn disas(&self, position: usize, disas: &mut String) {
202        let offset = i64::from(i32::from(*self));
203        let target = (position as u64).wrapping_add(offset as u64);
204        let (prefix, offset) = if offset < 0 {
205            ("-", -offset)
206        } else {
207            ("", offset)
208        };
209        write!(disas, "{prefix}{offset:#x}    // target = {target:#x}").unwrap()
210    }
211}
212
213impl Disas for U6 {
214    fn disas(&self, _position: usize, disas: &mut String) {
215        write!(disas, "{}", u8::from(*self)).unwrap();
216    }
217}
218
219fn disas_list<T: Disas>(position: usize, disas: &mut String, iter: impl IntoIterator<Item = T>) {
220    let mut iter = iter.into_iter();
221    let Some(first) = iter.next() else { return };
222    first.disas(position, disas);
223
224    for item in iter {
225        write!(disas, ", ").unwrap();
226        item.disas(position, disas);
227    }
228}
229
230impl<D, S1, S2> Disas for BinaryOperands<D, S1, S2>
231where
232    D: Reg + Disas,
233    S1: Reg + Disas,
234    S2: Reg + Disas,
235{
236    fn disas(&self, position: usize, disas: &mut String) {
237        self.dst.disas(position, disas);
238        write!(disas, ", ").unwrap();
239        self.src1.disas(position, disas);
240        write!(disas, ", ").unwrap();
241        self.src2.disas(position, disas);
242    }
243}
244
245impl<D, S1> Disas for BinaryOperands<D, S1, U6>
246where
247    D: Reg + Disas,
248    S1: Reg + Disas,
249{
250    fn disas(&self, position: usize, disas: &mut String) {
251        self.dst.disas(position, disas);
252        write!(disas, ", ").unwrap();
253        self.src1.disas(position, disas);
254        write!(disas, ", ").unwrap();
255        self.src2.disas(position, disas);
256    }
257}
258
259impl<R: Reg + Disas> Disas for UpperRegSet<R> {
260    fn disas(&self, position: usize, disas: &mut String) {
261        disas_list(position, disas, *self)
262    }
263}
264
265impl Disas for AddrO32 {
266    fn disas(&self, position: usize, disas: &mut String) {
267        self.addr.disas(position, disas);
268        write!(disas, ", ").unwrap();
269        self.offset.disas(position, disas);
270    }
271}
272
273impl Disas for AddrZ {
274    fn disas(&self, position: usize, disas: &mut String) {
275        self.addr.disas(position, disas);
276        write!(disas, ", ").unwrap();
277        self.offset.disas(position, disas);
278    }
279}
280
281impl Disas for AddrG32 {
282    fn disas(&self, position: usize, disas: &mut String) {
283        self.host_heap_base.disas(position, disas);
284        write!(disas, ", ").unwrap();
285        self.host_heap_bound.disas(position, disas);
286        write!(disas, ", ").unwrap();
287        self.wasm_addr.disas(position, disas);
288        write!(disas, ", ").unwrap();
289        self.offset.disas(position, disas);
290    }
291}
292
293impl Disas for AddrG32Bne {
294    fn disas(&self, position: usize, disas: &mut String) {
295        self.host_heap_base.disas(position, disas);
296        write!(disas, ", *[").unwrap();
297        self.host_heap_bound_addr.disas(position, disas);
298        write!(disas, " + ").unwrap();
299        self.host_heap_bound_offset.disas(position, disas);
300        write!(disas, "], ").unwrap();
301        self.wasm_addr.disas(position, disas);
302        write!(disas, ", ").unwrap();
303        self.offset.disas(position, disas);
304    }
305}
306
// Generates one visitor method per opcode. This macro is handed to
// `for_each_op!` / `for_each_extended_op!`, which invoke it with a list of
// `snake_name = CamelName { field: Type, ... };` entries.
macro_rules! impl_disas {
    // Entry point: strip the attributes from every opcode definition and
    // forward each definition, one at a time, to the `@one` rules below.
    (
        $(
            $( #[$attr:meta] )*
                $method:ident = $op:ident $( {
                $(
                    $( #[$arg_attr:meta] )*
                    $arg:ident : $arg_ty:ty
                ),*
            } )? ;
        )*
    ) => {
        $(
            impl_disas!(@one $method = $op $( { $($arg: $arg_ty),* } )?);
        )*
    };

    // Disassembling `br_table32` is special: it is routed to
    // `disas_br_table32`, which also renders the table entries that are
    // stored in the stream after the instruction.
    (
        @one br_table32 = BrTable32 $( {
            $(
                $arg:ident : $arg_ty:ty
            ),*
        } )?
    ) => {
        fn br_table32(&mut self $( $( , $arg : $arg_ty )* )? ) {
            self.disas_br_table32($($($arg),*)?)
        }
    };

    // Every other opcode is rendered the same way: the method name is the
    // mnemonic and the method's arguments are the operands.
    (
        @one $method:ident = $op:ident $( {
            $(
                $arg:ident : $arg_ty:ty
            ),*
        } )?
    ) => {
        fn $method(&mut self $( $( , $arg : $arg_ty )* )? ) {
            self.disas_op(stringify!($method), &[$($(&$arg),*)?])
        }
    };
}
351
352impl<'a> OpVisitor for Disassembler<'a> {
353    type BytecodeStream = SafeBytecodeStream<'a>;
354
355    fn bytecode(&mut self) -> &mut Self::BytecodeStream {
356        &mut self.bytecode
357    }
358
359    type Return = ();
360
361    fn before_visit(&mut self) {
362        self.start = self.bytecode.position();
363    }
364
365    fn after_visit(&mut self) {
366        if self.offsets {
367            write!(&mut self.disas, "{:8x}: ", self.start + self.start_offset).unwrap();
368        }
369        if self.hexdump {
370            let size = self.bytecode.position() - self.start;
371            let mut need_space = false;
372            for byte in &self.raw_bytecode[self.start..][..size] {
373                let space = if need_space { " " } else { "" };
374                write!(&mut self.disas, "{space}{byte:02x}").unwrap();
375                need_space = true;
376            }
377            for _ in 0..12_usize.saturating_sub(size) {
378                write!(&mut self.disas, "   ").unwrap();
379            }
380        }
381        self.disas.push_str(&self.temp);
382        self.temp.clear();
383
384        self.disas.push('\n');
385    }
386
387    for_each_op!(impl_disas);
388}
389
390impl ExtendedOpVisitor for Disassembler<'_> {
391    for_each_extended_op!(impl_disas);
392}