wasmtime_fuzzing/generators/
stacks.rs

1//! Generate a Wasm program that keeps track of its current stack frames.
2//!
3//! We can then compare the stack trace we observe in Wasmtime to what the Wasm
4//! program believes its stack should be. Any discrepancies between the two
5//! points to a bug in either this test case generator or Wasmtime's stack
6//! walker.
7
8use std::mem;
9
10use arbitrary::{Arbitrary, Result, Unstructured};
11use wasm_encoder::{Instruction, ValType};
12
13const MAX_FUNCS: u32 = 20;
14const MAX_OPS: usize = 1_000;
15const MAX_PARAMS: usize = 10;
16
17/// Generate a Wasm module that keeps track of its current call stack, to
18/// compare to the host.
19#[derive(Debug)]
20pub struct Stacks {
21    funcs: Vec<Function>,
22    inputs: Vec<u8>,
23}
24
25#[derive(Debug, Default)]
26struct Function {
27    ops: Vec<Op>,
28    params: usize,
29    results: usize,
30}
31
32#[derive(Debug, Clone, Copy)]
33enum Op {
34    CheckStackInHost,
35    Call(u32),
36    CallThroughHost(u32),
37    ReturnCall(u32),
38}
39
40impl<'a> Arbitrary<'a> for Stacks {
41    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
42        let funcs = Self::arbitrary_funcs(u)?;
43        let n = u.len().min(200);
44        let inputs = u.bytes(n)?.to_vec();
45        Ok(Stacks { funcs, inputs })
46    }
47}
48
49impl Stacks {
50    fn arbitrary_funcs(u: &mut Unstructured) -> Result<Vec<Function>> {
51        // Generate a list of functions first with a number of parameters and
52        // results. Bodies are generated afterwards.
53        let nfuncs = u.int_in_range(1..=MAX_FUNCS)?;
54        let mut funcs = (0..nfuncs)
55            .map(|_| {
56                Ok(Function {
57                    ops: Vec::new(), // generated later
58                    params: u.int_in_range(0..=MAX_PARAMS)?,
59                    results: u.int_in_range(0..=MAX_PARAMS)?,
60                })
61            })
62            .collect::<Result<Vec<_>>>()?;
63        let mut funcs_by_result = vec![Vec::new(); MAX_PARAMS + 1];
64        for (i, func) in funcs.iter().enumerate() {
65            funcs_by_result[func.results].push(i as u32);
66        }
67
68        // Fill in each function body with various instructions/operations now
69        // that the set of functions is known.
70        for f in funcs.iter_mut() {
71            let funcs_with_same_results = &funcs_by_result[f.results];
72            for _ in 0..u.arbitrary_len::<usize>()?.min(MAX_OPS) {
73                let op = match u.int_in_range(0..=3)? {
74                    0 => Op::CheckStackInHost,
75                    1 => Op::Call(u.int_in_range(0..=nfuncs - 1)?),
76                    2 => Op::CallThroughHost(u.int_in_range(0..=nfuncs - 1)?),
77                    // This only works if the target function has the same
78                    // number of results, so choose from a different set here.
79                    3 => Op::ReturnCall(*u.choose(funcs_with_same_results)?),
80                    _ => unreachable!(),
81                };
82                f.ops.push(op);
83                // once a `return_call` has been generated there's no need to
84                // generate any more instructions, so fall through to below.
85                if let Some(Op::ReturnCall(_)) = f.ops.last() {
86                    break;
87                }
88            }
89        }
90
91        Ok(funcs)
92    }
93
94    /// Get the input values to run the Wasm module with.
95    pub fn inputs(&self) -> &[u8] {
96        &self.inputs
97    }
98
99    /// Get this test case's Wasm module.
100    ///
101    /// The Wasm module has the following imports:
102    ///
103    /// * `host.check_stack: [] -> []`: The host can check the Wasm's
104    ///   understanding of its own stack against the host's understanding of the
105    ///   Wasm stack to find discrepancy bugs.
106    ///
107    /// * `host.call_func: [funcref] -> []`: The host should call the given
108    ///   `funcref`, creating a call stack with multiple sequences of contiguous
109    ///   Wasm frames on the stack like `[..., wasm, host, wasm]`.
110    ///
111    /// The Wasm module has the following exports:
112    ///
113    /// * `run: [i32] -> []`: This function should be called with each of the
114    ///   input values to run this generated test case.
115    ///
116    /// * `get_stack: [] -> [i32 i32]`: Get the pointer and length of the `u32`
117    ///   array of this Wasm's understanding of its stack. This is useful for
118    ///   checking whether the host's view of the stack at a trap matches the
119    ///   Wasm program's understanding.
120    pub fn wasm(&self) -> Vec<u8> {
121        let mut module = wasm_encoder::Module::new();
122
123        let mut types = wasm_encoder::TypeSection::new();
124
125        let run_type = types.len();
126        types
127            .ty()
128            .function(vec![wasm_encoder::ValType::I32], vec![]);
129
130        let get_stack_type = types.len();
131        types.ty().function(
132            vec![],
133            vec![wasm_encoder::ValType::I32, wasm_encoder::ValType::I32],
134        );
135
136        let call_func_type = types.len();
137        types
138            .ty()
139            .function(vec![wasm_encoder::ValType::FUNCREF], vec![]);
140
141        let check_stack_type = types.len();
142        types.ty().function(vec![], vec![]);
143
144        let func_types_start = types.len();
145        for func in self.funcs.iter() {
146            types.ty().function(
147                vec![ValType::I32; func.params],
148                vec![ValType::I32; func.results],
149            );
150        }
151
152        section(&mut module, types);
153
154        let mut imports = wasm_encoder::ImportSection::new();
155        let check_stack_func = 0;
156        imports.import(
157            "host",
158            "check_stack",
159            wasm_encoder::EntityType::Function(check_stack_type),
160        );
161        let call_func_func = 1;
162        imports.import(
163            "host",
164            "call_func",
165            wasm_encoder::EntityType::Function(call_func_type),
166        );
167        let num_imported_funcs = 2;
168        section(&mut module, imports);
169
170        let mut funcs = wasm_encoder::FunctionSection::new();
171        for (i, _) in self.funcs.iter().enumerate() {
172            funcs.function(func_types_start + (i as u32));
173        }
174        let run_func = funcs.len() + num_imported_funcs;
175        funcs.function(run_type);
176        let get_stack_func = funcs.len() + num_imported_funcs;
177        funcs.function(get_stack_type);
178        section(&mut module, funcs);
179
180        let mut mems = wasm_encoder::MemorySection::new();
181        let memory = mems.len();
182        mems.memory(wasm_encoder::MemoryType {
183            minimum: 1,
184            maximum: Some(1),
185            memory64: false,
186            shared: false,
187            page_size_log2: None,
188        });
189        section(&mut module, mems);
190
191        let mut globals = wasm_encoder::GlobalSection::new();
192        let fuel_global = globals.len();
193        globals.global(
194            wasm_encoder::GlobalType {
195                val_type: wasm_encoder::ValType::I32,
196                mutable: true,
197                shared: false,
198            },
199            &wasm_encoder::ConstExpr::i32_const(0),
200        );
201        let stack_len_global = globals.len();
202        globals.global(
203            wasm_encoder::GlobalType {
204                val_type: wasm_encoder::ValType::I32,
205                mutable: true,
206                shared: false,
207            },
208            &wasm_encoder::ConstExpr::i32_const(0),
209        );
210        section(&mut module, globals);
211
212        let mut exports = wasm_encoder::ExportSection::new();
213        exports.export("run", wasm_encoder::ExportKind::Func, run_func);
214        exports.export("get_stack", wasm_encoder::ExportKind::Func, get_stack_func);
215        exports.export("memory", wasm_encoder::ExportKind::Memory, memory);
216        exports.export("fuel", wasm_encoder::ExportKind::Global, fuel_global);
217        section(&mut module, exports);
218
219        let mut elems = wasm_encoder::ElementSection::new();
220        elems.declared(wasm_encoder::Elements::Functions(
221            (0..num_imported_funcs + u32::try_from(self.funcs.len()).unwrap())
222                .collect::<Vec<_>>()
223                .into(),
224        ));
225        section(&mut module, elems);
226
227        let check_fuel = |body: &mut wasm_encoder::Function| {
228            // Trap if we are out of fuel.
229            body.instruction(&Instruction::GlobalGet(fuel_global))
230                .instruction(&Instruction::I32Eqz)
231                .instruction(&Instruction::If(wasm_encoder::BlockType::Empty))
232                .instruction(&Instruction::Unreachable)
233                .instruction(&Instruction::End);
234
235            // Decrement fuel.
236            body.instruction(&Instruction::GlobalGet(fuel_global))
237                .instruction(&Instruction::I32Const(1))
238                .instruction(&Instruction::I32Sub)
239                .instruction(&Instruction::GlobalSet(fuel_global));
240        };
241
242        let push_func_to_stack = |body: &mut wasm_encoder::Function, func: u32| {
243            // Add this function to our internal stack.
244            //
245            // Note that we know our `stack_len_global` can't go beyond memory
246            // bounds because we limit fuel to at most `u8::MAX` and each stack
247            // entry is an `i32` and `u8::MAX * size_of(i32)` still fits in one
248            // Wasm page.
249            body.instruction(&Instruction::GlobalGet(stack_len_global))
250                .instruction(&Instruction::I32Const(func as i32))
251                .instruction(&Instruction::I32Store(wasm_encoder::MemArg {
252                    offset: 0,
253                    align: 0,
254                    memory_index: memory,
255                }))
256                .instruction(&Instruction::GlobalGet(stack_len_global))
257                .instruction(&Instruction::I32Const(mem::size_of::<i32>() as i32))
258                .instruction(&Instruction::I32Add)
259                .instruction(&Instruction::GlobalSet(stack_len_global));
260        };
261
262        let pop_func_from_stack = |body: &mut wasm_encoder::Function| {
263            // Remove this function from our internal stack.
264            body.instruction(&Instruction::GlobalGet(stack_len_global))
265                .instruction(&Instruction::I32Const(mem::size_of::<i32>() as i32))
266                .instruction(&Instruction::I32Sub)
267                .instruction(&Instruction::GlobalSet(stack_len_global));
268        };
269
270        let push_params = |body: &mut wasm_encoder::Function, func: u32| {
271            let func = &self.funcs[func as usize];
272            for _ in 0..func.params {
273                body.instruction(&Instruction::I32Const(0));
274            }
275        };
276        let pop_results = |body: &mut wasm_encoder::Function, func: u32| {
277            let func = &self.funcs[func as usize];
278            for _ in 0..func.results {
279                body.instruction(&Instruction::Drop);
280            }
281        };
282        let push_results = |body: &mut wasm_encoder::Function, func: u32| {
283            let func = &self.funcs[func as usize];
284            for _ in 0..func.results {
285                body.instruction(&Instruction::I32Const(0));
286            }
287        };
288
289        let mut code = wasm_encoder::CodeSection::new();
290        for (func_index, func) in self.funcs.iter().enumerate() {
291            let mut body = wasm_encoder::Function::new(vec![]);
292
293            push_func_to_stack(
294                &mut body,
295                num_imported_funcs + u32::try_from(func_index).unwrap(),
296            );
297            check_fuel(&mut body);
298
299            let mut check_fuel_and_pop_at_end = true;
300
301            // Perform our specified operations.
302            for op in &func.ops {
303                assert!(check_fuel_and_pop_at_end);
304                match op {
305                    Op::CheckStackInHost => {
306                        body.instruction(&Instruction::Call(check_stack_func));
307                    }
308                    Op::Call(f) => {
309                        push_params(&mut body, *f);
310                        body.instruction(&Instruction::Call(f + num_imported_funcs));
311                        pop_results(&mut body, *f);
312                    }
313                    Op::CallThroughHost(f) => {
314                        body.instruction(&Instruction::RefFunc(f + num_imported_funcs))
315                            .instruction(&Instruction::Call(call_func_func));
316                    }
317
318                    // For a `return_call` preemptively check fuel to possibly
319                    // trap and then pop our function from the in-wasm managed
320                    // stack. After that execute the `return_call` itself.
321                    Op::ReturnCall(f) => {
322                        push_params(&mut body, *f);
323                        check_fuel(&mut body);
324                        pop_func_from_stack(&mut body);
325                        check_fuel_and_pop_at_end = false;
326                        body.instruction(&Instruction::ReturnCall(f + num_imported_funcs));
327                    }
328                }
329            }
330
331            // Potentially trap at the end of our function as well, so that we
332            // exercise the scenario where the Wasm-to-host trampoline
333            // initialized `last_wasm_exit_sp` et al when calling out to a host
334            // function, but then we returned back to Wasm and then trapped
335            // while `last_wasm_exit_sp` et al are still initialized from that
336            // previous host call.
337            if check_fuel_and_pop_at_end {
338                check_fuel(&mut body);
339                pop_func_from_stack(&mut body);
340                push_results(&mut body, func_index as u32);
341            }
342
343            function(&mut code, body);
344        }
345
346        let mut run_body = wasm_encoder::Function::new(vec![]);
347
348        // Reset the bump pointer for the internal stack (this allows us to
349        // reuse an instance in the oracle, rather than re-instantiate).
350        run_body
351            .instruction(&Instruction::I32Const(0))
352            .instruction(&Instruction::GlobalSet(stack_len_global));
353
354        // Initialize the fuel global.
355        run_body
356            .instruction(&Instruction::LocalGet(0))
357            .instruction(&Instruction::GlobalSet(fuel_global));
358
359        push_func_to_stack(&mut run_body, run_func);
360
361        // Make sure to check for out-of-fuel in the `run` function as well, so
362        // that we also capture stack traces with only one frame, not just `run`
363        // followed by the first locally-defined function and then zero or more
364        // extra frames.
365        check_fuel(&mut run_body);
366
367        // Call the first locally defined function.
368        push_params(&mut run_body, 0);
369        run_body.instruction(&Instruction::Call(num_imported_funcs));
370        pop_results(&mut run_body, 0);
371
372        check_fuel(&mut run_body);
373        pop_func_from_stack(&mut run_body);
374
375        function(&mut code, run_body);
376
377        let mut get_stack_body = wasm_encoder::Function::new(vec![]);
378        get_stack_body
379            .instruction(&Instruction::I32Const(0))
380            .instruction(&Instruction::GlobalGet(stack_len_global));
381        function(&mut code, get_stack_body);
382
383        section(&mut module, code);
384
385        return module.finish();
386
387        // Helper that defines a section in the module and takes ownership of it
388        // so that it is dropped and its memory reclaimed after adding it to the
389        // module.
390        fn section(module: &mut wasm_encoder::Module, section: impl wasm_encoder::Section) {
391            module.section(&section);
392        }
393
394        // Helper that defines a function body in the code section and takes
395        // ownership of it so that it is dropped and its memory reclaimed after
396        // adding it to the module.
397        fn function(code: &mut wasm_encoder::CodeSection, mut func: wasm_encoder::Function) {
398            func.instruction(&Instruction::End);
399            code.function(&func);
400        }
401    }
402}
403
404#[cfg(test)]
405mod tests {
406    use super::*;
407    use rand::prelude::*;
408    use wasmparser::Validator;
409
410    #[test]
411    fn stacks_generates_valid_wasm_modules() {
412        let mut rng = SmallRng::seed_from_u64(0);
413        let mut buf = vec![0; 2048];
414        for _ in 0..1024 {
415            rng.fill_bytes(&mut buf);
416            let u = Unstructured::new(&buf);
417            if let Ok(stacks) = Stacks::arbitrary_take_rest(u) {
418                let wasm = stacks.wasm();
419                validate(&wasm);
420            }
421        }
422    }
423
424    fn validate(wasm: &[u8]) {
425        let mut validator = Validator::new();
426        let err = match validator.validate_all(wasm) {
427            Ok(_) => return,
428            Err(e) => e,
429        };
430        drop(std::fs::write("test.wasm", wasm));
431        if let Ok(text) = wasmprinter::print_bytes(wasm) {
432            drop(std::fs::write("test.wat", &text));
433        }
434        panic!("wasm failed to validate: {err}");
435    }
436}