cranelift_fuzzgen/
lib.rs

1use crate::config::Config;
2use crate::function_generator::FunctionGenerator;
3use crate::settings::{Flags, OptLevel};
4use anyhow::Result;
5use arbitrary::{Arbitrary, Unstructured};
6use cranelift::codegen::Context;
7use cranelift::codegen::data_value::DataValue;
8use cranelift::codegen::ir::{Function, LibCall};
9use cranelift::codegen::ir::{UserExternalName, UserFuncName};
10use cranelift::codegen::isa::Builder;
11use cranelift::prelude::isa::{OwnedTargetIsa, TargetIsa};
12use cranelift::prelude::settings::SettingKind;
13use cranelift::prelude::*;
14use cranelift_arbitrary::CraneliftArbitrary;
15use cranelift_native::builder_with_options;
16use rand::{Rng, SeedableRng, rngs::SmallRng};
17use target_isa_extras::TargetIsaExtras;
18use target_lexicon::Architecture;
19
20mod config;
21mod cranelift_arbitrary;
22mod function_generator;
23mod passes;
24mod print;
25mod target_isa_extras;
26
27pub use print::PrintableTestCase;
28
29pub type TestCaseInput = Vec<DataValue>;
30
/// Selects the pool of ISA flags that `FuzzGen::set_isa_flags` may draw from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IsaFlagGen {
    /// When generating ISA flags, ensure that they are all supported by
    /// the current host.
    Host,
    /// All flags available in cranelift are allowed to be generated.
    /// We also allow generating all possible values for each enum flag.
    All,
}
39
40pub struct FuzzGen<'r, 'data>
41where
42    'data: 'r,
43{
44    pub u: &'r mut Unstructured<'data>,
45    pub config: Config,
46}
47
48impl<'r, 'data> FuzzGen<'r, 'data>
49where
50    'data: 'r,
51{
52    pub fn new(u: &'r mut Unstructured<'data>) -> Self {
53        Self {
54            u,
55            config: Config::default(),
56        }
57    }
58
59    pub fn generate_signature(&mut self, isa: &dyn TargetIsa) -> Result<Signature> {
60        let max_params = self.u.int_in_range(self.config.signature_params.clone())?;
61        let max_rets = self.u.int_in_range(self.config.signature_rets.clone())?;
62        Ok(self.u.signature(
63            isa.supports_simd(),
64            isa.triple().architecture,
65            max_params,
66            max_rets,
67        )?)
68    }
69
70    pub fn generate_test_inputs(mut self, signature: &Signature) -> Result<Vec<TestCaseInput>> {
71        let mut inputs = Vec::new();
72
73        // Generate up to "max_test_case_inputs" inputs, we need an upper bound here since
74        // the fuzzer at some point starts trying to feed us way too many inputs. (I found one
75        // test case with 130k inputs!)
76        for _ in 0..self.config.max_test_case_inputs {
77            let last_len = self.u.len();
78
79            let test_args = signature
80                .params
81                .iter()
82                .map(|p| self.u.datavalue(p.value_type))
83                .collect::<Result<TestCaseInput>>()?;
84
85            inputs.push(test_args);
86
87            // Continue generating input as long as we just consumed some of self.u. Otherwise
88            // we'll generate the same test input again and again, forever. Note that once self.u
89            // becomes empty we obviously can't consume any more of it, so this check is more
90            // general. Also note that we need to generate at least one input or the fuzz target
91            // won't actually test anything, so checking at the end of the loop is good, even if
92            // self.u is empty from the start and we end up with all zeros in test_args.
93            assert!(self.u.len() <= last_len);
94            if self.u.len() == last_len {
95                break;
96            }
97        }
98
99        Ok(inputs)
100    }
101
102    fn run_func_passes(&mut self, func: Function, isa: &dyn TargetIsa) -> Result<Function> {
103        // Do a NaN Canonicalization pass on the generated function.
104        //
105        // Both IEEE754 and the Wasm spec are somewhat loose about what is allowed
106        // to be returned from NaN producing operations. And in practice this changes
107        // from X86 to Aarch64 and others. Even in the same host machine, the
108        // interpreter may produce a code sequence different from cranelift that
109        // generates different NaN's but produces legal results according to the spec.
110        //
111        // These differences cause spurious failures in the fuzzer. To fix this
112        // we enable the NaN Canonicalization pass that replaces any NaN's produced
113        // with a single fixed canonical NaN value.
114        //
115        // This is something that we can enable via flags for the compiled version, however
116        // the interpreter won't get that version, so call that pass manually here.
117
118        let mut ctx = Context::for_function(func);
119
120        // We disable the verifier here, since if it fails it prevents a test case from
121        // being generated and formatted by `cargo fuzz fmt`.
122        // We run the verifier before compiling the code, so it always gets verified.
123        let flags = settings::Flags::new({
124            let mut builder = settings::builder();
125            builder.set("enable_verifier", "false").unwrap();
126            builder
127        });
128
129        // Create a new TargetISA from the given ISA, this ensures that we copy all ISA
130        // flags, which may have an effect on the code generated by the passes below.
131        let isa = Builder::from_target_isa(isa)
132            .finish(flags)
133            .expect("Failed to build TargetISA");
134
135        // Finally run the NaN canonicalization pass
136        ctx.canonicalize_nans(isa.as_ref())
137            .expect("Failed NaN canonicalization pass");
138
139        // Run the int_divz pass
140        //
141        // This pass replaces divs and rems with sequences that do not trap
142        passes::do_int_divz_pass(self, &mut ctx.func)?;
143
144        // This pass replaces fcvt* instructions with sequences that do not trap
145        passes::do_fcvt_trap_pass(self, &mut ctx.func)?;
146
147        Ok(ctx.func)
148    }
149
150    pub fn generate_func(
151        &mut self,
152        name: UserFuncName,
153        isa: OwnedTargetIsa,
154        usercalls: Vec<(UserExternalName, Signature)>,
155        libcalls: Vec<LibCall>,
156    ) -> Result<Function> {
157        let sig = self.generate_signature(&*isa)?;
158
159        let func = FunctionGenerator::new(
160            &mut self.u,
161            &self.config,
162            isa.clone(),
163            name,
164            sig,
165            usercalls,
166            libcalls,
167        )
168        .generate()?;
169
170        self.run_func_passes(func, &*isa)
171    }
172
173    /// Generate a random set of cranelift flags.
174    /// Only semantics preserving flags are considered
175    pub fn generate_flags(&mut self, target_arch: Architecture) -> arbitrary::Result<Flags> {
176        let mut builder = settings::builder();
177
178        let opt = self.u.choose(OptLevel::all())?;
179        builder.set("opt_level", &format!("{opt}")[..]).unwrap();
180
181        // Boolean flags
182        // TODO: enable_pinned_reg does not work with our current trampolines. See: #4376
183        // TODO: is_pic has issues:
184        //   x86: https://github.com/bytecodealliance/wasmtime/issues/5005
185        //   aarch64: https://github.com/bytecodealliance/wasmtime/issues/2735
186        let bool_settings = [
187            "enable_alias_analysis",
188            "enable_safepoints",
189            "unwind_info",
190            "preserve_frame_pointers",
191            "enable_jump_tables",
192            "enable_heap_access_spectre_mitigation",
193            "enable_table_access_spectre_mitigation",
194            "enable_incremental_compilation_cache_checks",
195            "regalloc_checker",
196            "enable_llvm_abi_extensions",
197        ];
198        for flag_name in bool_settings {
199            let enabled = self
200                .config
201                .compile_flag_ratio
202                .get(&flag_name)
203                .map(|&(num, denum)| self.u.ratio(num, denum))
204                .unwrap_or_else(|| bool::arbitrary(self.u))?;
205
206            let value = format!("{enabled}");
207            builder.set(flag_name, value.as_str()).unwrap();
208        }
209
210        let supports_inline_probestack = match target_arch {
211            Architecture::X86_64 => true,
212            Architecture::Aarch64(_) => true,
213            Architecture::Riscv64(_) => true,
214            _ => false,
215        };
216
217        // Optionally test inline stackprobes on supported platforms
218        // TODO: Test outlined stack probes.
219        if supports_inline_probestack && bool::arbitrary(self.u)? {
220            builder.enable("enable_probestack").unwrap();
221            builder.set("probestack_strategy", "inline").unwrap();
222
223            let size = self
224                .u
225                .int_in_range(self.config.stack_probe_size_log2.clone())?;
226            builder
227                .set("probestack_size_log2", &format!("{size}"))
228                .unwrap();
229        }
230
231        // Generate random basic block padding
232        let bb_padding = self
233            .u
234            .int_in_range(self.config.bb_padding_log2_size.clone())
235            .unwrap();
236        builder
237            .set("bb_padding_log2_minus_one", &format!("{bb_padding}"))
238            .unwrap();
239
240        // Fixed settings
241
242        // We need llvm ABI extensions for i128 values on x86, so enable it regardless of
243        // what we picked above.
244        if target_arch == Architecture::X86_64 {
245            builder.enable("enable_llvm_abi_extensions").unwrap();
246        }
247
248        // FIXME(#9510) remove once this option is permanently disabled
249        builder.enable("enable_multi_ret_implicit_sret").unwrap();
250
251        // This is the default, but we should ensure that it wasn't accidentally turned off anywhere.
252        builder.enable("enable_verifier").unwrap();
253
254        // These settings just panic when they're not enabled and we try to use their respective functionality
255        // so they aren't very interesting to be automatically generated.
256        builder.enable("enable_atomics").unwrap();
257        builder.enable("enable_float").unwrap();
258
259        // `machine_code_cfg_info` generates additional metadata for the embedder but this doesn't feed back
260        // into compilation anywhere, we leave it on unconditionally to make sure the generation doesn't panic.
261        builder.enable("machine_code_cfg_info").unwrap();
262
263        // Differential fuzzing between the interpreter and the host will only
264        // really work if NaN payloads are canonicalized, so enable this.
265        builder.enable("enable_nan_canonicalization").unwrap();
266
267        Ok(Flags::new(builder))
268    }
269
270    /// Generate a random set of ISA flags and apply them to a Builder.
271    ///
272    /// Based on `mode` we can either allow all flags, or just the subset that is
273    /// supported by the current host.
274    ///
275    /// In all cases only a subset of the allowed flags is applied to the builder.
276    pub fn set_isa_flags(&mut self, builder: &mut Builder, mode: IsaFlagGen) -> Result<()> {
277        // `max_isa` is the maximal set of flags that we can use.
278        let max_builder = match mode {
279            IsaFlagGen::All => {
280                let mut max_builder = isa::lookup(builder.triple().clone())?;
281
282                for flag in max_builder.iter() {
283                    match flag.kind {
284                        SettingKind::Bool => {
285                            max_builder.enable(flag.name)?;
286                        }
287                        SettingKind::Enum => {
288                            // Since these are enums there isn't a "max" value per se, pick one at random.
289                            let value = self.u.choose(flag.values.unwrap())?;
290                            max_builder.set(flag.name, value)?;
291                        }
292                        SettingKind::Preset => {
293                            // Presets are just special flags that combine other flags, we don't
294                            // want to enable them directly, just the underlying flags.
295                        }
296                        _ => todo!(),
297                    };
298                }
299                max_builder
300            }
301            // Use `cranelift-native` to do feature detection for us.
302            IsaFlagGen::Host => builder_with_options(true)
303                .expect("Unable to build a TargetIsa for the current host"),
304        };
305        // Cranelift has a somewhat weird API for this, but we need to build the final `TargetIsa` to be able
306        // to extract the values for the ISA flags. We need that to use the `string_value()` that formats
307        // the values so that we can pass it into the builder again.
308        let max_isa = max_builder.finish(Flags::new(settings::builder()))?;
309
310        // We give each of the flags a chance of being copied over. Otherwise we
311        // keep the default. Note that a constant amount of data is taken from
312        // `self.u` as a seed for a `SmallRng` which is then transitively used
313        // to make decisions about what flags to include. This is done to ensure
314        // that the same test case generates similarly across different machines
315        // with different CPUs when `Host` is used above.
316        let mut rng = SmallRng::from_seed(self.u.arbitrary()?);
317        for value in max_isa.isa_flags().iter() {
318            if rng.random() {
319                continue;
320            }
321            builder.set(value.name, &value.value_string())?;
322        }
323
324        Ok(())
325    }
326}