Skip to main content

wasmtime_wizer/
lib.rs

1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22#[cfg(not(feature = "rayon"))]
23mod rayoff;
24
25pub use crate::info::ModuleContext;
26pub use crate::snapshot::SnapshotVal;
27use ::wasmtime::{Result, bail, error::Context as _};
28use std::collections::{HashMap, HashSet};
29pub use wasmparser::ValType;
30
31const DEFAULT_KEEP_INIT_FUNC: bool = false;
32
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
///   will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
///   allow the Wasm module to mutate tables, and we would need to be able to
///   snapshot the new table state, but funcrefs and externrefs don't have
///   identity and aren't comparable in the Wasm spec, which makes snapshotting
///   difficult.
// NOTE: the `///` comments on this struct and its fields double as the CLI
// help text when the `clap` feature derives `clap::Parser`, so they are part
// of the user-facing output.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    // Tri-state, as interpreted by `get_keep_init_func`: `None` falls back to
    // `DEFAULT_KEEP_INIT_FUNC`, `Some(None)` counts as `true`, and
    // `Some(Some(v))` is the explicit value `v`.
    keep_init_func: Option<Option<bool>>,
}

impl Default for Wizer {
    /// Builds a `Wizer` with the same settings that `Wizer::new` produces:
    /// the `"wizer-initialize"` initializer export, no function renames, and
    /// `keep_init_func` left unset.
    fn default() -> Self {
        Wizer {
            init_func: "wizer-initialize".to_string(),
            func_renames: Vec::new(),
            keep_init_func: None,
        }
    }
}
102
/// Parses one `dst=src` rename specification given on the command line.
///
/// The text is split at the first `=`: everything before it is the
/// destination name and everything after it is the source name.
///
/// # Errors
///
/// Fails when `s` contains no `=` at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
    match s.split_once('=') {
        Some((dst, src)) => Ok((dst.into(), src.into())),
        None => bail!("must contain exactly one equals character ('=')"),
    }
}
111
112#[derive(Default)]
113struct FuncRenames {
114    /// For a given export name that we encounter in the original module, a map
115    /// to a new name, if any, to emit in the output module.
116    rename_src_to_dst: HashMap<String, String>,
117    /// A set of export names that we ignore in the original module (because
118    /// they are overwritten by renamings).
119    rename_dsts: HashSet<String>,
120}
121
122impl FuncRenames {
123    fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
124        let mut ret = FuncRenames {
125            rename_src_to_dst: HashMap::new(),
126            rename_dsts: HashSet::new(),
127        };
128        if renames.is_empty() {
129            return Ok(ret);
130        }
131
132        for (dst, src) in renames {
133            if ret.rename_dsts.contains(dst) {
134                bail!("Duplicated function rename dst {dst}");
135            }
136            if ret.rename_src_to_dst.contains_key(src) {
137                bail!("Duplicated function rename src {src}");
138            }
139            ret.rename_dsts.insert(dst.clone());
140            ret.rename_src_to_dst.insert(src.clone(), dst.clone());
141        }
142
143        Ok(ret)
144    }
145}
146
147impl Wizer {
148    /// Construct a new `Wizer` builder.
149    pub fn new() -> Self {
150        Wizer {
151            init_func: "wizer-initialize".to_string(),
152            func_renames: vec![],
153            keep_init_func: None,
154        }
155    }
156
157    /// The export name of the initializer function.
158    ///
159    /// Defaults to `"wizer-initialize"`.
160    pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
161        self.init_func = init_func.into();
162        self
163    }
164
165    /// Returns the initialization function that will be run for wizer.
166    pub fn get_init_func(&self) -> &str {
167        &self.init_func
168    }
169
170    /// Add a function rename to perform.
171    pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
172        self.func_renames
173            .push((new_name.to_string(), old_name.to_string()));
174        self
175    }
176
177    /// After initialization, should the Wasm module still export the
178    /// initialization function?
179    ///
180    /// This is `false` by default, meaning that the initialization function is
181    /// no longer exported from the Wasm module.
182    pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
183        self.keep_init_func = Some(Some(keep));
184        self
185    }
186
187    /// First half of [`Self::run`] which instruments the provided `wasm` and
188    /// produces a new wasm module which should be run by a runtime.
189    ///
190    /// After the returned wasm is executed the context returned here and the
191    /// state of the instance should be passed to [`Self::snapshot`].
192    pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
193        // Make sure we're given valid Wasm from the get go.
194        self.wasm_validate(&wasm)?;
195
196        let mut cx = parse::parse(wasm)?;
197
198        // When wizening core modules directly some imports aren't supported,
199        // so check for those here.
200        for import in cx.imports() {
201            match import.ty {
202                wasmparser::TypeRef::Global(_) => {
203                    bail!("imported globals are not supported")
204                }
205                wasmparser::TypeRef::Table(_) => {
206                    bail!("imported tables are not supported")
207                }
208                wasmparser::TypeRef::Memory(_) => {
209                    bail!("imported memories are not supported")
210                }
211                wasmparser::TypeRef::Func(_) => {}
212                wasmparser::TypeRef::FuncExact(_) => {}
213                wasmparser::TypeRef::Tag(_) => {}
214            }
215        }
216
217        let instrumented_wasm = instrument::instrument(&mut cx);
218        self.debug_assert_valid_wasm(&instrumented_wasm, "instrumented module");
219
220        Ok((cx, instrumented_wasm))
221    }
222
223    /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
224    /// by [`Self::instrument`] and the state of the `instance` after it has
225    /// possibly executed its initialization function.
226    ///
227    /// This returns a new WebAssembly binary which has all state
228    /// pre-initialized.
229    pub async fn snapshot(
230        &self,
231        mut cx: ModuleContext<'_>,
232        instance: &mut impl InstanceState,
233    ) -> Result<Vec<u8>> {
234        // Parse rename spec.
235        let renames = FuncRenames::parse(&self.func_renames)?;
236
237        let snapshot = snapshot::snapshot(&cx, instance).await;
238        let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames, true);
239
240        self.debug_assert_valid_wasm(&rewritten_wasm, "rewritten module");
241
242        Ok(rewritten_wasm)
243    }
244
245    fn debug_assert_valid_wasm(&self, wasm: &[u8], context: &str) {
246        if !cfg!(debug_assertions) {
247            return;
248        }
249        if let Err(error) = self.wasm_validate(&wasm) {
250            #[cfg(feature = "wasmprinter")]
251            let wat = wasmprinter::print_bytes(&wasm)
252                .unwrap_or_else(|e| format!("Disassembling to WAT failed: {e}"));
253            #[cfg(not(feature = "wasmprinter"))]
254            let wat = "`wasmprinter` cargo feature is not enabled".to_string();
255
256            let wat = if wat.len() > 16 * 1024 {
257                std::fs::write("invalid.wat", wat).expect("writing to invalid.wat");
258                "written to invalid.wat"
259            } else {
260                &wat
261            };
262            panic!("{context} is not valid wasm: {error:?}\n\nWAT:\n{wat}");
263        }
264    }
265
266    fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
267        log::debug!("Validating input Wasm");
268
269        wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
270            .validate_all(wasm)
271            .context("wasm validation failed")?;
272
273        for payload in wasmparser::Parser::new(0).parse_all(wasm) {
274            match payload? {
275                wasmparser::Payload::CodeSectionEntry(code) => {
276                    let mut ops = code.get_operators_reader()?;
277                    while !ops.eof() {
278                        match ops.read()? {
279                            // Table mutations aren't allowed as wizer has no
280                            // way to record a snapshot of a table at this time.
281                            // The only table mutations allowed are those from
282                            // active element segments which can be
283                            // deterministically replayed, so disallow all other
284                            // forms of mutating a table.
285                            //
286                            // Ideally Wizer could take a snapshot of a table
287                            // post-instantiation and then ensure that after
288                            // running initialization the table didn't get
289                            // mutated, allowing these instructions, but that's
290                            // also not possible at this time.
291                            wasmparser::Operator::TableCopy { .. } => {
292                                bail!("unsupported `table.copy` instruction")
293                            }
294                            wasmparser::Operator::TableInit { .. } => {
295                                bail!("unsupported `table.init` instruction")
296                            }
297                            wasmparser::Operator::TableSet { .. } => {
298                                bail!("unsupported `table.set` instruction")
299                            }
300                            wasmparser::Operator::TableGrow { .. } => {
301                                bail!("unsupported `table.grow` instruction")
302                            }
303                            wasmparser::Operator::TableFill { .. } => {
304                                bail!("unsupported `table.fill` instruction")
305                            }
306
307                            // Wizer has no way of dynamically determining which
308                            // element or data segments were dropped during
309                            // execution so instead disallow these instructions
310                            // entirely. Like above it'd be nice to allow them
311                            // but just forbid their execution during the
312                            // initialization function, but that can't be done
313                            // easily at this time.
314                            wasmparser::Operator::ElemDrop { .. } => {
315                                bail!("unsupported `elem.drop` instruction")
316                            }
317                            wasmparser::Operator::DataDrop { .. } => {
318                                bail!("unsupported `data.drop` instruction")
319                            }
320
321                            // Wizer can't snapshot GC references, so disallow
322                            // any mutation of GC references. This prevents, for
323                            // example, reading something from a table and then
324                            // mutating it.
325                            wasmparser::Operator::StructSet { .. } => {
326                                bail!("unsupported `struct.set` instruction")
327                            }
328                            wasmparser::Operator::ArraySet { .. } => {
329                                bail!("unsupported `array.set` instruction")
330                            }
331                            wasmparser::Operator::ArrayFill { .. } => {
332                                bail!("unsupported `array.fill` instruction")
333                            }
334                            wasmparser::Operator::ArrayCopy { .. } => {
335                                bail!("unsupported `array.copy` instruction")
336                            }
337                            wasmparser::Operator::ArrayInitData { .. } => {
338                                bail!("unsupported `array.init_data` instruction")
339                            }
340                            wasmparser::Operator::ArrayInitElem { .. } => {
341                                bail!("unsupported `array.init_elem` instruction")
342                            }
343
344                            _ => continue,
345                        }
346                    }
347                }
348                wasmparser::Payload::GlobalSection(globals) => {
349                    for g in globals {
350                        let g = g?.ty;
351                        if !g.mutable {
352                            continue;
353                        }
354                        match g.content_type {
355                            wasmparser::ValType::I32
356                            | wasmparser::ValType::I64
357                            | wasmparser::ValType::F32
358                            | wasmparser::ValType::F64
359                            | wasmparser::ValType::V128 => {}
360                            wasmparser::ValType::Ref(_) => {
361                                bail!("unsupported mutable global containing a reference type")
362                            }
363                        }
364                    }
365                }
366                _ => {}
367            }
368        }
369
370        Ok(())
371    }
372
373    fn get_keep_init_func(&self) -> bool {
374        match self.keep_init_func {
375            Some(keep) => keep.unwrap_or(true),
376            None => DEFAULT_KEEP_INIT_FUNC,
377        }
378    }
379}
380
381/// Abstract ability to load state from a WebAssembly instance after it's been
382/// instantiated and some exports have run.
383pub trait InstanceState {
384    /// Loads the global specified by `name`, returning a `SnapshotVal`.
385    ///
386    /// # Panics
387    ///
388    /// This function panics if `name` isn't an exported global or if the type
389    /// of the global doesn't fit in `SnapshotVal`.
390    fn global_get(
391        &mut self,
392        name: &str,
393        type_hint: ValType,
394    ) -> impl Future<Output = SnapshotVal> + Send;
395
396    /// Loads the contents of the memory specified by `name`, returning the
397    /// entier contents as a `Vec<u8>`.
398    ///
399    /// # Panics
400    ///
401    /// This function panics if `name` isn't an exported memory.
402    fn memory_contents(
403        &mut self,
404        name: &str,
405        contents: impl FnOnce(&[u8]) + Send,
406    ) -> impl Future<Output = ()> + Send;
407}