wasmtime_wizer/lib.rs
1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22#[cfg(not(feature = "rayon"))]
23mod rayoff;
24
25pub use crate::info::ModuleContext;
26pub use crate::snapshot::SnapshotVal;
27use ::wasmtime::{Result, bail, error::Context as _};
28use std::collections::{HashMap, HashSet};
29pub use wasmparser::ValType;
30
/// Value `Wizer::get_keep_init_func` falls back to when `keep_init_func` was
/// never configured (`None`): by default the initialization function is not
/// kept as an export.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
32
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
/// will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
/// allow the Wasm module to mutate tables, and we would need to be able to
/// snapshot the new table state, but funcrefs and externrefs don't have
/// identity and aren't comparable in the Wasm spec, which makes snapshotting
/// difficult.
// NOTE: with the `clap` feature enabled this struct derives `clap::Parser`,
// and clap's derive uses the `///` comments on the struct and its fields as
// command-line help text. Keep maintainer-only remarks in `//` comments.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    // The clap default below is the same string that `Wizer::new` uses.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    // Each entry is stored as `(dst, src)`: `func_rename` pushes
    // `(new_name, old_name)` and `FuncRenames::parse` destructures the pairs
    // in that same order.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    // Encoding, as interpreted by `get_keep_init_func`:
    //   * `None`          -> fall back to `DEFAULT_KEEP_INIT_FUNC`
    //   * `Some(None)`    -> treated as `true`
    //   * `Some(Some(b))` -> `b`
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
102
/// Parses a single `dst=src` rename specification into a `(dst, src)` pair.
///
/// The string is split at the *first* `=`: everything before it becomes the
/// destination name and everything after it becomes the source name. Any
/// further `=` characters therefore end up in the source name.
///
/// # Errors
///
/// Fails if `s` does not contain an `=` at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
    // `split_once` splits at the first separator without building an
    // intermediate `Vec`, and returns `None` when there is no separator.
    let Some((dst, src)) = s.split_once('=') else {
        bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
111
/// Lookup tables built by `FuncRenames::parse` from the `(dst, src)` rename
/// pairs: a source-to-destination map plus the set of all destination names.
#[derive(Default)]
struct FuncRenames {
    /// For a given export name that we encounter in the original module, a map
    /// to a new name, if any, to emit in the output module.
    rename_src_to_dst: HashMap<String, String>,
    /// A set of export names that we ignore in the original module (because
    /// they are overwritten by renamings).
    rename_dsts: HashSet<String>,
}
121
122impl FuncRenames {
123 fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
124 let mut ret = FuncRenames {
125 rename_src_to_dst: HashMap::new(),
126 rename_dsts: HashSet::new(),
127 };
128 if renames.is_empty() {
129 return Ok(ret);
130 }
131
132 for (dst, src) in renames {
133 if ret.rename_dsts.contains(dst) {
134 bail!("Duplicated function rename dst {dst}");
135 }
136 if ret.rename_src_to_dst.contains_key(src) {
137 bail!("Duplicated function rename src {src}");
138 }
139 ret.rename_dsts.insert(dst.clone());
140 ret.rename_src_to_dst.insert(src.clone(), dst.clone());
141 }
142
143 Ok(ret)
144 }
145}
146
147impl Wizer {
148 /// Construct a new `Wizer` builder.
149 pub fn new() -> Self {
150 Wizer {
151 init_func: "wizer-initialize".to_string(),
152 func_renames: vec![],
153 keep_init_func: None,
154 }
155 }
156
157 /// The export name of the initializer function.
158 ///
159 /// Defaults to `"wizer-initialize"`.
160 pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
161 self.init_func = init_func.into();
162 self
163 }
164
165 /// Returns the initialization function that will be run for wizer.
166 pub fn get_init_func(&self) -> &str {
167 &self.init_func
168 }
169
170 /// Add a function rename to perform.
171 pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
172 self.func_renames
173 .push((new_name.to_string(), old_name.to_string()));
174 self
175 }
176
177 /// After initialization, should the Wasm module still export the
178 /// initialization function?
179 ///
180 /// This is `false` by default, meaning that the initialization function is
181 /// no longer exported from the Wasm module.
182 pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
183 self.keep_init_func = Some(Some(keep));
184 self
185 }
186
187 /// First half of [`Self::run`] which instruments the provided `wasm` and
188 /// produces a new wasm module which should be run by a runtime.
189 ///
190 /// After the returned wasm is executed the context returned here and the
191 /// state of the instance should be passed to [`Self::snapshot`].
192 pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
193 // Make sure we're given valid Wasm from the get go.
194 self.wasm_validate(&wasm)?;
195
196 let mut cx = parse::parse(wasm)?;
197
198 // When wizening core modules directly some imports aren't supported,
199 // so check for those here.
200 for import in cx.imports() {
201 match import.ty {
202 wasmparser::TypeRef::Global(_) => {
203 bail!("imported globals are not supported")
204 }
205 wasmparser::TypeRef::Table(_) => {
206 bail!("imported tables are not supported")
207 }
208 wasmparser::TypeRef::Memory(_) => {
209 bail!("imported memories are not supported")
210 }
211 wasmparser::TypeRef::Func(_) => {}
212 wasmparser::TypeRef::FuncExact(_) => {}
213 wasmparser::TypeRef::Tag(_) => {}
214 }
215 }
216
217 let instrumented_wasm = instrument::instrument(&mut cx);
218 self.debug_assert_valid_wasm(&instrumented_wasm, "instrumented module");
219
220 Ok((cx, instrumented_wasm))
221 }
222
223 /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
224 /// by [`Self::instrument`] and the state of the `instance` after it has
225 /// possibly executed its initialization function.
226 ///
227 /// This returns a new WebAssembly binary which has all state
228 /// pre-initialized.
229 pub async fn snapshot(
230 &self,
231 mut cx: ModuleContext<'_>,
232 instance: &mut impl InstanceState,
233 ) -> Result<Vec<u8>> {
234 // Parse rename spec.
235 let renames = FuncRenames::parse(&self.func_renames)?;
236
237 let snapshot = snapshot::snapshot(&cx, instance).await;
238 let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames, true);
239
240 self.debug_assert_valid_wasm(&rewritten_wasm, "rewritten module");
241
242 Ok(rewritten_wasm)
243 }
244
245 fn debug_assert_valid_wasm(&self, wasm: &[u8], context: &str) {
246 if !cfg!(debug_assertions) {
247 return;
248 }
249 if let Err(error) = self.wasm_validate(&wasm) {
250 #[cfg(feature = "wasmprinter")]
251 let wat = wasmprinter::print_bytes(&wasm)
252 .unwrap_or_else(|e| format!("Disassembling to WAT failed: {e}"));
253 #[cfg(not(feature = "wasmprinter"))]
254 let wat = "`wasmprinter` cargo feature is not enabled".to_string();
255
256 let wat = if wat.len() > 16 * 1024 {
257 std::fs::write("invalid.wat", wat).expect("writing to invalid.wat");
258 "written to invalid.wat"
259 } else {
260 &wat
261 };
262 panic!("{context} is not valid wasm: {error:?}\n\nWAT:\n{wat}");
263 }
264 }
265
266 fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
267 log::debug!("Validating input Wasm");
268
269 wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
270 .validate_all(wasm)
271 .context("wasm validation failed")?;
272
273 for payload in wasmparser::Parser::new(0).parse_all(wasm) {
274 match payload? {
275 wasmparser::Payload::CodeSectionEntry(code) => {
276 let mut ops = code.get_operators_reader()?;
277 while !ops.eof() {
278 match ops.read()? {
279 // Table mutations aren't allowed as wizer has no
280 // way to record a snapshot of a table at this time.
281 // The only table mutations allowed are those from
282 // active element segments which can be
283 // deterministically replayed, so disallow all other
284 // forms of mutating a table.
285 //
286 // Ideally Wizer could take a snapshot of a table
287 // post-instantiation and then ensure that after
288 // running initialization the table didn't get
289 // mutated, allowing these instructions, but that's
290 // also not possible at this time.
291 wasmparser::Operator::TableCopy { .. } => {
292 bail!("unsupported `table.copy` instruction")
293 }
294 wasmparser::Operator::TableInit { .. } => {
295 bail!("unsupported `table.init` instruction")
296 }
297 wasmparser::Operator::TableSet { .. } => {
298 bail!("unsupported `table.set` instruction")
299 }
300 wasmparser::Operator::TableGrow { .. } => {
301 bail!("unsupported `table.grow` instruction")
302 }
303 wasmparser::Operator::TableFill { .. } => {
304 bail!("unsupported `table.fill` instruction")
305 }
306
307 // Wizer has no way of dynamically determining which
308 // element or data segments were dropped during
309 // execution so instead disallow these instructions
310 // entirely. Like above it'd be nice to allow them
311 // but just forbid their execution during the
312 // initialization function, but that can't be done
313 // easily at this time.
314 wasmparser::Operator::ElemDrop { .. } => {
315 bail!("unsupported `elem.drop` instruction")
316 }
317 wasmparser::Operator::DataDrop { .. } => {
318 bail!("unsupported `data.drop` instruction")
319 }
320
321 // Wizer can't snapshot GC references, so disallow
322 // any mutation of GC references. This prevents, for
323 // example, reading something from a table and then
324 // mutating it.
325 wasmparser::Operator::StructSet { .. } => {
326 bail!("unsupported `struct.set` instruction")
327 }
328 wasmparser::Operator::ArraySet { .. } => {
329 bail!("unsupported `array.set` instruction")
330 }
331 wasmparser::Operator::ArrayFill { .. } => {
332 bail!("unsupported `array.fill` instruction")
333 }
334 wasmparser::Operator::ArrayCopy { .. } => {
335 bail!("unsupported `array.copy` instruction")
336 }
337 wasmparser::Operator::ArrayInitData { .. } => {
338 bail!("unsupported `array.init_data` instruction")
339 }
340 wasmparser::Operator::ArrayInitElem { .. } => {
341 bail!("unsupported `array.init_elem` instruction")
342 }
343
344 _ => continue,
345 }
346 }
347 }
348 wasmparser::Payload::GlobalSection(globals) => {
349 for g in globals {
350 let g = g?.ty;
351 if !g.mutable {
352 continue;
353 }
354 match g.content_type {
355 wasmparser::ValType::I32
356 | wasmparser::ValType::I64
357 | wasmparser::ValType::F32
358 | wasmparser::ValType::F64
359 | wasmparser::ValType::V128 => {}
360 wasmparser::ValType::Ref(_) => {
361 bail!("unsupported mutable global containing a reference type")
362 }
363 }
364 }
365 }
366 _ => {}
367 }
368 }
369
370 Ok(())
371 }
372
373 fn get_keep_init_func(&self) -> bool {
374 match self.keep_init_func {
375 Some(keep) => keep.unwrap_or(true),
376 None => DEFAULT_KEEP_INIT_FUNC,
377 }
378 }
379}
380
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    // NOTE(review): nothing visible in this file shows how `type_hint` is
    // used; presumably it is the global's declared value type. Confirm
    // against the `snapshot` module and the implementors.
    fn global_get(
        &mut self,
        name: &str,
        type_hint: ValType,
    ) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name`, passing the
    /// entire contents to the `contents` closure as a byte slice.
    ///
    /// The bytes are handed to the closure rather than returned; the future
    /// itself resolves to `()`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}