wasmtime_environ/compile/module_environ.rs
1use crate::error::{OutOfMemory, Result, bail};
2use crate::module::{
3 FuncRefIndex, Initializer, MemoryInitialization, Module, TableSegment, TableSegmentElements,
4};
5use crate::prelude::*;
6use crate::{
7 ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, DefinedGlobalIndex, ElemIndex,
8 EngineOrModuleTypeIndex, EntityIndex, EntityType, FuncIndex, FuncKey, GlobalIndex, IndexType,
9 MemoryIndex, MemoryInitializer, ModuleInternedTypeIndex, ModuleStartup, ModuleTypesBuilder,
10 PanicOnOom as _, PassiveElemIndex, PrimaryMap, RuntimeDataIndex, StaticModuleIndex, TableIndex,
11 TableInitialValue, TableInitialization, Tag, TagIndex, Tunables, TypeConvert, TypeIndex,
12 WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
13};
14use alloc::borrow::Cow;
15use cranelift_entity::SecondaryMap;
16use cranelift_entity::packed_option::ReservedValue;
17use std::collections::HashMap;
18use std::mem;
19use std::path::PathBuf;
20use std::sync::Arc;
21use wasmparser::{
22 CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
23 FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
24 Validator, ValidatorResources, types::Types,
25};
26
27/// Object containing the standalone environment information.
28pub struct ModuleEnvironment<'a, 'data> {
29 /// The current module being translated
30 result: ModuleTranslation<'data>,
31
32 /// Intern'd types for this entire translation, shared by all modules.
33 types: &'a mut ModuleTypesBuilder,
34
35 // Various bits and pieces of configuration
36 validator: &'a mut Validator,
37 tunables: &'a Tunables,
38}
39
40/// The result of translating via `ModuleEnvironment`.
41///
42/// Function bodies are not yet translated, and data initializers have not yet
43/// been copied out of the original buffer.
44pub struct ModuleTranslation<'data> {
45 /// Module information.
46 pub module: Module,
47
48 /// The input wasm binary.
49 ///
50 /// This can be useful, for example, when modules are parsed from a
51 /// component and the embedder wants access to the raw wasm modules
52 /// themselves.
53 pub wasm: &'data [u8],
54
55 /// The byte offset of this module's Wasm binary within the outer
56 /// binary (e.g. a component). For standalone modules this is 0.
57 /// This is used to convert component-relative source locations to
58 /// module-relative source locations.
59 pub wasm_module_offset: u64,
60
61 /// References to the function bodies.
62 pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
63
64 /// For each imported function, the single statically-known function that
65 /// always satisfies that import, if any.
66 ///
67 /// This is used to turn what would otherwise be indirect calls through the
68 /// imports table into direct calls, when possible.
69 ///
70 /// When filled in, this only ever contains
71 /// `FuncKey::DefinedWasmFunction(..)`s and `FuncKey::Intrinsic(..)`s.
72 pub known_imported_functions: SecondaryMap<FuncIndex, Option<FuncKey>>,
73
74 /// A list of type signatures which are considered exported from this
75 /// module, or those that can possibly be called. This list is sorted, and
76 /// trampolines for each of these signatures are required.
77 pub exported_signatures: Vec<ModuleInternedTypeIndex>,
78
79 /// DWARF debug information, if enabled, parsed from the module.
80 pub debuginfo: DebugInfoData<'data>,
81
82 /// Set if debuginfo was found but it was not parsed due to `Tunables`
83 /// configuration.
84 pub has_unparsed_debuginfo: bool,
85
86 /// The desired alignment of `data` in the final data section of the object
87 /// file that we'll emit.
88 ///
89 /// Note that this is 1 by default but `MemoryInitialization::Static` might
90 /// switch this to a higher alignment to facilitate mmap-ing data from
91 /// an object file into a linear memory.
92 pub data_align: Option<u64>,
93
94 /// Map from a data segment to whether it's a passive data segment or not.
95 pub runtime_data_map: SecondaryMap<DataIndex, Option<RuntimeDataIndex>>,
96
97 /// Map from an elem segment to whether it's a passive elem segment or not.
98 pub passive_elem_map: SecondaryMap<ElemIndex, Option<PassiveElemIndex>>,
99
100 /// List of passive element segments found in this module which will get
101 /// concatenated for the final artifact.
102 pub runtime_data: PrimaryMap<RuntimeDataIndex, Cow<'data, [u8]>>,
103
104 /// Record of all passive data segments that this module contains.
105 ///
106 /// These are processed during [`ModuleTranslation::finalize_memory_init`]
107 /// and eventually moved over into the `runtime_data` list above. Until
108 /// then, however, their `RuntimeDataIndex` is not yet assigned.
109 passive_data: Vec<(DataIndex, &'data [u8])>,
110
111 /// When we're parsing the code section this will be incremented so we know
112 /// which function is currently being defined.
113 code_index: u32,
114
115 /// The type information of the current module made available at the end of the
116 /// validation process.
117 types: Option<Types>,
118
119 /// Per-function [`BranchHintReader`]s from the `metadata.code.branch_hint`
120 /// section, keyed by function index. Populated only when
121 /// [`Tunables::branch_hinting`] is enabled.
122 branch_hints: HashMap<FuncIndex, BranchHintReader<'data>>,
123
124 /// The WebAssembly `start` function, if defined.
125 pub start_func: Option<FuncIndex>,
126
127 /// Initializers for `global` values which aren't considered "simple".
128 ///
129 /// These initializers are later compiled into a "module startup" function.
130 pub global_initializers: Vec<(DefinedGlobalIndex, ConstExpr)>,
131
132 /// Definitions of all passive elements found within a module.
133 ///
134 /// This maps passive element segments to their definition, either functions
135 /// or expressions-basd.
136 pub passive_elements: PrimaryMap<PassiveElemIndex, TableSegmentElements>,
137
138 /// WebAssembly table initialization data, per table.
139 ///
140 /// This keeps track of all per-table initialization (e.g. initial value for
141 /// non-null tables) as well as active element segments. This is processed
142 /// and refined by [`ModuleTranslation::finalize_table_init`] after
143 /// translation.
144 pub table_initialization: TableInitialization,
145
146 /// WebAssembly memory initialization.
147 ///
148 /// This is held here in an `Unprocessed` form during translation, and then
149 /// this is later finished with [`ModuleTranslation::finalize_memory_init`].
150 pub memory_init: MemoryInit<'data>,
151}
152
153/// Different forms of memory initialization that happens for a module.
154pub enum MemoryInit<'a> {
155 /// Raw active data segments that are being applied for an instance.
156 ///
157 /// This list contains the raw data which hasn't yet been processed into
158 /// `RuntimeDataIndex`, for example. This is later processed during
159 /// [`ModuleTranslation::finalize_memory_init`] to optionally shuffle things
160 /// around.
161 Unprocessed(Vec<MemoryInitializer<'a>>),
162
163 /// Finalized memory initialization to be executed after
164 /// [`ModuleTranslation::finalize_memory_init`] has run. This represents
165 /// active data segments which may have been merged from the `Unprocessed`
166 /// list above, and may or may not have statically know offsets.
167 Processed(Vec<(MemoryIndex, MemorySegmentOffset, RuntimeDataIndex)>),
168}
169
170/// Offset within [`MemoryInit::Processed`] which indicates the initial offset
171/// a data segment is applied at.
172pub enum MemorySegmentOffset {
173 /// A "complicated" constant expression deferred to get evaluated at runtime
174 /// with compiled code.
175 Expr(ConstExpr),
176
177 /// A statically known, in-bounds, constant value.
178 Static(u64),
179}
180
181/// Lazy decoder over the branch hints attached to a single function in the
182/// `metadata.code.branch_hint` custom section
183/// ([branch-hinting proposal](https://github.com/WebAssembly/branch-hinting)).
184pub type BranchHintReader<'a> = wasmparser::SectionLimited<'a, wasmparser::BranchHint>;
185
186impl<'data> ModuleTranslation<'data> {
187 /// Create a new translation for the module with the given index.
188 pub fn new(module_index: StaticModuleIndex) -> Self {
189 Self {
190 module: Module::new(module_index),
191 wasm: &[],
192 wasm_module_offset: 0,
193 function_body_inputs: PrimaryMap::default(),
194 known_imported_functions: SecondaryMap::default(),
195 exported_signatures: Vec::default(),
196 debuginfo: DebugInfoData::default(),
197 has_unparsed_debuginfo: false,
198 data_align: None,
199 runtime_data: Default::default(),
200 code_index: 0,
201 types: None,
202 runtime_data_map: Default::default(),
203 passive_elem_map: Default::default(),
204 branch_hints: HashMap::default(),
205 start_func: None,
206 global_initializers: Vec::new(),
207 passive_elements: Default::default(),
208 table_initialization: Default::default(),
209 memory_init: MemoryInit::Unprocessed(Vec::new()),
210 passive_data: Default::default(),
211 }
212 }
213
214 /// Returns the [`BranchHintReader`] for `func`, if the section attached any.
215 pub fn branch_hints(&self, func: FuncIndex) -> Option<BranchHintReader<'data>> {
216 self.branch_hints.get(&func).cloned()
217 }
218
219 /// Returns a reference to the type information of the current module.
220 pub fn get_types(&self) -> &Types {
221 self.types
222 .as_ref()
223 .expect("module type information to be available")
224 }
225
226 /// Get this translation's module's index.
227 pub fn module_index(&self) -> StaticModuleIndex {
228 self.module.module_index
229 }
230}
231
232/// Contains function data: byte code and its offset in the module.
233pub struct FunctionBodyData<'a> {
234 /// The body of the function, containing code and locals.
235 pub body: FunctionBody<'a>,
236 /// Validator for the function body
237 pub validator: FuncToValidate<ValidatorResources>,
238}
239
240#[derive(Debug, Default)]
241#[expect(missing_docs, reason = "self-describing fields")]
242pub struct DebugInfoData<'a> {
243 pub dwarf: Dwarf<'a>,
244 pub name_section: NameSection<'a>,
245 pub wasm_file: WasmFileInfo,
246 pub debug_loc: gimli::DebugLoc<Reader<'a>>,
247 pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
248 pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
249 pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
250 pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
251 pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
252}
253
254#[expect(missing_docs, reason = "self-describing")]
255pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;
256
257type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
258
259#[derive(Debug, Default)]
260#[expect(missing_docs, reason = "self-describing fields")]
261pub struct NameSection<'a> {
262 pub module_name: Option<&'a str>,
263 pub func_names: HashMap<FuncIndex, &'a str>,
264 pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
265}
266
267#[derive(Debug, Default)]
268#[expect(missing_docs, reason = "self-describing fields")]
269pub struct WasmFileInfo {
270 pub path: Option<PathBuf>,
271 pub code_section_offset: u64,
272 pub imported_func_count: u32,
273 pub funcs: Vec<FunctionMetadata>,
274}
275
276#[derive(Debug)]
277#[expect(missing_docs, reason = "self-describing fields")]
278pub struct FunctionMetadata {
279 pub params: Box<[WasmValType]>,
280 pub locals: Box<[(u32, WasmValType)]>,
281}
282
283impl<'a, 'data> ModuleEnvironment<'a, 'data> {
284 /// Allocates the environment data structures.
285 pub fn new(
286 tunables: &'a Tunables,
287 validator: &'a mut Validator,
288 types: &'a mut ModuleTypesBuilder,
289 module_index: StaticModuleIndex,
290 ) -> Self {
291 Self {
292 result: ModuleTranslation::new(module_index),
293 types,
294 tunables,
295 validator,
296 }
297 }
298
299 /// Translate a wasm module using this environment.
300 ///
301 /// This function will translate the `data` provided with `parser`,
302 /// validating everything along the way with this environment's validator.
303 ///
304 /// The result of translation, [`ModuleTranslation`], contains everything
305 /// necessary to compile functions afterwards as well as learn type
306 /// information about the module at runtime.
307 pub fn translate(
308 mut self,
309 parser: Parser,
310 data: &'data [u8],
311 ) -> Result<ModuleTranslation<'data>> {
312 self.result.wasm = data;
313
314 for payload in parser.parse_all(data) {
315 self.translate_payload(payload?)?;
316 }
317
318 Ok(self.result)
319 }
320
321 fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
322 match payload {
323 Payload::Version {
324 num,
325 encoding,
326 range,
327 } => {
328 self.validator.version(num, encoding, &range)?;
329 match encoding {
330 Encoding::Module => {}
331 Encoding::Component => {
332 bail!("expected a WebAssembly module but was given a WebAssembly component")
333 }
334 }
335 }
336
337 Payload::End(offset) => {
338 self.result.types = Some(self.validator.end(offset)?);
339
340 // With the `escaped_funcs` set of functions finished
341 // we can calculate the set of signatures that are exported as
342 // the set of exported functions' signatures.
343 self.result.exported_signatures = self
344 .result
345 .module
346 .functions
347 .iter()
348 .filter_map(|(_, func)| {
349 if func.is_escaping() {
350 Some(func.signature.unwrap_module_type_index())
351 } else {
352 None
353 }
354 })
355 .collect();
356 self.result.exported_signatures.sort_unstable();
357 self.result.exported_signatures.dedup();
358 }
359
360 Payload::TypeSection(types) => {
361 self.validator.type_section(&types)?;
362
363 let count = self.validator.types(0).unwrap().core_type_count_in_module();
364 log::trace!("interning {count} Wasm types");
365
366 let capacity = usize::try_from(count).unwrap();
367 self.result.module.types.reserve(capacity)?;
368 self.types.reserve_wasm_signatures(capacity);
369
370 // Iterate over each *rec group* -- not type -- defined in the
371 // types section. Rec groups are the unit of canonicalization
372 // and therefore the unit at which we need to process at a
373 // time. `wasmparser` has already done the hard work of
374 // de-duplicating and canonicalizing the rec groups within the
375 // module for us, we just need to translate them into our data
376 // structures. Note that, if the Wasm defines duplicate rec
377 // groups, we need copy the duplicates over (shallowly) as well,
378 // so that our types index space doesn't have holes.
379 let mut type_index = 0;
380 while type_index < count {
381 let validator_types = self.validator.types(0).unwrap();
382
383 // Get the rec group for the current type index, which is
384 // always the first type defined in a rec group.
385 log::trace!("looking up wasmparser type for index {type_index}");
386 let core_type_id = validator_types.core_type_at_in_module(type_index);
387 log::trace!(
388 " --> {core_type_id:?} = {:?}",
389 validator_types[core_type_id],
390 );
391 let rec_group_id = validator_types.rec_group_id_of(core_type_id);
392 debug_assert_eq!(
393 validator_types
394 .rec_group_elements(rec_group_id)
395 .position(|id| id == core_type_id),
396 Some(0)
397 );
398
399 // Intern the rec group and then fill in this module's types
400 // index space.
401 let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
402 let elems = self.types.rec_group_elements(interned);
403 let len = elems.len();
404 self.result.module.types.reserve(len)?;
405 for ty in elems {
406 self.result.module.types.push(ty.into())?;
407 }
408
409 // Advance `type_index` to the start of the next rec group.
410 type_index += u32::try_from(len).unwrap();
411 }
412 }
413
414 Payload::ImportSection(imports) => {
415 self.validator.import_section(&imports)?;
416
417 let cnt = usize::try_from(imports.count()).unwrap();
418 self.result.module.initializers.reserve(cnt)?;
419
420 for entry in imports.into_imports() {
421 let import = entry?;
422 let ty = match import.ty {
423 TypeRef::Func(index) => {
424 let index = TypeIndex::from_u32(index);
425 let interned_index = self.result.module.types[index];
426 self.result.module.num_imported_funcs += 1;
427 self.result.debuginfo.wasm_file.imported_func_count += 1;
428 EntityType::Function(interned_index)
429 }
430 TypeRef::Memory(ty) => {
431 self.result.module.num_imported_memories += 1;
432 EntityType::Memory(ty.into())
433 }
434 TypeRef::Global(ty) => {
435 self.result.module.num_imported_globals += 1;
436 EntityType::Global(self.convert_global_type(&ty)?)
437 }
438 TypeRef::Table(ty) => {
439 self.result.module.num_imported_tables += 1;
440 EntityType::Table(self.convert_table_type(&ty)?)
441 }
442 TypeRef::Tag(ty) => {
443 let index = TypeIndex::from_u32(ty.func_type_idx);
444 let signature = self.result.module.types[index];
445 let exception = self.types.define_exception_type_for_tag(
446 signature.unwrap_module_type_index(),
447 );
448 let tag = Tag {
449 signature,
450 exception: EngineOrModuleTypeIndex::Module(exception),
451 };
452 self.result.module.num_imported_tags += 1;
453 EntityType::Tag(tag)
454 }
455 TypeRef::FuncExact(_) => {
456 bail!("custom-descriptors proposal not implemented yet");
457 }
458 };
459 self.declare_import(import.module, import.name, ty)?;
460 }
461 }
462
463 Payload::FunctionSection(functions) => {
464 self.validator.function_section(&functions)?;
465
466 let cnt = usize::try_from(functions.count()).unwrap();
467 self.result.module.functions.reserve_exact(cnt)?;
468
469 for entry in functions {
470 let sigindex = entry?;
471 let ty = TypeIndex::from_u32(sigindex);
472 let interned_index = self.result.module.types[ty];
473 self.result.module.push_function(interned_index);
474 }
475 }
476
477 Payload::TableSection(tables) => {
478 self.validator.table_section(&tables)?;
479 let cnt = usize::try_from(tables.count()).unwrap();
480 self.result.module.tables.reserve_exact(cnt)?;
481
482 for entry in tables {
483 let wasmparser::Table { ty, init } = entry?;
484 let table = self.convert_table_type(&ty)?;
485 self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
486 self.result.module.tables.push(table)?;
487 let init = match init {
488 wasmparser::TableInit::RefNull => TableInitialValue::Null,
489 wasmparser::TableInit::Expr(expr) => {
490 let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
491 for f in escaped {
492 self.flag_func_escaped(f);
493 }
494 TableInitialValue::Expr(init)
495 }
496 };
497 self.result.table_initialization.initial_values.push(init)?;
498 self.result
499 .module
500 .table_initialization
501 .push(Default::default())?;
502 }
503 }
504
505 Payload::MemorySection(memories) => {
506 self.validator.memory_section(&memories)?;
507
508 let cnt = usize::try_from(memories.count()).unwrap();
509 self.result.module.memories.reserve_exact(cnt)?;
510
511 for entry in memories {
512 let memory = entry?;
513 self.result.module.memories.push(memory.into())?;
514 }
515 }
516
517 Payload::TagSection(tags) => {
518 self.validator.tag_section(&tags)?;
519
520 for entry in tags {
521 let sigindex = entry?.func_type_idx;
522 let ty = TypeIndex::from_u32(sigindex);
523 let interned_index = self.result.module.types[ty];
524 let exception = self
525 .types
526 .define_exception_type_for_tag(interned_index.unwrap_module_type_index());
527 self.result.module.push_tag(interned_index, exception);
528 }
529 }
530
531 Payload::GlobalSection(globals) => {
532 self.validator.global_section(&globals)?;
533
534 let cnt = usize::try_from(globals.count()).unwrap();
535 self.result.module.globals.reserve_exact(cnt)?;
536
537 for entry in globals {
538 let wasmparser::Global { ty, init_expr } = entry?;
539 let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
540 for f in escaped {
541 self.flag_func_escaped(f);
542 }
543 let ty = self.convert_global_type(&ty)?;
544 let index = self.result.module.globals.push(ty)?;
545 let defined_index = self.result.module.defined_global_index(index).unwrap();
546 match initializer.const_eval() {
547 Some(val) => {
548 self.result
549 .module
550 .global_initializers
551 .push((defined_index, val))?;
552 }
553 None => {
554 // "Complicated" global initializers are deferred
555 // to get evaluated in the startup function.
556 self.require_startup_func();
557 self.result
558 .global_initializers
559 .push((defined_index, initializer));
560 }
561 }
562 }
563 }
564
565 Payload::ExportSection(exports) => {
566 self.validator.export_section(&exports)?;
567
568 let cnt = usize::try_from(exports.count()).unwrap();
569 self.result.module.exports.reserve(cnt)?;
570
571 for entry in exports {
572 let wasmparser::Export { name, kind, index } = entry?;
573 let entity = match kind {
574 ExternalKind::Func | ExternalKind::FuncExact => {
575 let index = FuncIndex::from_u32(index);
576 self.flag_func_escaped(index);
577 EntityIndex::Function(index)
578 }
579 ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
580 ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
581 ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
582 ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
583 };
584 let name = self.result.module.strings.insert(name)?;
585 self.result.module.exports.insert(name, entity)?;
586 }
587 }
588
589 Payload::StartSection { func, range } => {
590 self.validator.start_section(func, &range)?;
591
592 let func_index = FuncIndex::from_u32(func);
593 debug_assert!(self.result.start_func.is_none());
594 self.result.start_func = Some(func_index);
595
596 // To make startup a bit easier, invoking the `start` function
597 // is a responsibility deferred to the startup function.
598 self.require_startup_func();
599 }
600
601 Payload::ElementSection(elements) => {
602 self.validator.element_section(&elements)?;
603
604 for (index, entry) in elements.into_iter().enumerate() {
605 let wasmparser::Element {
606 kind,
607 items,
608 range: _,
609 } = entry?;
610
611 // Build up a list of `FuncIndex` corresponding to all the
612 // entries listed in this segment. Note that it's not
613 // possible to create anything other than a `ref.null
614 // extern` for externref segments, so those just get
615 // translated to the reserved value of `FuncIndex`.
616 let elements = match items {
617 ElementItems::Functions(funcs) => {
618 let mut elems =
619 Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
620 for func in funcs {
621 let func = FuncIndex::from_u32(func?);
622 self.flag_func_escaped(func);
623 elems.push(func);
624 }
625 TableSegmentElements::Functions(elems.into())
626 }
627 ElementItems::Expressions(ty, items) => {
628 let ty = self.convert_ref_type(ty)?;
629 let mut exprs =
630 Vec::with_capacity(usize::try_from(items.count()).unwrap());
631 for expr in items {
632 let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
633 exprs.push(expr);
634 for func in escaped {
635 self.flag_func_escaped(func);
636 }
637 }
638 TableSegmentElements::Expressions {
639 ty,
640 exprs: exprs.into(),
641 }
642 }
643 };
644
645 let passive_index = match kind {
646 ElementKind::Active {
647 table_index,
648 offset_expr,
649 } => {
650 let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
651 let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
652 debug_assert!(escaped.is_empty());
653
654 self.result
655 .table_initialization
656 .segments
657 .push(TableSegment {
658 table_index,
659 offset,
660 elements,
661 })?;
662 None
663 }
664
665 ElementKind::Passive => {
666 let passive_index = self
667 .result
668 .module
669 .passive_elements
670 .push((elements.ty(), elements.len()))?;
671 self.result.passive_elements.push(elements);
672 // One-time initialization of passive element
673 // segments is deferred to the startup function.
674 self.require_startup_func();
675 Some(passive_index)
676 }
677
678 ElementKind::Declared => None,
679 };
680 let elem_index = ElemIndex::from_u32(index as u32);
681 self.result
682 .passive_elem_map
683 .insert(elem_index, passive_index);
684 }
685 }
686
687 Payload::CodeSectionStart { count, range, .. } => {
688 self.validator.code_section_start(&range)?;
689 let cnt = usize::try_from(count).unwrap();
690 self.result.function_body_inputs.reserve_exact(cnt);
691 self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
692 }
693
694 Payload::CodeSectionEntry(body) => {
695 let validator = self.validator.code_section_entry(&body)?;
696 let func_index =
697 self.result.code_index + self.result.module.num_imported_funcs as u32;
698 let func_index = FuncIndex::from_u32(func_index);
699
700 if self.tunables.debug_native {
701 let sig_index = self.result.module.functions[func_index]
702 .signature
703 .unwrap_module_type_index();
704 let sig = self.types[sig_index].unwrap_func();
705 let mut locals = Vec::new();
706 for pair in body.get_locals_reader()? {
707 let (cnt, ty) = pair?;
708 let ty = self.convert_valtype(ty)?;
709 locals.push((cnt, ty));
710 }
711 self.result
712 .debuginfo
713 .wasm_file
714 .funcs
715 .push(FunctionMetadata {
716 locals: locals.into_boxed_slice(),
717 params: sig.params().into(),
718 });
719 }
720 if self.tunables.debug_guest {
721 // All functions are potentially reachable and
722 // callable by the guest debugger, so they must
723 // all be flagged as escaping.
724 self.flag_func_escaped(func_index);
725 }
726 self.result
727 .function_body_inputs
728 .push(FunctionBodyData { validator, body });
729 self.result.code_index += 1;
730 }
731
732 Payload::DataSection(data) => {
733 self.validator.data_section(&data)?;
734
735 assert!(self.result.module.memory_initialization.is_segmented());
736
737 for (index, entry) in data.into_iter().enumerate() {
738 let wasmparser::Data {
739 kind,
740 data,
741 range: _,
742 } = entry?;
743 let data_index = DataIndex::from_u32(index.try_into().unwrap());
744 match kind {
745 DataKind::Active {
746 memory_index,
747 offset_expr,
748 } => {
749 let memory_index = MemoryIndex::from_u32(memory_index);
750 let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
751 debug_assert!(escaped.is_empty());
752
753 let MemoryInit::Unprocessed(list) = &mut self.result.memory_init else {
754 panic!("memory initializers should be unprocessed at this point");
755 };
756 list.push(MemoryInitializer {
757 memory_index,
758 offset,
759 data,
760 });
761 }
762 DataKind::Passive => {
763 self.result.passive_data.push((data_index, data));
764 }
765 }
766 }
767 }
768
769 Payload::DataCountSection { count, range } => {
770 self.validator.data_count_section(count, &range)?;
771
772 // Note: the count passed in here is the *total* segment count
773 // There is no way to reserve for just the passive segments as
774 // they are discovered when iterating the data section entries
775 // Given that the total segment count might be much larger than
776 // the passive count, do not reserve anything here.
777 }
778
779 Payload::CustomSection(s)
780 if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
781 {
782 bail!(
783 "\
784Support for interface types has temporarily been removed from `wasmtime`.
785
786For more information about this temporary change you can read on the issue online:
787
788 https://github.com/bytecodealliance/wasmtime/issues/1271
789
790and for re-adding support for interface types you can see this issue:
791
792 https://github.com/bytecodealliance/wasmtime/issues/677
793"
794 )
795 }
796
797 Payload::CustomSection(s) => {
798 self.register_custom_section(&s);
799 }
800
801 // It's expected that validation will probably reject other
802 // payloads such as `UnknownSection` or those related to the
803 // component model. If, however, something gets past validation then
804 // that's a bug in Wasmtime as we forgot to implement something.
805 other => {
806 self.validator.payload(&other)?;
807 panic!("unimplemented section in wasm file {other:?}");
808 }
809 }
810 Ok(())
811 }
812
813 fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
814 match section.as_known() {
815 KnownCustom::Name(name) => {
816 let result = self.name_section(name);
817 if let Err(e) = result {
818 log::warn!("failed to parse name section {e:?}");
819 }
820 }
821 KnownCustom::BranchHints(reader) if self.tunables.branch_hinting => {
822 // Branch hints are advisory and this section is never validated;
823 // it is decoded lazily during compilation, so record only the
824 // per-function sub-readers here. Discard the whole section if any
825 // entry is malformed rather than applying it partially.
826 let mut hints = HashMap::new();
827 let result: wasmparser::Result<()> = reader.into_iter().try_for_each(|func| {
828 let func = func?;
829 // A well-formed section lists each function at most once; keep
830 // the first entry deterministically if it repeats.
831 hints
832 .entry(FuncIndex::from_u32(func.func))
833 .or_insert(func.hints);
834 Ok(())
835 });
836 match result {
837 Ok(()) => self.result.branch_hints = hints,
838 Err(e) => log::warn!("failed to parse branch-hint section {e:?}"),
839 }
840 }
841 _ => {
842 let name = section.name().trim_end_matches(".dwo");
843 if name.starts_with(".debug_") {
844 self.dwarf_section(name, section);
845 }
846 }
847 }
848 }
849
850 fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
851 if !self.tunables.debug_native && !self.tunables.parse_wasm_debuginfo {
852 self.result.has_unparsed_debuginfo = true;
853 return;
854 }
855 let info = &mut self.result.debuginfo;
856 let dwarf = &mut info.dwarf;
857 let endian = gimli::LittleEndian;
858 let data = section.data();
859 let slice = gimli::EndianSlice::new(data, endian);
860
861 match name {
862 // `gimli::Dwarf` fields.
863 ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
864 ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
865 ".debug_info" => {
866 dwarf.debug_info = gimli::DebugInfo::new(data, endian);
867 }
868 ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
869 ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
870 ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
871 ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
872 ".debug_str_sup" => {
873 let mut dwarf_sup: Dwarf<'data> = Default::default();
874 dwarf_sup.debug_str = gimli::DebugStr::from(slice);
875 dwarf.sup = Some(Arc::new(dwarf_sup));
876 }
877 ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),
878
879 // Additional fields.
880 ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
881 ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
882 ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
883 ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),
884
885 // DWARF package fields
886 ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
887 ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),
888
889 // We don't use these at the moment.
890 ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
891 other => {
892 log::warn!("unknown debug section `{other}`");
893 return;
894 }
895 }
896
897 dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
898 dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
899 }
900
901 /// Declares a new import with the `module` and `field` names, importing the
902 /// `ty` specified.
903 ///
904 /// Note that this method is somewhat tricky due to the implementation of
905 /// the module linking proposal. In the module linking proposal two-level
906 /// imports are recast as single-level imports of instances. That recasting
907 /// happens here by recording an import of an instance for the first time
908 /// we see a two-level import.
909 ///
910 /// When the module linking proposal is disabled, however, disregard this
911 /// logic and instead work directly with two-level imports since no
912 /// instances are defined.
913 fn declare_import(
914 &mut self,
915 module: &'data str,
916 field: &'data str,
917 ty: EntityType,
918 ) -> Result<(), OutOfMemory> {
919 let index = self.push_type(ty);
920 self.result.module.initializers.push(Initializer::Import {
921 name: self.result.module.strings.insert(module)?,
922 field: self.result.module.strings.insert(field)?,
923 index,
924 })?;
925 Ok(())
926 }
927
928 fn push_type(&mut self, ty: EntityType) -> EntityIndex {
929 match ty {
930 EntityType::Function(ty) => EntityIndex::Function({
931 let func_index = self
932 .result
933 .module
934 .push_function(ty.unwrap_module_type_index());
935 // Imported functions can escape; in fact, they've already done
936 // so to get here.
937 self.flag_func_escaped(func_index);
938 func_index
939 }),
940 EntityType::Table(ty) => {
941 EntityIndex::Table(self.result.module.tables.push(ty).panic_on_oom())
942 }
943 EntityType::Memory(ty) => {
944 EntityIndex::Memory(self.result.module.memories.push(ty).panic_on_oom())
945 }
946 EntityType::Global(ty) => {
947 EntityIndex::Global(self.result.module.globals.push(ty).panic_on_oom())
948 }
949 EntityType::Tag(ty) => {
950 EntityIndex::Tag(self.result.module.tags.push(ty).panic_on_oom())
951 }
952 }
953 }
954
955 fn flag_func_escaped(&mut self, func: FuncIndex) {
956 let ty = &mut self.result.module.functions[func];
957 // If this was already assigned a funcref index no need to re-assign it.
958 if ty.is_escaping() {
959 return;
960 }
961 let index = self.result.module.num_escaped_funcs as u32;
962 ty.func_ref = FuncRefIndex::from_u32(index);
963 self.result.module.num_escaped_funcs += 1;
964 }
965
966 /// Parses the Name section of the wasm module.
967 fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
968 for subsection in names {
969 match subsection? {
970 wasmparser::Name::Function(names) => {
971 for name in names {
972 let Naming { index, name } = name?;
973 // Skip this naming if it's naming a function that
974 // doesn't actually exist.
975 if (index as usize) >= self.result.module.functions.len() {
976 continue;
977 }
978
979 // Store the name unconditionally, regardless of
980 // whether we're parsing debuginfo, since function
981 // names are almost always present in the
982 // final compilation artifact.
983 let index = FuncIndex::from_u32(index);
984 self.result
985 .debuginfo
986 .name_section
987 .func_names
988 .insert(index, name);
989 }
990 }
991 wasmparser::Name::Module { name, .. } => {
992 self.result.module.name =
993 Some(self.result.module.strings.insert(name).panic_on_oom());
994 if self.tunables.debug_native {
995 self.result.debuginfo.name_section.module_name = Some(name);
996 }
997 }
998 wasmparser::Name::Local(reader) => {
999 if !self.tunables.debug_native {
1000 continue;
1001 }
1002 for f in reader {
1003 let f = f?;
1004 // Skip this naming if it's naming a function that
1005 // doesn't actually exist.
1006 if (f.index as usize) >= self.result.module.functions.len() {
1007 continue;
1008 }
1009 for name in f.names {
1010 let Naming { index, name } = name?;
1011
1012 self.result
1013 .debuginfo
1014 .name_section
1015 .locals_names
1016 .entry(FuncIndex::from_u32(f.index))
1017 .or_insert(HashMap::new())
1018 .insert(index, name);
1019 }
1020 }
1021 }
1022 wasmparser::Name::Label(_)
1023 | wasmparser::Name::Type(_)
1024 | wasmparser::Name::Table(_)
1025 | wasmparser::Name::Global(_)
1026 | wasmparser::Name::Memory(_)
1027 | wasmparser::Name::Element(_)
1028 | wasmparser::Name::Data(_)
1029 | wasmparser::Name::Tag(_)
1030 | wasmparser::Name::Field(_)
1031 | wasmparser::Name::Unknown { .. } => {}
1032 }
1033 }
1034 Ok(())
1035 }
1036
1037 fn require_startup_func(&mut self) {
1038 self.result.require_startup_func(self.types);
1039 }
1040}
1041
1042impl TypeConvert for ModuleEnvironment<'_, '_> {
1043 fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
1044 WasmparserTypeConverter::new(&self.types, |idx| {
1045 self.result.module.types[idx].unwrap_module_type_index()
1046 })
1047 .lookup_heap_type(index)
1048 }
1049
1050 fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
1051 WasmparserTypeConverter::new(&self.types, |idx| {
1052 self.result.module.types[idx].unwrap_module_type_index()
1053 })
1054 .lookup_type_index(index)
1055 }
1056}
1057
1058impl ModuleTranslation<'_> {
1059 /// Called after translation is complete this will finalize the memory
1060 /// initialization strategy for this module.
1061 ///
1062 /// This will notably use `Self::try_static_init` to attempt to massage
1063 /// data segments to being CoW-init-friendly. Afterwards the
1064 /// `self.memory_init` field is transitioned from `Unprocessed` to
1065 /// `Processed`.
1066 pub fn finalize_memory_init(
1067 &mut self,
1068 tunables: &Tunables,
1069 page_size: u64,
1070 max_image_size_always_allowed: u64,
1071 types: &mut ModuleTypesBuilder,
1072 ) {
1073 if tunables.memory_init_cow {
1074 self.try_static_init(page_size, max_image_size_always_allowed);
1075 }
1076
1077 // If any memory is statically initialized, and if that memory has an
1078 // initial data segment, then a startup function is at least
1079 // conditionally needed if the memory needs initialization. Flag as such
1080 // here.
1081 if let MemoryInitialization::Static { map } = &self.module.memory_initialization {
1082 if map.iter().any(|(_, v)| v.is_some()) {
1083 self.require_startup_func_if_memories_need_init(types);
1084 }
1085 }
1086
1087 // If, after `try_static_init`, initializers are still `Unprocessed`
1088 // then this is the catch-all fallback path for initialization. All
1089 // segments are promoted into `self.runtime_data` and then the
1090 // initialization is rewritten to `Processed`.
1091 if let MemoryInit::Unprocessed(list) = &mut self.memory_init {
1092 let segments = mem::take(list);
1093 let mut new_initializers = Vec::new();
1094 for segment in segments {
1095 new_initializers.push((
1096 segment.memory_index,
1097 MemorySegmentOffset::Expr(segment.offset),
1098 self.runtime_data.push(segment.data.into()),
1099 ));
1100 }
1101 if !new_initializers.is_empty() {
1102 self.require_startup_func(types);
1103 }
1104 self.memory_init = MemoryInit::Processed(new_initializers);
1105 }
1106
1107 // At this point append all passive data to the `runtime_data` list.
1108 // This notably occurs after `try_static_init` above to ensure that the
1109 // page-aligned data for static initialization, if applicable, comes
1110 // first.
1111 for (data_index, segment) in self.passive_data.iter() {
1112 let runtime_index = self.runtime_data.push((*segment).into());
1113 self.runtime_data_map
1114 .insert(*data_index, Some(runtime_index));
1115 }
1116
1117 // And, finally, record all chunks from `self.runtime_data` within
1118 // `self.module.runtime_data` as well.
1119 let mut cur = 0;
1120 for (idx, data) in self.runtime_data.iter() {
1121 let len = u32::try_from(data.len()).unwrap();
1122 let i = self.module.runtime_data.push(cur..cur + len).panic_on_oom();
1123 cur += len;
1124 assert_eq!(idx, i);
1125 }
1126 }
1127
1128 /// Attempts to convert segmented memory initialization into static
1129 /// initialization for the module that this translation represents.
1130 ///
1131 /// If this module's memory initialization is not compatible with paged
1132 /// initialization then this won't change anything. Otherwise if it is
1133 /// compatible then the `memory_initialization` field will be updated.
1134 ///
1135 /// Takes a `page_size` argument in order to ensure that all
1136 /// initialization is page-aligned for mmap-ability, and
1137 /// `max_image_size_always_allowed` to control how we decide
1138 /// whether to use static init.
1139 ///
1140 /// We will try to avoid generating very sparse images, which are
1141 /// possible if e.g. a module has an initializer at offset 0 and a
1142 /// very high offset (say, 1 GiB). To avoid this, we use a dual
1143 /// condition: we always allow images less than
1144 /// `max_image_size_always_allowed`, and the embedder of Wasmtime
1145 /// can set this if desired to ensure that static init should
1146 /// always be done if the size of the module or its heaps is
1147 /// otherwise bounded by the system. We also allow images with
1148 /// static init data bigger than that, but only if it is "dense",
1149 /// defined as having at least half (50%) of its pages with some
1150 /// data.
1151 ///
1152 /// We could do something slightly better by building a dense part
1153 /// and keeping a sparse list of outlier/leftover segments (see
1154 /// issue #3820). This would also allow mostly-static init of
1155 /// modules that have some dynamically-placed data segments. But,
1156 /// for now, this is sufficient to allow a system that "knows what
1157 /// it's doing" to always get static init.
1158 fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
1159 let segments = match &mut self.memory_init {
1160 MemoryInit::Unprocessed(list) => list,
1161 _ => return,
1162 };
1163
1164 // First a dry run of memory initialization is performed. This
1165 // collects information about the extent of memory initialized for each
1166 // memory as well as the size of all data segments being copied in.
1167 struct Memory<'a> {
1168 data_size: u64,
1169 min_addr: u64,
1170 max_addr: u64,
1171 segments: Vec<(u64, &'a [u8])>,
1172 }
1173 let mut info = PrimaryMap::with_capacity(self.module.memories.len());
1174 for _ in 0..self.module.memories.len() {
1175 info.push(Memory {
1176 data_size: 0,
1177 min_addr: u64::MAX,
1178 max_addr: 0,
1179 segments: Vec::new(),
1180 });
1181 }
1182
1183 for initializer in segments.iter() {
1184 let &MemoryInitializer {
1185 memory_index,
1186 ref offset,
1187 ref data,
1188 } = initializer;
1189
1190 // Currently `Static` only applies to locally-defined memories,
1191 // so if a data segment references an imported memory then
1192 // transitioning to a `Static` memory initializer is not
1193 // possible.
1194 if self.module.defined_memory_index(memory_index).is_none() {
1195 return;
1196 }
1197
1198 // First up determine the start/end range and verify that they're
1199 // in-bounds for the initial size of the memory at `memory_index`.
1200 // Note that this can bail if we don't have access to globals yet
1201 // (e.g. this is a task happening before instantiation at
1202 // compile-time).
1203 let start = match (offset.ops(), self.module.memories[memory_index].idx_type) {
1204 (&[ConstOp::I32Const(offset)], IndexType::I32) => offset.cast_unsigned().into(),
1205 (&[ConstOp::I64Const(offset)], IndexType::I64) => offset.cast_unsigned(),
1206 _ => return,
1207 };
1208 let len = u64::try_from(data.len()).unwrap();
1209 let end = match start.checked_add(len) {
1210 Some(end) => end,
1211 None => return,
1212 };
1213
1214 match self.module.memories[memory_index].minimum_byte_size() {
1215 Ok(max) => {
1216 if end > max {
1217 return;
1218 }
1219 }
1220
1221 // Note that computing the minimum can overflow if the page
1222 // size is the default 64KiB and the memory's minimum size in
1223 // pages is `1 << 48`, the maximum number of minimum pages for
1224 // 64-bit memories. We don't return `false` to signal an error
1225 // here and instead defer the error to runtime, when it will be
1226 // impossible to allocate that much memory anyways.
1227 Err(_) => return,
1228 }
1229
1230 // Skip empty in-bounds data segments.
1231 if data.is_empty() {
1232 continue;
1233 }
1234
1235 let info = &mut info[memory_index];
1236 let len64 = u64::try_from(data.len()).unwrap();
1237 info.data_size += len64;
1238 info.min_addr = info.min_addr.min(start);
1239 info.max_addr = info.max_addr.max(start + len64);
1240 info.segments.push((start, data));
1241 }
1242
1243 // Validate that the memory information collected is indeed valid for
1244 // static memory initialization.
1245 for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
1246 let image_size = info.max_addr - info.min_addr;
1247
1248 // Simplify things for now by bailing out entirely if any memory has
1249 // a page size smaller than the host's page size. This fixes a case
1250 // where currently initializers are created in host-page-size units
1251 // of length which means that a larger-than-the-entire-memory
1252 // initializer can be created. This can be handled technically but
1253 // would require some more changes to help fix the assert elsewhere
1254 // that this protects against.
1255 if self.module.memories[i].page_size() < page_size {
1256 return;
1257 }
1258
1259 // If the range of memory being initialized is less than twice the
1260 // total size of the data itself then it's assumed that static
1261 // initialization is ok. This means we'll at most double memory
1262 // consumption during the memory image creation process, which is
1263 // currently assumed to "probably be ok" but this will likely need
1264 // tweaks over time.
1265 if image_size < info.data_size.saturating_mul(2) {
1266 continue;
1267 }
1268
1269 // If the memory initialization image is larger than the size of all
1270 // data, then we still allow memory initialization if the image will
1271 // be of a relatively modest size, such as 1MB here.
1272 if image_size < max_image_size_always_allowed {
1273 continue;
1274 }
1275
1276 // At this point memory initialization is concluded to be too
1277 // expensive to do at compile time so it's entirely deferred to
1278 // happen at runtime.
1279 return;
1280 }
1281
1282 // Here's where we've now committed to changing to static memory. The
1283 // memory initialization image is built here from the page data and then
1284 // it's converted to a single initializer.
1285 let mut map = TryPrimaryMap::with_capacity(info.len()).panic_on_oom();
1286 let mut new_initializers = Vec::new();
1287 for (memory, info) in info.iter() {
1288 // Create the in-memory `image` which is the initialized contents of
1289 // this linear memory.
1290 let extent = if info.segments.len() > 0 {
1291 (info.max_addr - info.min_addr) as usize
1292 } else {
1293 0
1294 };
1295 let mut image = Vec::with_capacity(extent);
1296 for (offset, data) in info.segments.iter() {
1297 let offset = usize::try_from(*offset - info.min_addr).unwrap();
1298 if image.len() < offset {
1299 image.resize(offset, 0u8);
1300 image.extend_from_slice(data);
1301 } else {
1302 image.splice(
1303 offset..(offset + data.len()).min(image.len()),
1304 data.iter().copied(),
1305 );
1306 }
1307 }
1308 assert_eq!(image.len(), extent);
1309 assert_eq!(image.capacity(), extent);
1310 let mut offset = if info.segments.len() > 0 {
1311 info.min_addr
1312 } else {
1313 0
1314 };
1315
1316 // Chop off trailing zeros from the image as memory is already
1317 // zero-initialized. Note that `i` is the position of a nonzero
1318 // entry here, so to not lose it we truncate to `i + 1`.
1319 if let Some(i) = image.iter().rposition(|i| *i != 0) {
1320 image.truncate(i + 1);
1321 }
1322
1323 // Also chop off leading zeros, if any.
1324 if let Some(i) = image.iter().position(|i| *i != 0) {
1325 offset += i as u64;
1326 image.drain(..i);
1327 }
1328 let mut len = u64::try_from(image.len()).unwrap();
1329
1330 // The goal is to enable mapping this image directly into memory, so
1331 // the offset into linear memory must be a multiple of the page
1332 // size. If that's not already the case then the image is padded at
1333 // the front and back with extra zeros as necessary
1334 if offset % page_size != 0 {
1335 let zero_padding = offset % page_size;
1336 image.splice(0..0, std::iter::repeat(0).take(zero_padding as usize));
1337 offset -= zero_padding;
1338 len += zero_padding;
1339 }
1340 if len % page_size != 0 {
1341 let zero_padding = page_size - (len % page_size);
1342 image.extend(std::iter::repeat(0).take(zero_padding as usize));
1343 len += zero_padding;
1344 }
1345 let runtime_index = if image.is_empty() {
1346 None
1347 } else {
1348 Some(self.runtime_data.push(image.into()))
1349 };
1350
1351 // Offset/length should now always be page-aligned.
1352 assert!(offset % page_size == 0);
1353 assert!(len % page_size == 0);
1354
1355 // Record the static memory initializer which describes this image,
1356 // only needed if the image is actually present and has a nonzero
1357 // length. The `offset` has been calculates above, originally
1358 // sourced from `info.min_addr`. The `data` field is the extent
1359 // within the final data segment we'll emit to an ELF image, which
1360 // is the concatenation of `self.data`, so here it's the size of
1361 // the section-so-far plus the current segment we're appending.
1362 let idx = map.push(runtime_index.map(|i| (offset, i))).panic_on_oom();
1363 assert_eq!(idx, memory);
1364 if let Some(runtime_index) = runtime_index {
1365 new_initializers.push((idx, MemorySegmentOffset::Static(offset), runtime_index));
1366 }
1367 }
1368 self.data_align = Some(page_size);
1369 self.module.memory_initialization = MemoryInitialization::Static { map };
1370 self.memory_init = MemoryInit::Processed(new_initializers);
1371 }
1372
1373 /// Finalizes the initialization of tables.
1374 ///
1375 /// This is invoked after translation and notably uses
1376 /// `Self::try_func_table_init` to attempt to optimize initialization of
1377 /// tables into static precomputed images.
1378 pub fn finalize_table_init(&mut self, tunables: &Tunables, types: &mut ModuleTypesBuilder) {
1379 if tunables.table_lazy_init {
1380 self.try_func_table_init();
1381 }
1382
1383 // If any table has a non-null initializers, or if there's any active
1384 // data segments, then a startup function is unconditionally required to
1385 // configure the table.
1386 if self
1387 .table_initialization
1388 .initial_values
1389 .iter()
1390 .any(|(_, v)| !matches!(v, TableInitialValue::Null))
1391 || !self.table_initialization.segments.is_empty()
1392 {
1393 self.require_startup_func(types);
1394 }
1395 }
1396
1397 /// Attempts to convert the module's table initializers to
1398 /// FuncTable form where possible. This enables lazy table
1399 /// initialization later by providing a one-to-one map of initial
1400 /// table values, without having to parse all segments.
1401 fn try_func_table_init(&mut self) {
1402 // This should be large enough to support very large Wasm
1403 // modules with huge funcref tables, but small enough to avoid
1404 // OOMs or DoS on truly sparse tables.
1405 const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1406
1407 // First convert any element-initialized tables to images of just that
1408 // single function if the minimum size of the table allows doing so.
1409 for ((i, init), (_, table)) in self.table_initialization.initial_values.iter_mut().zip(
1410 self.module
1411 .tables
1412 .iter()
1413 .skip(self.module.num_imported_tables),
1414 ) {
1415 let table_size = table.limits.min;
1416 if table_size > MAX_FUNC_TABLE_SIZE {
1417 continue;
1418 }
1419 if let TableInitialValue::Expr(expr) = init {
1420 if let [ConstOp::RefFunc(f)] = expr.ops() {
1421 assert!(self.module.table_initialization[i].is_empty());
1422 self.module.table_initialization[i] =
1423 try_vec![*f; table_size as usize].panic_on_oom();
1424 *init = TableInitialValue::Null;
1425 }
1426 }
1427 }
1428
1429 let mut segments = mem::take(&mut self.table_initialization.segments)
1430 .into_iter()
1431 .peekable();
1432
1433 // The goal of this loop is to interpret a table segment and apply it
1434 // "statically" to a local table. This will iterate over segments and
1435 // apply them one-by-one to each table.
1436 //
1437 // If any segment can't be applied, however, then this loop exits and
1438 // all remaining segments are placed back into the segment list. This is
1439 // because segments are supposed to be initialized one-at-a-time which
1440 // means that intermediate state is visible with respect to traps. If
1441 // anything isn't statically known to not trap it's pessimistically
1442 // assumed to trap meaning all further segment initializers must be
1443 // applied manually at instantiation time.
1444 while let Some(segment) = segments.peek() {
1445 let defined_index = match self.module.defined_table_index(segment.table_index) {
1446 Some(index) => index,
1447 // Skip imported tables: we can't provide a preconstructed
1448 // table for them, because their values depend on the
1449 // imported table overlaid with whatever segments we have.
1450 None => break,
1451 };
1452
1453 // If the base of this segment is dynamic, then we can't
1454 // include it in the statically-built array of initial
1455 // contents.
1456 let offset = match segment.offset.ops() {
1457 &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
1458 &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
1459 _ => break,
1460 };
1461
1462 // Get the end of this segment. If out-of-bounds, or too
1463 // large for our dense table representation, then skip the
1464 // segment.
1465 let top = match offset.checked_add(segment.elements.len()) {
1466 Some(top) => top,
1467 None => break,
1468 };
1469 let table_size = self.module.tables[segment.table_index].limits.min;
1470 if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1471 break;
1472 }
1473
1474 match self.module.tables[segment.table_index]
1475 .ref_type
1476 .heap_type
1477 .top()
1478 {
1479 WasmHeapTopType::Func => {}
1480 // If this is not a funcref table, then we can't support a
1481 // pre-computed table of function indices. Technically this
1482 // initializer won't trap so we could continue processing
1483 // segments, but that's left as a future optimization if
1484 // necessary.
1485 WasmHeapTopType::Any
1486 | WasmHeapTopType::Extern
1487 | WasmHeapTopType::Cont
1488 | WasmHeapTopType::Exn => break,
1489 }
1490
1491 // Function indices can be optimized here, but fully general
1492 // expressions are deferred to get evaluated at runtime.
1493 let function_elements = match &segment.elements {
1494 TableSegmentElements::Functions(indices) => indices,
1495 TableSegmentElements::Expressions { .. } => break,
1496 };
1497
1498 match &self.table_initialization.initial_values[defined_index] {
1499 TableInitialValue::Null => {}
1500
1501 // If this table is still listed as an initial value here
1502 // then that means the initial size of the table doesn't
1503 // support a precomputed function list, so skip this.
1504 // Technically this won't trap so it's possible to process
1505 // further initializers, but that's left as a future
1506 // optimization.
1507 TableInitialValue::Expr(_) => break,
1508 }
1509 let precomputed = &mut self.module.table_initialization[defined_index];
1510
1511 // At this point we're committing to pre-initializing the table
1512 // with the `segment` that's being iterated over. This segment is
1513 // applied to the `precomputed` list for the table by ensuring
1514 // it's large enough to hold the segment and then copying the
1515 // segment into the precomputed list.
1516 if precomputed.len() < top as usize {
1517 precomputed
1518 .resize(top as usize, FuncIndex::reserved_value())
1519 .panic_on_oom();
1520 }
1521 let dst = &mut precomputed[offset as usize..top as usize];
1522 dst.copy_from_slice(&function_elements);
1523
1524 // advance the iterator to see the next segment
1525 let _ = segments.next();
1526 }
1527 self.table_initialization.segments = segments.try_collect().panic_on_oom();
1528 }
1529
1530 /// Helper function to ratchet the `startup` function for this module as
1531 /// `Always`.
1532 fn require_startup_func(&mut self, types: &mut ModuleTypesBuilder) {
1533 let ty = match self.module.startup {
1534 ModuleStartup::None => types.startup_func_type().into(),
1535 ModuleStartup::Always(_) => return,
1536 ModuleStartup::IfMemoriesNeedInit(ty) => ty,
1537 };
1538 self.module.startup = ModuleStartup::Always(ty);
1539 }
1540
1541 /// Helper function to ratchet the `startup` function for this module as
1542 /// `IfMemoriesNeedInit`.
1543 fn require_startup_func_if_memories_need_init(&mut self, types: &mut ModuleTypesBuilder) {
1544 let ty = match self.module.startup {
1545 ModuleStartup::None => types.startup_func_type().into(),
1546 ModuleStartup::Always(_) | ModuleStartup::IfMemoriesNeedInit(_) => return,
1547 };
1548 self.module.startup = ModuleStartup::IfMemoriesNeedInit(ty);
1549 }
1550}