wasmtime_environ/compile/module_environ.rs

use crate::module::{
    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
    TableSegmentElements,
};
use crate::{
    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
    EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,
    ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
    TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, WasmError,
    WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
};
use crate::{StaticModuleIndex, prelude::*};
use anyhow::{Result, bail};
use cranelift_entity::SecondaryMap;
use cranelift_entity::packed_option::ReservedValue;
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem;
use std::path::PathBuf;
use std::sync::Arc;
use wasmparser::{
    CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
    Validator, ValidatorResources, types::Types,
};

/// Object containing the standalone environment information.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    validator: &'a mut Validator,
    tunables: &'a Tunables,
}

/// The result of translating via `ModuleEnvironment`.
///
/// Function bodies are not yet translated, and data initializers have not yet
/// been copied out of the original buffer.
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// This module's index.
    pub module_index: StaticModuleIndex,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// For each imported function, the single statically-known defined function
    /// that satisfies that import, if any. This is used to turn what would
    /// otherwise be indirect calls through the imports table into direct calls,
    /// when possible.
    pub known_imported_functions:
        SecondaryMap<FuncIndex, Option<(StaticModuleIndex, DefinedFuncIndex)>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted, and
    /// trampolines for each of these signatures are required.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    pub data_align: Option<u64>,

    /// Total size of all data pushed onto `data` so far.
    total_data: u32,

    /// List of passive element segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size of all passive data pushed into `passive_data` so far.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    types: Option<Types>,
}

impl<'data> ModuleTranslation<'data> {
    /// Create a new translation for the module with the given index.
    pub fn new(module_index: StaticModuleIndex) -> Self {
        Self {
            module_index,
            module: Module::default(),
            wasm: &[],
            function_body_inputs: PrimaryMap::default(),
            known_imported_functions: SecondaryMap::default(),
            exported_signatures: Vec::default(),
            debuginfo: DebugInfoData::default(),
            has_unparsed_debuginfo: false,
            data: Vec::default(),
            data_align: None,
            total_data: 0,
            passive_data: Vec::default(),
            total_passive_data: 0,
            code_index: 0,
            types: None,
        }
    }

    /// Returns a reference to the type information of the current module.
    pub fn get_types(&self) -> &Types {
        self.types
            .as_ref()
            .expect("module type information to be available")
    }
}

/// Contains function data: byte code and its offset in the module.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body
    pub validator: FuncToValidate<ValidatorResources>,
}

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    pub dwarf: Dwarf<'a>,
    pub name_section: NameSection<'a>,
    pub wasm_file: WasmFileInfo,
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}

#[expect(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    pub module_name: Option<&'a str>,
    pub func_names: HashMap<FuncIndex, &'a str>,
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    pub path: Option<PathBuf>,
    pub code_section_offset: u64,
    pub imported_func_count: u32,
    pub funcs: Vec<FunctionMetadata>,
}

#[derive(Debug)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    pub params: Box<[WasmValType]>,
    pub locals: Box<[(u32, WasmValType)]>,
}

impl<'a, 'data> ModuleEnvironment<'a, 'data> {
    /// Allocates the environment data structures.
    pub fn new(
        tunables: &'a Tunables,
        validator: &'a mut Validator,
        types: &'a mut ModuleTypesBuilder,
        module_index: StaticModuleIndex,
    ) -> Self {
        Self {
            result: ModuleTranslation::new(module_index),
            types,
            tunables,
            validator,
        }
    }

    /// Translate a wasm module using this environment.
    ///
    /// This function will translate the `data` provided with `parser`,
    /// validating everything along the way with this environment's validator.
    ///
    /// The result of translation, [`ModuleTranslation`], contains everything
    /// necessary to compile functions afterwards as well as learn type
    /// information about the module at runtime.
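    ///
    /// A minimal usage sketch (hypothetical embedder code, not taken from this
    /// crate's documentation; the surrounding variables are assumed to exist):
    ///
    /// ```ignore
    /// // Assumes `tunables`, `validator`, `types`, and `wasm` are already set up.
    /// let env = ModuleEnvironment::new(
    ///     &tunables,
    ///     &mut validator,
    ///     &mut types,
    ///     StaticModuleIndex::from_u32(0),
    /// );
    /// let translation = env.translate(Parser::new(0), &wasm)?;
    /// ```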
    pub fn translate(
        mut self,
        parser: Parser,
        data: &'data [u8],
    ) -> Result<ModuleTranslation<'data>> {
        self.result.wasm = data;

        for payload in parser.parse_all(data) {
            self.translate_payload(payload?)?;
        }

        Ok(self.result)
    }

    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
        match payload {
            Payload::Version {
                num,
                encoding,
                range,
            } => {
                self.validator.version(num, encoding, &range)?;
                match encoding {
                    Encoding::Module => {}
                    Encoding::Component => {
                        bail!("expected a WebAssembly module but was given a WebAssembly component")
                    }
                }
            }

            Payload::End(offset) => {
                self.result.types = Some(self.validator.end(offset)?);

                // With the set of escaping functions now known, we can
                // calculate the set of exported signatures as the set of
                // escaping functions' signatures.
                self.result.exported_signatures = self
                    .result
                    .module
                    .functions
                    .iter()
                    .filter_map(|(_, func)| {
                        if func.is_escaping() {
                            Some(func.signature.unwrap_module_type_index())
                        } else {
                            None
                        }
                    })
                    .collect();
                self.result.exported_signatures.sort_unstable();
                self.result.exported_signatures.dedup();
            }

            Payload::TypeSection(types) => {
                self.validator.type_section(&types)?;

                let count = self.validator.types(0).unwrap().core_type_count_in_module();
                log::trace!("interning {count} Wasm types");

                let capacity = usize::try_from(count).unwrap();
                self.result.module.types.reserve(capacity);
                self.types.reserve_wasm_signatures(capacity);

                // Iterate over each *rec group* -- not type -- defined in the
                // types section. Rec groups are the unit of canonicalization
                // and therefore the unit at which we need to process at a
                // time. `wasmparser` has already done the hard work of
                // de-duplicating and canonicalizing the rec groups within the
                // module for us, we just need to translate them into our data
                // structures. Note that, if the Wasm defines duplicate rec
                // groups, we need to copy the duplicates over (shallowly) as
                // well, so that our types index space doesn't have holes.
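                //
                // For example (illustrative): a type section containing
                // `(rec (type $a ...) (type $b ...)) (type $c ...)` has two
                // rec groups, so the loop below advances `type_index` first
                // by 2 and then by 1.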
                let mut type_index = 0;
                while type_index < count {
                    let validator_types = self.validator.types(0).unwrap();

                    // Get the rec group for the current type index, which is
                    // always the first type defined in a rec group.
                    log::trace!("looking up wasmparser type for index {type_index}");
                    let core_type_id = validator_types.core_type_at_in_module(type_index);
                    log::trace!(
                        "  --> {core_type_id:?} = {:?}",
                        validator_types[core_type_id],
                    );
                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
                    debug_assert_eq!(
                        validator_types
                            .rec_group_elements(rec_group_id)
                            .position(|id| id == core_type_id),
                        Some(0)
                    );

                    // Intern the rec group and then fill in this module's types
                    // index space.
                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
                    let elems = self.types.rec_group_elements(interned);
                    let len = elems.len();
                    self.result.module.types.reserve(len);
                    for ty in elems {
                        self.result.module.types.push(ty.into());
                    }

                    // Advance `type_index` to the start of the next rec group.
                    type_index += u32::try_from(len).unwrap();
                }
            }

            Payload::ImportSection(imports) => {
                self.validator.import_section(&imports)?;

                let cnt = usize::try_from(imports.count()).unwrap();
                self.result.module.initializers.reserve(cnt);

                for entry in imports {
                    let import = entry?;
                    let ty = match import.ty {
                        TypeRef::Func(index) => {
                            let index = TypeIndex::from_u32(index);
                            let interned_index = self.result.module.types[index];
                            self.result.module.num_imported_funcs += 1;
                            self.result.debuginfo.wasm_file.imported_func_count += 1;
                            EntityType::Function(interned_index)
                        }
                        TypeRef::Memory(ty) => {
                            self.result.module.num_imported_memories += 1;
                            EntityType::Memory(ty.into())
                        }
                        TypeRef::Global(ty) => {
                            self.result.module.num_imported_globals += 1;
                            EntityType::Global(self.convert_global_type(&ty)?)
                        }
                        TypeRef::Table(ty) => {
                            self.result.module.num_imported_tables += 1;
                            EntityType::Table(self.convert_table_type(&ty)?)
                        }
                        TypeRef::Tag(ty) => {
                            let index = TypeIndex::from_u32(ty.func_type_idx);
                            let signature = self.result.module.types[index];
                            let tag = Tag { signature };
                            self.result.module.num_imported_tags += 1;
                            EntityType::Tag(tag)
                        }
                    };
                    self.declare_import(import.module, import.name, ty);
                }
            }

            Payload::FunctionSection(functions) => {
                self.validator.function_section(&functions)?;

                let cnt = usize::try_from(functions.count()).unwrap();
                self.result.module.functions.reserve_exact(cnt);

                for entry in functions {
                    let sigindex = entry?;
                    let ty = TypeIndex::from_u32(sigindex);
                    let interned_index = self.result.module.types[ty];
                    self.result.module.push_function(interned_index);
                }
            }

            Payload::TableSection(tables) => {
                self.validator.table_section(&tables)?;
                let cnt = usize::try_from(tables.count()).unwrap();
                self.result.module.tables.reserve_exact(cnt);

                for entry in tables {
                    let wasmparser::Table { ty, init } = entry?;
                    let table = self.convert_table_type(&ty)?;
                    self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
                    self.result.module.tables.push(table);
                    let init = match init {
                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
                            precomputed: Vec::new(),
                        },
                        wasmparser::TableInit::Expr(expr) => {
                            let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
                            for f in escaped {
                                self.flag_func_escaped(f);
                            }
                            TableInitialValue::Expr(init)
                        }
                    };
                    self.result
                        .module
                        .table_initialization
                        .initial_values
                        .push(init);
                }
            }

            Payload::MemorySection(memories) => {
                self.validator.memory_section(&memories)?;

                let cnt = usize::try_from(memories.count()).unwrap();
                self.result.module.memories.reserve_exact(cnt);

                for entry in memories {
                    let memory = entry?;
                    self.result.module.memories.push(memory.into());
                }
            }

            Payload::TagSection(tags) => {
                self.validator.tag_section(&tags)?;

                for entry in tags {
                    let sigindex = entry?.func_type_idx;
                    let ty = TypeIndex::from_u32(sigindex);
                    let interned_index = self.result.module.types[ty];
                    self.result.module.push_tag(interned_index);
                }
            }

            Payload::GlobalSection(globals) => {
                self.validator.global_section(&globals)?;

                let cnt = usize::try_from(globals.count()).unwrap();
                self.result.module.globals.reserve_exact(cnt);

                for entry in globals {
                    let wasmparser::Global { ty, init_expr } = entry?;
                    let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
                    for f in escaped {
                        self.flag_func_escaped(f);
                    }
                    let ty = self.convert_global_type(&ty)?;
                    self.result.module.globals.push(ty);
                    self.result.module.global_initializers.push(initializer);
                }
            }

            Payload::ExportSection(exports) => {
                self.validator.export_section(&exports)?;

                let cnt = usize::try_from(exports.count()).unwrap();
                self.result.module.exports.reserve(cnt);

                for entry in exports {
                    let wasmparser::Export { name, kind, index } = entry?;
                    let entity = match kind {
                        ExternalKind::Func => {
                            let index = FuncIndex::from_u32(index);
                            self.flag_func_escaped(index);
                            EntityIndex::Function(index)
                        }
                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
                    };
                    self.result
                        .module
                        .exports
                        .insert(String::from(name), entity);
                }
            }

            Payload::StartSection { func, range } => {
                self.validator.start_section(func, &range)?;

                let func_index = FuncIndex::from_u32(func);
                self.flag_func_escaped(func_index);
                debug_assert!(self.result.module.start_func.is_none());
                self.result.module.start_func = Some(func_index);
            }

            Payload::ElementSection(elements) => {
                self.validator.element_section(&elements)?;

                for (index, entry) in elements.into_iter().enumerate() {
                    let wasmparser::Element {
                        kind,
                        items,
                        range: _,
                    } = entry?;

                    // Build up a list of `FuncIndex` corresponding to all the
                    // entries listed in this segment. Note that it's not
                    // possible to create anything other than a `ref.null
                    // extern` for externref segments, so those just get
                    // translated to the reserved value of `FuncIndex`.
                    let elements = match items {
                        ElementItems::Functions(funcs) => {
                            let mut elems =
                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
                            for func in funcs {
                                let func = FuncIndex::from_u32(func?);
                                self.flag_func_escaped(func);
                                elems.push(func);
                            }
                            TableSegmentElements::Functions(elems.into())
                        }
                        ElementItems::Expressions(_ty, items) => {
                            let mut exprs =
                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
                            for expr in items {
                                let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
                                exprs.push(expr);
                                for func in escaped {
                                    self.flag_func_escaped(func);
                                }
                            }
                            TableSegmentElements::Expressions(exprs.into())
                        }
                    };

                    match kind {
                        ElementKind::Active {
                            table_index,
                            offset_expr,
                        } => {
                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                            debug_assert!(escaped.is_empty());

                            self.result
                                .module
                                .table_initialization
                                .segments
                                .push(TableSegment {
                                    table_index,
                                    offset,
                                    elements,
                                });
                        }

                        ElementKind::Passive => {
                            let elem_index = ElemIndex::from_u32(index as u32);
                            let index = self.result.module.passive_elements.len();
                            self.result.module.passive_elements.push(elements);
                            self.result
                                .module
                                .passive_elements_map
                                .insert(elem_index, index);
                        }

                        ElementKind::Declared => {}
                    }
                }
            }

            Payload::CodeSectionStart { count, range, .. } => {
                self.validator.code_section_start(&range)?;
                let cnt = usize::try_from(count).unwrap();
                self.result.function_body_inputs.reserve_exact(cnt);
                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
            }

            Payload::CodeSectionEntry(body) => {
                let validator = self.validator.code_section_entry(&body)?;
                let func_index =
                    self.result.code_index + self.result.module.num_imported_funcs as u32;
                let func_index = FuncIndex::from_u32(func_index);

                if self.tunables.generate_native_debuginfo {
                    let sig_index = self.result.module.functions[func_index]
                        .signature
                        .unwrap_module_type_index();
                    let sig = self.types[sig_index].unwrap_func();
                    let mut locals = Vec::new();
                    for pair in body.get_locals_reader()? {
                        let (cnt, ty) = pair?;
                        let ty = self.convert_valtype(ty)?;
                        locals.push((cnt, ty));
                    }
                    self.result
                        .debuginfo
                        .wasm_file
                        .funcs
                        .push(FunctionMetadata {
                            locals: locals.into_boxed_slice(),
                            params: sig.params().into(),
                        });
                }
                self.result
                    .function_body_inputs
                    .push(FunctionBodyData { validator, body });
                self.result.code_index += 1;
            }

            Payload::DataSection(data) => {
                self.validator.data_section(&data)?;

                let initializers = match &mut self.result.module.memory_initialization {
                    MemoryInitialization::Segmented(i) => i,
                    _ => unreachable!(),
                };

                let cnt = usize::try_from(data.count()).unwrap();
                initializers.reserve_exact(cnt);
                self.result.data.reserve_exact(cnt);

                for (index, entry) in data.into_iter().enumerate() {
                    let wasmparser::Data {
                        kind,
                        data,
                        range: _,
                    } = entry?;
                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
                        let range = u32::try_from(data.len())
                            .ok()
                            .and_then(|size| {
                                let start = *total;
                                let end = start.checked_add(size)?;
                                Some(start..end)
                            })
                            .ok_or_else(|| {
                                WasmError::Unsupported(format!(
                                    "more than 4 gigabytes of data in wasm module",
                                ))
                            })?;
                        *total += range.end - range.start;
                        Ok(range)
                    };
                    match kind {
                        DataKind::Active {
                            memory_index,
                            offset_expr,
                        } => {
                            let range = mk_range(&mut self.result.total_data)?;
                            let memory_index = MemoryIndex::from_u32(memory_index);
                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                            debug_assert!(escaped.is_empty());

                            let initializers = match &mut self.result.module.memory_initialization {
                                MemoryInitialization::Segmented(i) => i,
                                _ => unreachable!(),
                            };
                            initializers.push(MemoryInitializer {
                                memory_index,
                                offset,
                                data: range,
                            });
                            self.result.data.push(data.into());
                        }
                        DataKind::Passive => {
                            let data_index = DataIndex::from_u32(index as u32);
                            let range = mk_range(&mut self.result.total_passive_data)?;
                            self.result.passive_data.push(data);
                            self.result
                                .module
                                .passive_data_map
                                .insert(data_index, range);
                        }
                    }
                }
            }

            Payload::DataCountSection { count, range } => {
                self.validator.data_count_section(count, &range)?;

                // Note: the count passed in here is the *total* segment count.
                // There is no way to reserve for just the passive segments, as
                // they are discovered while iterating the data section entries.
                // Given that the total segment count might be much larger than
                // the passive count, do not reserve anything here.
            }

            Payload::CustomSection(s)
                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
            {
                bail!(
                    "\
Support for interface types has temporarily been removed from `wasmtime`.

For more information about this temporary change you can read on the issue online:

    https://github.com/bytecodealliance/wasmtime/issues/1271

and for re-adding support for interface types you can see this issue:

    https://github.com/bytecodealliance/wasmtime/issues/677
"
                )
            }

            Payload::CustomSection(s) => {
                self.register_custom_section(&s);
            }

            // It's expected that validation will probably reject other
            // payloads such as `UnknownSection` or those related to the
            // component model. If, however, something gets past validation then
            // that's a bug in Wasmtime as we forgot to implement something.
            other => {
                self.validator.payload(&other)?;
                panic!("unimplemented section in wasm file {other:?}");
            }
        }
        Ok(())
    }

    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
        match section.as_known() {
            KnownCustom::Name(name) => {
                let result = self.name_section(name);
                if let Err(e) = result {
                    log::warn!("failed to parse name section {e:?}");
                }
            }
            _ => {
                let name = section.name().trim_end_matches(".dwo");
                if name.starts_with(".debug_") {
                    self.dwarf_section(name, section);
                }
            }
        }
    }

    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
            self.result.has_unparsed_debuginfo = true;
            return;
        }
        let info = &mut self.result.debuginfo;
        let dwarf = &mut info.dwarf;
        let endian = gimli::LittleEndian;
        let data = section.data();
        let slice = gimli::EndianSlice::new(data, endian);

        match name {
            // `gimli::Dwarf` fields.
            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
            ".debug_info" => {
                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
            }
            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
            ".debug_str_sup" => {
                let mut dwarf_sup: Dwarf<'data> = Default::default();
                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
                dwarf.sup = Some(Arc::new(dwarf_sup));
            }
            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),

            // Additional fields.
            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),

            // DWARF package fields
            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),

            // We don't use these at the moment.
            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
            other => {
                log::warn!("unknown debug section `{other}`");
                return;
            }
        }

        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
    }

    /// Declares a new import with the `module` and `field` names, importing the
    /// `ty` specified.
    ///
    /// Note that this method is somewhat tricky due to the implementation of
    /// the module linking proposal. In the module linking proposal two-level
    /// imports are recast as single-level imports of instances. That recasting
    /// happens here by recording an import of an instance for the first time
    /// we see a two-level import.
    ///
    /// When the module linking proposal is disabled, however, disregard this
    /// logic and instead work directly with two-level imports since no
    /// instances are defined.
    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
        let index = self.push_type(ty);
        self.result.module.initializers.push(Initializer::Import {
            name: module.to_owned(),
            field: field.to_owned(),
            index,
        });
    }

    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
        match ty {
            EntityType::Function(ty) => EntityIndex::Function({
                let func_index = self
                    .result
                    .module
                    .push_function(ty.unwrap_module_type_index());
                // Imported functions can escape; in fact, they've already done
                // so to get here.
                self.flag_func_escaped(func_index);
                func_index
            }),
            EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
            EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
            EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
        }
    }

    fn flag_func_escaped(&mut self, func: FuncIndex) {
        let ty = &mut self.result.module.functions[func];
        // If this was already assigned a funcref index no need to re-assign it.
        if ty.is_escaping() {
            return;
        }
        let index = self.result.module.num_escaped_funcs as u32;
        ty.func_ref = FuncRefIndex::from_u32(index);
        self.result.module.num_escaped_funcs += 1;
    }

    /// Parses the Name section of the wasm module.
    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
        for subsection in names {
            match subsection? {
                wasmparser::Name::Function(names) => {
                    for name in names {
                        let Naming { index, name } = name?;
                        // Skip this naming if it's naming a function that
                        // doesn't actually exist.
                        if (index as usize) >= self.result.module.functions.len() {
                            continue;
                        }

                        // Store the name unconditionally, regardless of
                        // whether we're parsing debuginfo, since function
                        // names are almost always present in the
                        // final compilation artifact.
                        let index = FuncIndex::from_u32(index);
                        self.result
                            .debuginfo
                            .name_section
                            .func_names
                            .insert(index, name);
                    }
                }
                wasmparser::Name::Module { name, .. } => {
                    self.result.module.name = Some(name.to_string());
                    if self.tunables.generate_native_debuginfo {
                        self.result.debuginfo.name_section.module_name = Some(name);
                    }
                }
                wasmparser::Name::Local(reader) => {
                    if !self.tunables.generate_native_debuginfo {
                        continue;
                    }
                    for f in reader {
                        let f = f?;
                        // Skip this naming if it's naming a function that
                        // doesn't actually exist.
                        if (f.index as usize) >= self.result.module.functions.len() {
                            continue;
                        }
                        for name in f.names {
                            let Naming { index, name } = name?;

                            self.result
                                .debuginfo
                                .name_section
                                .locals_names
                                .entry(FuncIndex::from_u32(f.index))
                                .or_insert(HashMap::new())
                                .insert(index, name);
                        }
                    }
                }
                wasmparser::Name::Label(_)
                | wasmparser::Name::Type(_)
                | wasmparser::Name::Table(_)
                | wasmparser::Name::Global(_)
                | wasmparser::Name::Memory(_)
                | wasmparser::Name::Element(_)
                | wasmparser::Name::Data(_)
                | wasmparser::Name::Tag(_)
                | wasmparser::Name::Field(_)
                | wasmparser::Name::Unknown { .. } => {}
            }
        }
        Ok(())
    }
}

impl TypeConvert for ModuleEnvironment<'_, '_> {
    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_heap_type(index)
    }

    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_type_index(index)
    }
}

impl ModuleTranslation<'_> {
    /// Attempts to convert segmented memory initialization into static
    /// initialization for the module that this translation represents.
    ///
    /// If this module's memory initialization is not compatible with paged
    /// initialization then this won't change anything. Otherwise if it is
    /// compatible then the `memory_initialization` field will be updated.
    ///
    /// Takes a `page_size` argument in order to ensure that all
    /// initialization is page-aligned for mmap-ability, and
    /// `max_image_size_always_allowed` to control how we decide
    /// whether to use static init.
    ///
    /// We will try to avoid generating very sparse images, which are
    /// possible if e.g. a module has an initializer at offset 0 and a
    /// very high offset (say, 1 GiB). To avoid this, we use a dual
    /// condition: we always allow images less than
    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
    /// can set this if desired to ensure that static init should
    /// always be done if the size of the module or its heaps is
    /// otherwise bounded by the system. We also allow images with
    /// static init data bigger than that, but only if it is "dense",
    /// defined as having at least half (50%) of its pages with some
    /// data.
    ///
    /// We could do something slightly better by building a dense part
    /// and keeping a sparse list of outlier/leftover segments (see
    /// issue #3820). This would also allow mostly-static init of
    /// modules that have some dynamically-placed data segments. But,
    /// for now, this is sufficient to allow a system that "knows what
    /// it's doing" to always get static init.
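    ///
    /// As an illustrative example (numbers are hypothetical): a module placing
    /// 64 KiB of data at offset 0 and another 64 KiB at offset 1 GiB yields a
    /// roughly 1 GiB image containing only 128 KiB of data. That is far below
    /// 50% density, so static initialization is only used if the image size is
    /// still under `max_image_size_always_allowed`.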
    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
        // This method only attempts to transform a `Segmented` memory init
        // into a `Static` one, no other state.
        if !self.module.memory_initialization.is_segmented() {
            return;
        }

        // First a dry run of memory initialization is performed. This
        // collects information about the extent of memory initialized for each
        // memory as well as the size of all data segments being copied in.
        struct Memory {
            data_size: u64,
            min_addr: u64,
            max_addr: u64,
            // The `usize` here is a pointer into `self.data` which is the list
            // of data segments corresponding to what was found in the original
            // wasm module.
            segments: Vec<(usize, StaticMemoryInitializer)>,
        }
        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
        for _ in 0..self.module.memories.len() {
            info.push(Memory {
                data_size: 0,
                min_addr: u64::MAX,
                max_addr: 0,
                segments: Vec::new(),
            });
        }

        struct InitMemoryAtCompileTime<'a> {
            module: &'a Module,
            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
            idx: usize,
        }
        impl InitMemory for InitMemoryAtCompileTime<'_> {
            fn memory_size_in_bytes(
                &mut self,
                memory_index: MemoryIndex,
            ) -> Result<u64, SizeOverflow> {
                self.module.memories[memory_index].minimum_byte_size()
            }

            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
                match (expr.ops(), self.module.memories[memory_index].idx_type) {
                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
                        Some(offset.cast_unsigned().into())
                    }
                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
                    _ => None,
                }
            }

            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
                // Currently `Static` only applies to locally-defined memories,
                // so if a data segment references an imported memory then
                // transitioning to a `Static` memory initializer is not
                // possible.
                if self.module.defined_memory_index(memory).is_none() {
                    return false;
                };
                let info = &mut self.info[memory];
                let data_len = u64::from(init.data.end - init.data.start);
                if data_len > 0 {
                    info.data_size += data_len;
                    info.min_addr = info.min_addr.min(init.offset);
                    info.max_addr = info.max_addr.max(init.offset + data_len);
                    info.segments.push((self.idx, init.clone()));
                }
                self.idx += 1;
                true
            }
        }
        let ok = self
            .module
            .memory_initialization
            .init_memory(&mut InitMemoryAtCompileTime {
                idx: 0,
                module: &self.module,
                info: &mut info,
            });
        if !ok {
            return;
        }

        // Validate that the memory information collected is indeed valid for
        // static memory initialization.
        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
            let image_size = info.max_addr - info.min_addr;

            // Simplify things for now by bailing out entirely if any memory has
            // a page size smaller than the host's page size. This fixes a case
            // where currently initializers are created in host-page-size units
            // of length which means that a larger-than-the-entire-memory
            // initializer can be created. This can be handled technically but
            // would require some more changes to help fix the assert elsewhere
            // that this protects against.
            if self.module.memories[i].page_size() < page_size {
                return;
            }

            // If the range of memory being initialized is less than twice the
            // total size of the data itself then it's assumed that static
            // initialization is ok. This means we'll at most double memory
            // consumption during the memory image creation process, which is
            // currently assumed to "probably be ok" but this will likely need
            // tweaks over time.
            if image_size < info.data_size.saturating_mul(2) {
                continue;
            }

            // If the memory initialization image is larger than the size of all
            // data, then we still allow memory initialization if the image will
            // be of a relatively modest size, such as 1MB here.
            if image_size < max_image_size_always_allowed {
                continue;
            }

            // At this point memory initialization is concluded to be too
            // expensive to do at compile time so it's entirely deferred to
            // happen at runtime.
            return;
        }

        // Here's where we've now committed to changing to static memory. The
        // memory initialization image is built here from the page data and then
        // it's converted to a single initializer.
        let data = mem::replace(&mut self.data, Vec::new());
        let mut map = PrimaryMap::with_capacity(info.len());
        let mut module_data_size = 0u32;
        for (memory, info) in info.iter() {
            // Create the in-memory `image` which is the initialized contents of
            // this linear memory.
            let extent = if info.segments.len() > 0 {
                (info.max_addr - info.min_addr) as usize
            } else {
                0
            };
            let mut image = Vec::with_capacity(extent);
            for (idx, init) in info.segments.iter() {
                let data = &data[*idx];
                assert_eq!(data.len(), init.data.len());
                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
                if image.len() < offset {
                    image.resize(offset, 0u8);
                    image.extend_from_slice(data);
                } else {
                    image.splice(
                        offset..(offset + data.len()).min(image.len()),
                        data.iter().copied(),
                    );
                }
            }
            assert_eq!(image.len(), extent);
            assert_eq!(image.capacity(), extent);
            let mut offset = if info.segments.len() > 0 {
                info.min_addr
            } else {
                0
            };

            // Chop off trailing zeros from the image as memory is already
            // zero-initialized. Note that `i` is the position of a nonzero
            // entry here, so to not lose it we truncate to `i + 1`.
            if let Some(i) = image.iter().rposition(|i| *i != 0) {
                image.truncate(i + 1);
            }

            // Also chop off leading zeros, if any.
            if let Some(i) = image.iter().position(|i| *i != 0) {
                offset += i as u64;
                image.drain(..i);
            }
            let mut len = u64::try_from(image.len()).unwrap();

            // The goal is to enable mapping this image directly into memory, so
            // the offset into linear memory must be a multiple of the page
            // size. If that's not already the case then the image is padded at
            // the front and back with extra zeros as necessary.
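            //
            // Illustrative example: with `page_size = 0x1000`, an image at
            // `offset = 0x1800` with `len = 0x900` first gets 0x800 bytes of
            // leading zeros (offset becomes 0x1000, len 0x1100) and then
            // 0xf00 bytes of trailing zeros (len becomes 0x2000).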
            if offset % page_size != 0 {
                let zero_padding = offset % page_size;
                self.data.push(vec![0; zero_padding as usize].into());
                offset -= zero_padding;
                len += zero_padding;
            }
            self.data.push(image.into());
            if len % page_size != 0 {
                let zero_padding = page_size - (len % page_size);
                self.data.push(vec![0; zero_padding as usize].into());
                len += zero_padding;
            }

            // Offset/length should now always be page-aligned.
            assert!(offset % page_size == 0);
            assert!(len % page_size == 0);

            // Create the `StaticMemoryInitializer` which describes this image,
            // only needed if the image is actually present and has a nonzero
            // length. The `offset` has been calculated above, originally
            // sourced from `info.min_addr`. The `data` field is the extent
            // within the final data segment we'll emit to an ELF image, which
            // is the concatenation of `self.data`, so here it's the size of
            // the section-so-far plus the current segment we're appending.
            let len = u32::try_from(len).unwrap();
            let init = if len > 0 {
                Some(StaticMemoryInitializer {
                    offset,
                    data: module_data_size..module_data_size + len,
                })
            } else {
                None
            };
            let idx = map.push(init);
            assert_eq!(idx, memory);
            module_data_size += len;
        }
        self.data_align = Some(page_size);
        self.module.memory_initialization = MemoryInitialization::Static { map };
    }

    /// Attempts to convert the module's table initializers to
    /// FuncTable form where possible. This enables lazy table
    /// initialization later by providing a one-to-one map of initial
    /// table values, without having to parse all segments.
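    ///
    /// For example (illustrative): a locally-defined funcref table with a
    /// minimum size of 4 and a single active segment `[f0, f1]` at constant
    /// offset 1 gets the precomputed initial value `[reserved, f0, f1]`,
    /// where `reserved` is `FuncIndex::reserved_value()`; slots past the
    /// precomputed list simply stay uninitialized.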
    pub fn try_func_table_init(&mut self) {
        // This should be large enough to support very large Wasm
        // modules with huge funcref tables, but small enough to avoid
        // OOMs or DoS on truly sparse tables.
        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;

        // First convert any element-initialized tables to images of just that
        // single function if the minimum size of the table allows doing so.
        for ((_, init), (_, table)) in self
            .module
            .table_initialization
            .initial_values
            .iter_mut()
            .zip(
                self.module
                    .tables
                    .iter()
                    .skip(self.module.num_imported_tables),
            )
        {
            let table_size = table.limits.min;
            if table_size > MAX_FUNC_TABLE_SIZE {
                continue;
            }
            if let TableInitialValue::Expr(expr) = init {
                if let [ConstOp::RefFunc(f)] = expr.ops() {
                    *init = TableInitialValue::Null {
                        precomputed: vec![*f; table_size as usize],
                    };
                }
            }
        }

        let mut segments = mem::take(&mut self.module.table_initialization.segments)
            .into_iter()
            .peekable();

        // The goal of this loop is to interpret a table segment and apply it
        // "statically" to a local table. This will iterate over segments and
        // apply them one-by-one to each table.
        //
        // If any segment can't be applied, however, then this loop exits and
        // all remaining segments are placed back into the segment list. This is
        // because segments are supposed to be initialized one-at-a-time which
        // means that intermediate state is visible with respect to traps. If
        // anything isn't statically known to not trap it's pessimistically
        // assumed to trap meaning all further segment initializers must be
        // applied manually at instantiation time.
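        //
        // For example (illustrative): if segment 0 has a constant offset but
        // segment 1's offset is a `global.get`, segment 0 is folded into the
        // precomputed table image and segment 1 (plus everything after it)
        // stays in `segments` to be applied at instantiation time.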
        while let Some(segment) = segments.peek() {
            let defined_index = match self.module.defined_table_index(segment.table_index) {
                Some(index) => index,
                // Skip imported tables: we can't provide a preconstructed
                // table for them, because their values depend on the
                // imported table overlaid with whatever segments we have.
                None => break,
            };

            // If the base of this segment is dynamic, then we can't
            // include it in the statically-built array of initial
            // contents.
            let offset = match segment.offset.ops() {
                &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
                &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
                _ => break,
            };

            // Get the end of this segment. If out-of-bounds, or too
            // large for our dense table representation, then skip the
            // segment.
            let top = match offset.checked_add(segment.elements.len()) {
                Some(top) => top,
                None => break,
            };
            let table_size = self.module.tables[segment.table_index].limits.min;
            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
                break;
            }

            match self.module.tables[segment.table_index]
                .ref_type
                .heap_type
                .top()
            {
                WasmHeapTopType::Func => {}
                // If this is not a funcref table, then we can't support a
                // pre-computed table of function indices. Technically this
                // initializer won't trap so we could continue processing
                // segments, but that's left as a future optimization if
                // necessary.
                WasmHeapTopType::Any
                | WasmHeapTopType::Extern
                | WasmHeapTopType::Cont
                | WasmHeapTopType::Exn => break,
            }

            // Function indices can be optimized here, but fully general
            // expressions are deferred to get evaluated at runtime.
            let function_elements = match &segment.elements {
                TableSegmentElements::Functions(indices) => indices,
                TableSegmentElements::Expressions(_) => break,
            };

            let precomputed =
                match &mut self.module.table_initialization.initial_values[defined_index] {
                    TableInitialValue::Null { precomputed } => precomputed,

                    // If this table is still listed as an initial value here
                    // then that means the initial size of the table doesn't
                    // support a precomputed function list, so skip this.
                    // Technically this won't trap so it's possible to process
                    // further initializers, but that's left as a future
                    // optimization.
                    TableInitialValue::Expr(_) => break,
                };

            // At this point we're committing to pre-initializing the table
            // with the `segment` that's being iterated over. This segment is
            // applied to the `precomputed` list for the table by ensuring
            // it's large enough to hold the segment and then copying the
            // segment into the precomputed list.
            if precomputed.len() < top as usize {
                precomputed.resize(top as usize, FuncIndex::reserved_value());
            }
            let dst = &mut precomputed[offset as usize..top as usize];
            dst.copy_from_slice(&function_elements);

            // Advance the iterator to see the next segment.
            let _ = segments.next();
        }
        self.module.table_initialization.segments = segments.collect();
    }
}