Skip to main content

wasmtime_environ/compile/
module_environ.rs

1use crate::error::{OutOfMemory, Result, bail};
2use crate::module::{
3    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
4    TableSegmentElements,
5};
6use crate::prelude::*;
7use crate::{
8    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
9    EntityIndex, EntityType, FuncIndex, FuncKey, GlobalIndex, IndexType, InitMemory, MemoryIndex,
10    ModuleInternedTypeIndex, ModuleTypesBuilder, PanicOnOom as _, PrimaryMap, SizeOverflow,
11    StaticMemoryInitializer, StaticModuleIndex, TableIndex, TableInitialValue, Tag, TagIndex,
12    Tunables, TypeConvert, TypeIndex, WasmError, WasmHeapTopType, WasmHeapType, WasmResult,
13    WasmValType, WasmparserTypeConverter, collections,
14};
15use cranelift_entity::SecondaryMap;
16use cranelift_entity::packed_option::ReservedValue;
17use std::borrow::Cow;
18use std::collections::HashMap;
19use std::mem;
20use std::path::PathBuf;
21use std::sync::Arc;
22use wasmparser::{
23    CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
24    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
25    Validator, ValidatorResources, types::Types,
26};
27
28/// Object containing the standalone environment information.
29pub struct ModuleEnvironment<'a, 'data> {
30    /// The current module being translated
31    result: ModuleTranslation<'data>,
32
33    /// Intern'd types for this entire translation, shared by all modules.
34    types: &'a mut ModuleTypesBuilder,
35
36    // Various bits and pieces of configuration
37    validator: &'a mut Validator,
38    tunables: &'a Tunables,
39}
40
41/// The result of translating via `ModuleEnvironment`.
42///
43/// Function bodies are not yet translated, and data initializers have not yet
44/// been copied out of the original buffer.
45pub struct ModuleTranslation<'data> {
46    /// Module information.
47    pub module: Module,
48
49    /// The input wasm binary.
50    ///
51    /// This can be useful, for example, when modules are parsed from a
52    /// component and the embedder wants access to the raw wasm modules
53    /// themselves.
54    pub wasm: &'data [u8],
55
56    /// References to the function bodies.
57    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
58
59    /// For each imported function, the single statically-known function that
60    /// always satisfies that import, if any.
61    ///
62    /// This is used to turn what would otherwise be indirect calls through the
63    /// imports table into direct calls, when possible.
64    ///
65    /// When filled in, this only ever contains
66    /// `FuncKey::DefinedWasmFunction(..)`s and `FuncKey::Intrinsic(..)`s.
67    pub known_imported_functions: SecondaryMap<FuncIndex, Option<FuncKey>>,
68
69    /// A list of type signatures which are considered exported from this
70    /// module, or those that can possibly be called. This list is sorted, and
71    /// trampolines for each of these signatures are required.
72    pub exported_signatures: Vec<ModuleInternedTypeIndex>,
73
74    /// DWARF debug information, if enabled, parsed from the module.
75    pub debuginfo: DebugInfoData<'data>,
76
77    /// Set if debuginfo was found but it was not parsed due to `Tunables`
78    /// configuration.
79    pub has_unparsed_debuginfo: bool,
80
81    /// List of data segments found in this module which should be concatenated
82    /// together for the final compiled artifact.
83    ///
84    /// These data segments, when concatenated, are indexed by the
85    /// `MemoryInitializer` type.
86    pub data: Vec<Cow<'data, [u8]>>,
87
88    /// The desired alignment of `data` in the final data section of the object
89    /// file that we'll emit.
90    ///
91    /// Note that this is 1 by default but `MemoryInitialization::Static` might
92    /// switch this to a higher alignment to facilitate mmap-ing data from
93    /// an object file into a linear memory.
94    pub data_align: Option<u64>,
95
96    /// Total size of all data pushed onto `data` so far.
97    total_data: u32,
98
99    /// List of passive element segments found in this module which will get
100    /// concatenated for the final artifact.
101    pub passive_data: Vec<&'data [u8]>,
102
103    /// Total size of all passive data pushed into `passive_data` so far.
104    total_passive_data: u32,
105
106    /// When we're parsing the code section this will be incremented so we know
107    /// which function is currently being defined.
108    code_index: u32,
109
110    /// The type information of the current module made available at the end of the
111    /// validation process.
112    types: Option<Types>,
113}
114
115impl<'data> ModuleTranslation<'data> {
116    /// Create a new translation for the module with the given index.
117    pub fn new(module_index: StaticModuleIndex) -> Self {
118        Self {
119            module: Module::new(module_index),
120            wasm: &[],
121            function_body_inputs: PrimaryMap::default(),
122            known_imported_functions: SecondaryMap::default(),
123            exported_signatures: Vec::default(),
124            debuginfo: DebugInfoData::default(),
125            has_unparsed_debuginfo: false,
126            data: Vec::default(),
127            data_align: None,
128            total_data: 0,
129            passive_data: Vec::default(),
130            total_passive_data: 0,
131            code_index: 0,
132            types: None,
133        }
134    }
135
136    /// Returns a reference to the type information of the current module.
137    pub fn get_types(&self) -> &Types {
138        self.types
139            .as_ref()
140            .expect("module type information to be available")
141    }
142
143    /// Get this translation's module's index.
144    pub fn module_index(&self) -> StaticModuleIndex {
145        self.module.module_index
146    }
147}
148
149/// Contains function data: byte code and its offset in the module.
150pub struct FunctionBodyData<'a> {
151    /// The body of the function, containing code and locals.
152    pub body: FunctionBody<'a>,
153    /// Validator for the function body
154    pub validator: FuncToValidate<ValidatorResources>,
155}
156
157#[derive(Debug, Default)]
158#[expect(missing_docs, reason = "self-describing fields")]
159pub struct DebugInfoData<'a> {
160    pub dwarf: Dwarf<'a>,
161    pub name_section: NameSection<'a>,
162    pub wasm_file: WasmFileInfo,
163    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
164    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
165    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
166    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
167    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
168    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
169}
170
171#[expect(missing_docs, reason = "self-describing")]
172pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;
173
174type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
175
176#[derive(Debug, Default)]
177#[expect(missing_docs, reason = "self-describing fields")]
178pub struct NameSection<'a> {
179    pub module_name: Option<&'a str>,
180    pub func_names: HashMap<FuncIndex, &'a str>,
181    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
182}
183
184#[derive(Debug, Default)]
185#[expect(missing_docs, reason = "self-describing fields")]
186pub struct WasmFileInfo {
187    pub path: Option<PathBuf>,
188    pub code_section_offset: u64,
189    pub imported_func_count: u32,
190    pub funcs: Vec<FunctionMetadata>,
191}
192
193#[derive(Debug)]
194#[expect(missing_docs, reason = "self-describing fields")]
195pub struct FunctionMetadata {
196    pub params: Box<[WasmValType]>,
197    pub locals: Box<[(u32, WasmValType)]>,
198}
199
200impl<'a, 'data> ModuleEnvironment<'a, 'data> {
201    /// Allocates the environment data structures.
202    pub fn new(
203        tunables: &'a Tunables,
204        validator: &'a mut Validator,
205        types: &'a mut ModuleTypesBuilder,
206        module_index: StaticModuleIndex,
207    ) -> Self {
208        Self {
209            result: ModuleTranslation::new(module_index),
210            types,
211            tunables,
212            validator,
213        }
214    }
215
216    /// Translate a wasm module using this environment.
217    ///
218    /// This function will translate the `data` provided with `parser`,
219    /// validating everything along the way with this environment's validator.
220    ///
221    /// The result of translation, [`ModuleTranslation`], contains everything
222    /// necessary to compile functions afterwards as well as learn type
223    /// information about the module at runtime.
224    pub fn translate(
225        mut self,
226        parser: Parser,
227        data: &'data [u8],
228    ) -> Result<ModuleTranslation<'data>> {
229        self.result.wasm = data;
230
231        for payload in parser.parse_all(data) {
232            self.translate_payload(payload?)?;
233        }
234
235        Ok(self.result)
236    }
237
238    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
239        match payload {
240            Payload::Version {
241                num,
242                encoding,
243                range,
244            } => {
245                self.validator.version(num, encoding, &range)?;
246                match encoding {
247                    Encoding::Module => {}
248                    Encoding::Component => {
249                        bail!("expected a WebAssembly module but was given a WebAssembly component")
250                    }
251                }
252            }
253
254            Payload::End(offset) => {
255                self.result.types = Some(self.validator.end(offset)?);
256
257                // With the `escaped_funcs` set of functions finished
258                // we can calculate the set of signatures that are exported as
259                // the set of exported functions' signatures.
260                self.result.exported_signatures = self
261                    .result
262                    .module
263                    .functions
264                    .iter()
265                    .filter_map(|(_, func)| {
266                        if func.is_escaping() {
267                            Some(func.signature.unwrap_module_type_index())
268                        } else {
269                            None
270                        }
271                    })
272                    .collect();
273                self.result.exported_signatures.sort_unstable();
274                self.result.exported_signatures.dedup();
275            }
276
277            Payload::TypeSection(types) => {
278                self.validator.type_section(&types)?;
279
280                let count = self.validator.types(0).unwrap().core_type_count_in_module();
281                log::trace!("interning {count} Wasm types");
282
283                let capacity = usize::try_from(count).unwrap();
284                self.result.module.types.reserve(capacity)?;
285                self.types.reserve_wasm_signatures(capacity);
286
287                // Iterate over each *rec group* -- not type -- defined in the
288                // types section. Rec groups are the unit of canonicalization
289                // and therefore the unit at which we need to process at a
290                // time. `wasmparser` has already done the hard work of
291                // de-duplicating and canonicalizing the rec groups within the
292                // module for us, we just need to translate them into our data
293                // structures. Note that, if the Wasm defines duplicate rec
294                // groups, we need copy the duplicates over (shallowly) as well,
295                // so that our types index space doesn't have holes.
296                let mut type_index = 0;
297                while type_index < count {
298                    let validator_types = self.validator.types(0).unwrap();
299
300                    // Get the rec group for the current type index, which is
301                    // always the first type defined in a rec group.
302                    log::trace!("looking up wasmparser type for index {type_index}");
303                    let core_type_id = validator_types.core_type_at_in_module(type_index);
304                    log::trace!(
305                        "  --> {core_type_id:?} = {:?}",
306                        validator_types[core_type_id],
307                    );
308                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
309                    debug_assert_eq!(
310                        validator_types
311                            .rec_group_elements(rec_group_id)
312                            .position(|id| id == core_type_id),
313                        Some(0)
314                    );
315
316                    // Intern the rec group and then fill in this module's types
317                    // index space.
318                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
319                    let elems = self.types.rec_group_elements(interned);
320                    let len = elems.len();
321                    self.result.module.types.reserve(len)?;
322                    for ty in elems {
323                        self.result.module.types.push(ty.into())?;
324                    }
325
326                    // Advance `type_index` to the start of the next rec group.
327                    type_index += u32::try_from(len).unwrap();
328                }
329            }
330
331            Payload::ImportSection(imports) => {
332                self.validator.import_section(&imports)?;
333
334                let cnt = usize::try_from(imports.count()).unwrap();
335                self.result.module.initializers.reserve(cnt)?;
336
337                for entry in imports.into_imports() {
338                    let import = entry?;
339                    let ty = match import.ty {
340                        TypeRef::Func(index) => {
341                            let index = TypeIndex::from_u32(index);
342                            let interned_index = self.result.module.types[index];
343                            self.result.module.num_imported_funcs += 1;
344                            self.result.debuginfo.wasm_file.imported_func_count += 1;
345                            EntityType::Function(interned_index)
346                        }
347                        TypeRef::Memory(ty) => {
348                            self.result.module.num_imported_memories += 1;
349                            EntityType::Memory(ty.into())
350                        }
351                        TypeRef::Global(ty) => {
352                            self.result.module.num_imported_globals += 1;
353                            EntityType::Global(self.convert_global_type(&ty)?)
354                        }
355                        TypeRef::Table(ty) => {
356                            self.result.module.num_imported_tables += 1;
357                            EntityType::Table(self.convert_table_type(&ty)?)
358                        }
359                        TypeRef::Tag(ty) => {
360                            let index = TypeIndex::from_u32(ty.func_type_idx);
361                            let signature = self.result.module.types[index];
362                            let exception = self.types.define_exception_type_for_tag(
363                                signature.unwrap_module_type_index(),
364                            );
365                            let tag = Tag {
366                                signature,
367                                exception: EngineOrModuleTypeIndex::Module(exception),
368                            };
369                            self.result.module.num_imported_tags += 1;
370                            EntityType::Tag(tag)
371                        }
372                        TypeRef::FuncExact(_) => {
373                            bail!("custom-descriptors proposal not implemented yet");
374                        }
375                    };
376                    self.declare_import(import.module, import.name, ty)?;
377                }
378            }
379
380            Payload::FunctionSection(functions) => {
381                self.validator.function_section(&functions)?;
382
383                let cnt = usize::try_from(functions.count()).unwrap();
384                self.result.module.functions.reserve_exact(cnt)?;
385
386                for entry in functions {
387                    let sigindex = entry?;
388                    let ty = TypeIndex::from_u32(sigindex);
389                    let interned_index = self.result.module.types[ty];
390                    self.result.module.push_function(interned_index);
391                }
392            }
393
394            Payload::TableSection(tables) => {
395                self.validator.table_section(&tables)?;
396                let cnt = usize::try_from(tables.count()).unwrap();
397                self.result.module.tables.reserve_exact(cnt)?;
398
399                for entry in tables {
400                    let wasmparser::Table { ty, init } = entry?;
401                    let table = self.convert_table_type(&ty)?;
402                    self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
403                    self.result.module.tables.push(table)?;
404                    let init = match init {
405                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
406                            precomputed: collections::Vec::new(),
407                        },
408                        wasmparser::TableInit::Expr(expr) => {
409                            let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
410                            for f in escaped {
411                                self.flag_func_escaped(f);
412                            }
413                            TableInitialValue::Expr(init)
414                        }
415                    };
416                    self.result
417                        .module
418                        .table_initialization
419                        .initial_values
420                        .push(init)?;
421                }
422            }
423
424            Payload::MemorySection(memories) => {
425                self.validator.memory_section(&memories)?;
426
427                let cnt = usize::try_from(memories.count()).unwrap();
428                self.result.module.memories.reserve_exact(cnt)?;
429
430                for entry in memories {
431                    let memory = entry?;
432                    self.result.module.memories.push(memory.into())?;
433                }
434            }
435
436            Payload::TagSection(tags) => {
437                self.validator.tag_section(&tags)?;
438
439                for entry in tags {
440                    let sigindex = entry?.func_type_idx;
441                    let ty = TypeIndex::from_u32(sigindex);
442                    let interned_index = self.result.module.types[ty];
443                    let exception = self
444                        .types
445                        .define_exception_type_for_tag(interned_index.unwrap_module_type_index());
446                    self.result.module.push_tag(interned_index, exception);
447                }
448            }
449
450            Payload::GlobalSection(globals) => {
451                self.validator.global_section(&globals)?;
452
453                let cnt = usize::try_from(globals.count()).unwrap();
454                self.result.module.globals.reserve_exact(cnt)?;
455
456                for entry in globals {
457                    let wasmparser::Global { ty, init_expr } = entry?;
458                    let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
459                    for f in escaped {
460                        self.flag_func_escaped(f);
461                    }
462                    let ty = self.convert_global_type(&ty)?;
463                    self.result.module.globals.push(ty)?;
464                    self.result.module.global_initializers.push(initializer)?;
465                }
466            }
467
468            Payload::ExportSection(exports) => {
469                self.validator.export_section(&exports)?;
470
471                let cnt = usize::try_from(exports.count()).unwrap();
472                self.result.module.exports.reserve(cnt)?;
473
474                for entry in exports {
475                    let wasmparser::Export { name, kind, index } = entry?;
476                    let entity = match kind {
477                        ExternalKind::Func | ExternalKind::FuncExact => {
478                            let index = FuncIndex::from_u32(index);
479                            self.flag_func_escaped(index);
480                            EntityIndex::Function(index)
481                        }
482                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
483                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
484                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
485                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
486                    };
487                    let name = self.result.module.strings.insert(name)?;
488                    self.result.module.exports.insert(name, entity)?;
489                }
490            }
491
492            Payload::StartSection { func, range } => {
493                self.validator.start_section(func, &range)?;
494
495                let func_index = FuncIndex::from_u32(func);
496                self.flag_func_escaped(func_index);
497                debug_assert!(self.result.module.start_func.is_none());
498                self.result.module.start_func = Some(func_index);
499            }
500
501            Payload::ElementSection(elements) => {
502                self.validator.element_section(&elements)?;
503
504                for (index, entry) in elements.into_iter().enumerate() {
505                    let wasmparser::Element {
506                        kind,
507                        items,
508                        range: _,
509                    } = entry?;
510
511                    // Build up a list of `FuncIndex` corresponding to all the
512                    // entries listed in this segment. Note that it's not
513                    // possible to create anything other than a `ref.null
514                    // extern` for externref segments, so those just get
515                    // translated to the reserved value of `FuncIndex`.
516                    let elements = match items {
517                        ElementItems::Functions(funcs) => {
518                            let mut elems =
519                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
520                            for func in funcs {
521                                let func = FuncIndex::from_u32(func?);
522                                self.flag_func_escaped(func);
523                                elems.push(func);
524                            }
525                            TableSegmentElements::Functions(elems.into())
526                        }
527                        ElementItems::Expressions(_ty, items) => {
528                            let mut exprs =
529                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
530                            for expr in items {
531                                let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
532                                exprs.push(expr);
533                                for func in escaped {
534                                    self.flag_func_escaped(func);
535                                }
536                            }
537                            TableSegmentElements::Expressions(exprs.into())
538                        }
539                    };
540
541                    match kind {
542                        ElementKind::Active {
543                            table_index,
544                            offset_expr,
545                        } => {
546                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
547                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
548                            debug_assert!(escaped.is_empty());
549
550                            self.result.module.table_initialization.segments.push(
551                                TableSegment {
552                                    table_index,
553                                    offset,
554                                    elements,
555                                },
556                            )?;
557                        }
558
559                        ElementKind::Passive => {
560                            let elem_index = ElemIndex::from_u32(index as u32);
561                            let index = self.result.module.passive_elements.len();
562                            self.result.module.passive_elements.push(elements)?;
563                            self.result
564                                .module
565                                .passive_elements_map
566                                .insert(elem_index, index);
567                        }
568
569                        ElementKind::Declared => {}
570                    }
571                }
572            }
573
574            Payload::CodeSectionStart { count, range, .. } => {
575                self.validator.code_section_start(&range)?;
576                let cnt = usize::try_from(count).unwrap();
577                self.result.function_body_inputs.reserve_exact(cnt);
578                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
579            }
580
581            Payload::CodeSectionEntry(body) => {
582                let validator = self.validator.code_section_entry(&body)?;
583                let func_index =
584                    self.result.code_index + self.result.module.num_imported_funcs as u32;
585                let func_index = FuncIndex::from_u32(func_index);
586
587                if self.tunables.debug_native {
588                    let sig_index = self.result.module.functions[func_index]
589                        .signature
590                        .unwrap_module_type_index();
591                    let sig = self.types[sig_index].unwrap_func();
592                    let mut locals = Vec::new();
593                    for pair in body.get_locals_reader()? {
594                        let (cnt, ty) = pair?;
595                        let ty = self.convert_valtype(ty)?;
596                        locals.push((cnt, ty));
597                    }
598                    self.result
599                        .debuginfo
600                        .wasm_file
601                        .funcs
602                        .push(FunctionMetadata {
603                            locals: locals.into_boxed_slice(),
604                            params: sig.params().into(),
605                        });
606                }
607                if self.tunables.debug_guest {
608                    // All functions are potentially reachable and
609                    // callable by the guest debugger, so they must
610                    // all be flagged as escaping.
611                    self.flag_func_escaped(func_index);
612                }
613                self.result
614                    .function_body_inputs
615                    .push(FunctionBodyData { validator, body });
616                self.result.code_index += 1;
617            }
618
619            Payload::DataSection(data) => {
620                self.validator.data_section(&data)?;
621
622                let initializers = match &mut self.result.module.memory_initialization {
623                    MemoryInitialization::Segmented(i) => i,
624                    _ => unreachable!(),
625                };
626
627                let cnt = usize::try_from(data.count()).unwrap();
628                initializers.reserve_exact(cnt)?;
629                self.result.data.reserve_exact(cnt);
630
631                for (index, entry) in data.into_iter().enumerate() {
632                    let wasmparser::Data {
633                        kind,
634                        data,
635                        range: _,
636                    } = entry?;
637                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
638                        let range = u32::try_from(data.len())
639                            .ok()
640                            .and_then(|size| {
641                                let start = *total;
642                                let end = start.checked_add(size)?;
643                                Some(start..end)
644                            })
645                            .ok_or_else(|| {
646                                WasmError::Unsupported(format!(
647                                    "more than 4 gigabytes of data in wasm module",
648                                ))
649                            })?;
650                        *total += range.end - range.start;
651                        Ok(range)
652                    };
653                    match kind {
654                        DataKind::Active {
655                            memory_index,
656                            offset_expr,
657                        } => {
658                            let range = mk_range(&mut self.result.total_data)?;
659                            let memory_index = MemoryIndex::from_u32(memory_index);
660                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
661                            debug_assert!(escaped.is_empty());
662
663                            let initializers = match &mut self.result.module.memory_initialization {
664                                MemoryInitialization::Segmented(i) => i,
665                                _ => unreachable!(),
666                            };
667                            initializers.push(MemoryInitializer {
668                                memory_index,
669                                offset,
670                                data: range,
671                            })?;
672                            self.result.data.push(data.into());
673                        }
674                        DataKind::Passive => {
675                            let data_index = DataIndex::from_u32(index as u32);
676                            let range = mk_range(&mut self.result.total_passive_data)?;
677                            self.result.passive_data.push(data);
678                            self.result
679                                .module
680                                .passive_data_map
681                                .insert(data_index, range);
682                        }
683                    }
684                }
685            }
686
687            Payload::DataCountSection { count, range } => {
688                self.validator.data_count_section(count, &range)?;
689
690                // Note: the count passed in here is the *total* segment count
691                // There is no way to reserve for just the passive segments as
692                // they are discovered when iterating the data section entries
693                // Given that the total segment count might be much larger than
694                // the passive count, do not reserve anything here.
695            }
696
697            Payload::CustomSection(s)
698                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
699            {
700                bail!(
701                    "\
702Support for interface types has temporarily been removed from `wasmtime`.
703
704For more information about this temporary change you can read on the issue online:
705
706    https://github.com/bytecodealliance/wasmtime/issues/1271
707
708and for re-adding support for interface types you can see this issue:
709
710    https://github.com/bytecodealliance/wasmtime/issues/677
711"
712                )
713            }
714
715            Payload::CustomSection(s) => {
716                self.register_custom_section(&s);
717            }
718
719            // It's expected that validation will probably reject other
720            // payloads such as `UnknownSection` or those related to the
721            // component model. If, however, something gets past validation then
722            // that's a bug in Wasmtime as we forgot to implement something.
723            other => {
724                self.validator.payload(&other)?;
725                panic!("unimplemented section in wasm file {other:?}");
726            }
727        }
728        Ok(())
729    }
730
731    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
732        match section.as_known() {
733            KnownCustom::Name(name) => {
734                let result = self.name_section(name);
735                if let Err(e) = result {
736                    log::warn!("failed to parse name section {e:?}");
737                }
738            }
739            _ => {
740                let name = section.name().trim_end_matches(".dwo");
741                if name.starts_with(".debug_") {
742                    self.dwarf_section(name, section);
743                }
744            }
745        }
746    }
747
748    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
749        if !self.tunables.debug_native && !self.tunables.parse_wasm_debuginfo {
750            self.result.has_unparsed_debuginfo = true;
751            return;
752        }
753        let info = &mut self.result.debuginfo;
754        let dwarf = &mut info.dwarf;
755        let endian = gimli::LittleEndian;
756        let data = section.data();
757        let slice = gimli::EndianSlice::new(data, endian);
758
759        match name {
760            // `gimli::Dwarf` fields.
761            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
762            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
763            ".debug_info" => {
764                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
765            }
766            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
767            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
768            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
769            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
770            ".debug_str_sup" => {
771                let mut dwarf_sup: Dwarf<'data> = Default::default();
772                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
773                dwarf.sup = Some(Arc::new(dwarf_sup));
774            }
775            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),
776
777            // Additional fields.
778            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
779            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
780            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
781            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),
782
783            // DWARF package fields
784            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
785            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),
786
787            // We don't use these at the moment.
788            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
789            other => {
790                log::warn!("unknown debug section `{other}`");
791                return;
792            }
793        }
794
795        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
796        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
797    }
798
799    /// Declares a new import with the `module` and `field` names, importing the
800    /// `ty` specified.
801    ///
802    /// Note that this method is somewhat tricky due to the implementation of
803    /// the module linking proposal. In the module linking proposal two-level
804    /// imports are recast as single-level imports of instances. That recasting
805    /// happens here by recording an import of an instance for the first time
806    /// we see a two-level import.
807    ///
808    /// When the module linking proposal is disabled, however, disregard this
809    /// logic and instead work directly with two-level imports since no
810    /// instances are defined.
811    fn declare_import(
812        &mut self,
813        module: &'data str,
814        field: &'data str,
815        ty: EntityType,
816    ) -> Result<(), OutOfMemory> {
817        let index = self.push_type(ty);
818        self.result.module.initializers.push(Initializer::Import {
819            name: self.result.module.strings.insert(module)?,
820            field: self.result.module.strings.insert(field)?,
821            index,
822        })?;
823        Ok(())
824    }
825
826    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
827        match ty {
828            EntityType::Function(ty) => EntityIndex::Function({
829                let func_index = self
830                    .result
831                    .module
832                    .push_function(ty.unwrap_module_type_index());
833                // Imported functions can escape; in fact, they've already done
834                // so to get here.
835                self.flag_func_escaped(func_index);
836                func_index
837            }),
838            EntityType::Table(ty) => {
839                EntityIndex::Table(self.result.module.tables.push(ty).panic_on_oom())
840            }
841            EntityType::Memory(ty) => {
842                EntityIndex::Memory(self.result.module.memories.push(ty).panic_on_oom())
843            }
844            EntityType::Global(ty) => {
845                EntityIndex::Global(self.result.module.globals.push(ty).panic_on_oom())
846            }
847            EntityType::Tag(ty) => {
848                EntityIndex::Tag(self.result.module.tags.push(ty).panic_on_oom())
849            }
850        }
851    }
852
853    fn flag_func_escaped(&mut self, func: FuncIndex) {
854        let ty = &mut self.result.module.functions[func];
855        // If this was already assigned a funcref index no need to re-assign it.
856        if ty.is_escaping() {
857            return;
858        }
859        let index = self.result.module.num_escaped_funcs as u32;
860        ty.func_ref = FuncRefIndex::from_u32(index);
861        self.result.module.num_escaped_funcs += 1;
862    }
863
864    /// Parses the Name section of the wasm module.
865    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
866        for subsection in names {
867            match subsection? {
868                wasmparser::Name::Function(names) => {
869                    for name in names {
870                        let Naming { index, name } = name?;
871                        // Skip this naming if it's naming a function that
872                        // doesn't actually exist.
873                        if (index as usize) >= self.result.module.functions.len() {
874                            continue;
875                        }
876
877                        // Store the name unconditionally, regardless of
878                        // whether we're parsing debuginfo, since function
879                        // names are almost always present in the
880                        // final compilation artifact.
881                        let index = FuncIndex::from_u32(index);
882                        self.result
883                            .debuginfo
884                            .name_section
885                            .func_names
886                            .insert(index, name);
887                    }
888                }
889                wasmparser::Name::Module { name, .. } => {
890                    self.result.module.name =
891                        Some(self.result.module.strings.insert(name).panic_on_oom());
892                    if self.tunables.debug_native {
893                        self.result.debuginfo.name_section.module_name = Some(name);
894                    }
895                }
896                wasmparser::Name::Local(reader) => {
897                    if !self.tunables.debug_native {
898                        continue;
899                    }
900                    for f in reader {
901                        let f = f?;
902                        // Skip this naming if it's naming a function that
903                        // doesn't actually exist.
904                        if (f.index as usize) >= self.result.module.functions.len() {
905                            continue;
906                        }
907                        for name in f.names {
908                            let Naming { index, name } = name?;
909
910                            self.result
911                                .debuginfo
912                                .name_section
913                                .locals_names
914                                .entry(FuncIndex::from_u32(f.index))
915                                .or_insert(HashMap::new())
916                                .insert(index, name);
917                        }
918                    }
919                }
920                wasmparser::Name::Label(_)
921                | wasmparser::Name::Type(_)
922                | wasmparser::Name::Table(_)
923                | wasmparser::Name::Global(_)
924                | wasmparser::Name::Memory(_)
925                | wasmparser::Name::Element(_)
926                | wasmparser::Name::Data(_)
927                | wasmparser::Name::Tag(_)
928                | wasmparser::Name::Field(_)
929                | wasmparser::Name::Unknown { .. } => {}
930            }
931        }
932        Ok(())
933    }
934}
935
936impl TypeConvert for ModuleEnvironment<'_, '_> {
937    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
938        WasmparserTypeConverter::new(&self.types, |idx| {
939            self.result.module.types[idx].unwrap_module_type_index()
940        })
941        .lookup_heap_type(index)
942    }
943
944    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
945        WasmparserTypeConverter::new(&self.types, |idx| {
946            self.result.module.types[idx].unwrap_module_type_index()
947        })
948        .lookup_type_index(index)
949    }
950}
951
952impl ModuleTranslation<'_> {
953    /// Attempts to convert segmented memory initialization into static
954    /// initialization for the module that this translation represents.
955    ///
956    /// If this module's memory initialization is not compatible with paged
957    /// initialization then this won't change anything. Otherwise if it is
958    /// compatible then the `memory_initialization` field will be updated.
959    ///
960    /// Takes a `page_size` argument in order to ensure that all
961    /// initialization is page-aligned for mmap-ability, and
962    /// `max_image_size_always_allowed` to control how we decide
963    /// whether to use static init.
964    ///
965    /// We will try to avoid generating very sparse images, which are
966    /// possible if e.g. a module has an initializer at offset 0 and a
967    /// very high offset (say, 1 GiB). To avoid this, we use a dual
968    /// condition: we always allow images less than
969    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
970    /// can set this if desired to ensure that static init should
971    /// always be done if the size of the module or its heaps is
972    /// otherwise bounded by the system. We also allow images with
973    /// static init data bigger than that, but only if it is "dense",
974    /// defined as having at least half (50%) of its pages with some
975    /// data.
976    ///
977    /// We could do something slightly better by building a dense part
978    /// and keeping a sparse list of outlier/leftover segments (see
979    /// issue #3820). This would also allow mostly-static init of
980    /// modules that have some dynamically-placed data segments. But,
981    /// for now, this is sufficient to allow a system that "knows what
982    /// it's doing" to always get static init.
983    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
984        // This method only attempts to transform a `Segmented` memory init
985        // into a `Static` one, no other state.
986        if !self.module.memory_initialization.is_segmented() {
987            return;
988        }
989
990        // First a dry run of memory initialization is performed. This
991        // collects information about the extent of memory initialized for each
992        // memory as well as the size of all data segments being copied in.
993        struct Memory {
994            data_size: u64,
995            min_addr: u64,
996            max_addr: u64,
997            // The `usize` here is a pointer into `self.data` which is the list
998            // of data segments corresponding to what was found in the original
999            // wasm module.
1000            segments: Vec<(usize, StaticMemoryInitializer)>,
1001        }
1002        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
1003        for _ in 0..self.module.memories.len() {
1004            info.push(Memory {
1005                data_size: 0,
1006                min_addr: u64::MAX,
1007                max_addr: 0,
1008                segments: Vec::new(),
1009            });
1010        }
1011
1012        struct InitMemoryAtCompileTime<'a> {
1013            module: &'a Module,
1014            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
1015            idx: usize,
1016        }
1017        impl InitMemory for InitMemoryAtCompileTime<'_> {
1018            fn memory_size_in_bytes(
1019                &mut self,
1020                memory_index: MemoryIndex,
1021            ) -> Result<u64, SizeOverflow> {
1022                self.module.memories[memory_index].minimum_byte_size()
1023            }
1024
1025            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
1026                match (expr.ops(), self.module.memories[memory_index].idx_type) {
1027                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
1028                        Some(offset.cast_unsigned().into())
1029                    }
1030                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
1031                    _ => None,
1032                }
1033            }
1034
1035            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
1036                // Currently `Static` only applies to locally-defined memories,
1037                // so if a data segment references an imported memory then
1038                // transitioning to a `Static` memory initializer is not
1039                // possible.
1040                if self.module.defined_memory_index(memory).is_none() {
1041                    return false;
1042                };
1043                let info = &mut self.info[memory];
1044                let data_len = u64::from(init.data.end - init.data.start);
1045                if data_len > 0 {
1046                    info.data_size += data_len;
1047                    info.min_addr = info.min_addr.min(init.offset);
1048                    info.max_addr = info.max_addr.max(init.offset + data_len);
1049                    info.segments.push((self.idx, init.clone()));
1050                }
1051                self.idx += 1;
1052                true
1053            }
1054        }
1055        let ok = self
1056            .module
1057            .memory_initialization
1058            .init_memory(&mut InitMemoryAtCompileTime {
1059                idx: 0,
1060                module: &self.module,
1061                info: &mut info,
1062            });
1063        if !ok {
1064            return;
1065        }
1066
1067        // Validate that the memory information collected is indeed valid for
1068        // static memory initialization.
1069        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
1070            let image_size = info.max_addr - info.min_addr;
1071
1072            // Simplify things for now by bailing out entirely if any memory has
1073            // a page size smaller than the host's page size. This fixes a case
1074            // where currently initializers are created in host-page-size units
1075            // of length which means that a larger-than-the-entire-memory
1076            // initializer can be created. This can be handled technically but
1077            // would require some more changes to help fix the assert elsewhere
1078            // that this protects against.
1079            if self.module.memories[i].page_size() < page_size {
1080                return;
1081            }
1082
1083            // If the range of memory being initialized is less than twice the
1084            // total size of the data itself then it's assumed that static
1085            // initialization is ok. This means we'll at most double memory
1086            // consumption during the memory image creation process, which is
1087            // currently assumed to "probably be ok" but this will likely need
1088            // tweaks over time.
1089            if image_size < info.data_size.saturating_mul(2) {
1090                continue;
1091            }
1092
1093            // If the memory initialization image is larger than the size of all
1094            // data, then we still allow memory initialization if the image will
1095            // be of a relatively modest size, such as 1MB here.
1096            if image_size < max_image_size_always_allowed {
1097                continue;
1098            }
1099
1100            // At this point memory initialization is concluded to be too
1101            // expensive to do at compile time so it's entirely deferred to
1102            // happen at runtime.
1103            return;
1104        }
1105
1106        // Here's where we've now committed to changing to static memory. The
1107        // memory initialization image is built here from the page data and then
1108        // it's converted to a single initializer.
1109        let data = mem::replace(&mut self.data, Vec::new());
1110        let mut map = collections::PrimaryMap::with_capacity(info.len()).panic_on_oom();
1111        let mut module_data_size = 0u32;
1112        for (memory, info) in info.iter() {
1113            // Create the in-memory `image` which is the initialized contents of
1114            // this linear memory.
1115            let extent = if info.segments.len() > 0 {
1116                (info.max_addr - info.min_addr) as usize
1117            } else {
1118                0
1119            };
1120            let mut image = Vec::with_capacity(extent);
1121            for (idx, init) in info.segments.iter() {
1122                let data = &data[*idx];
1123                assert_eq!(data.len(), init.data.len());
1124                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
1125                if image.len() < offset {
1126                    image.resize(offset, 0u8);
1127                    image.extend_from_slice(data);
1128                } else {
1129                    image.splice(
1130                        offset..(offset + data.len()).min(image.len()),
1131                        data.iter().copied(),
1132                    );
1133                }
1134            }
1135            assert_eq!(image.len(), extent);
1136            assert_eq!(image.capacity(), extent);
1137            let mut offset = if info.segments.len() > 0 {
1138                info.min_addr
1139            } else {
1140                0
1141            };
1142
1143            // Chop off trailing zeros from the image as memory is already
1144            // zero-initialized. Note that `i` is the position of a nonzero
1145            // entry here, so to not lose it we truncate to `i + 1`.
1146            if let Some(i) = image.iter().rposition(|i| *i != 0) {
1147                image.truncate(i + 1);
1148            }
1149
1150            // Also chop off leading zeros, if any.
1151            if let Some(i) = image.iter().position(|i| *i != 0) {
1152                offset += i as u64;
1153                image.drain(..i);
1154            }
1155            let mut len = u64::try_from(image.len()).unwrap();
1156
1157            // The goal is to enable mapping this image directly into memory, so
1158            // the offset into linear memory must be a multiple of the page
1159            // size. If that's not already the case then the image is padded at
1160            // the front and back with extra zeros as necessary
1161            if offset % page_size != 0 {
1162                let zero_padding = offset % page_size;
1163                self.data.push(vec![0; zero_padding as usize].into());
1164                offset -= zero_padding;
1165                len += zero_padding;
1166            }
1167            self.data.push(image.into());
1168            if len % page_size != 0 {
1169                let zero_padding = page_size - (len % page_size);
1170                self.data.push(vec![0; zero_padding as usize].into());
1171                len += zero_padding;
1172            }
1173
1174            // Offset/length should now always be page-aligned.
1175            assert!(offset % page_size == 0);
1176            assert!(len % page_size == 0);
1177
1178            // Create the `StaticMemoryInitializer` which describes this image,
1179            // only needed if the image is actually present and has a nonzero
1180            // length. The `offset` has been calculates above, originally
1181            // sourced from `info.min_addr`. The `data` field is the extent
1182            // within the final data segment we'll emit to an ELF image, which
1183            // is the concatenation of `self.data`, so here it's the size of
1184            // the section-so-far plus the current segment we're appending.
1185            let len = u32::try_from(len).unwrap();
1186            let init = if len > 0 {
1187                Some(StaticMemoryInitializer {
1188                    offset,
1189                    data: module_data_size..module_data_size + len,
1190                })
1191            } else {
1192                None
1193            };
1194            let idx = map.push(init).panic_on_oom();
1195            assert_eq!(idx, memory);
1196            module_data_size += len;
1197        }
1198        self.data_align = Some(page_size);
1199        self.module.memory_initialization = MemoryInitialization::Static { map };
1200    }
1201
1202    /// Attempts to convert the module's table initializers to
1203    /// FuncTable form where possible. This enables lazy table
1204    /// initialization later by providing a one-to-one map of initial
1205    /// table values, without having to parse all segments.
1206    pub fn try_func_table_init(&mut self) {
1207        // This should be large enough to support very large Wasm
1208        // modules with huge funcref tables, but small enough to avoid
1209        // OOMs or DoS on truly sparse tables.
1210        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1211
1212        // First convert any element-initialized tables to images of just that
1213        // single function if the minimum size of the table allows doing so.
1214        for ((_, init), (_, table)) in self
1215            .module
1216            .table_initialization
1217            .initial_values
1218            .iter_mut()
1219            .zip(
1220                self.module
1221                    .tables
1222                    .iter()
1223                    .skip(self.module.num_imported_tables),
1224            )
1225        {
1226            let table_size = table.limits.min;
1227            if table_size > MAX_FUNC_TABLE_SIZE {
1228                continue;
1229            }
1230            if let TableInitialValue::Expr(expr) = init {
1231                if let [ConstOp::RefFunc(f)] = expr.ops() {
1232                    *init = TableInitialValue::Null {
1233                        precomputed: collections::vec![*f; table_size as usize].panic_on_oom(),
1234                    };
1235                }
1236            }
1237        }
1238
1239        let mut segments = mem::take(&mut self.module.table_initialization.segments)
1240            .into_iter()
1241            .peekable();
1242
1243        // The goal of this loop is to interpret a table segment and apply it
1244        // "statically" to a local table. This will iterate over segments and
1245        // apply them one-by-one to each table.
1246        //
1247        // If any segment can't be applied, however, then this loop exits and
1248        // all remaining segments are placed back into the segment list. This is
1249        // because segments are supposed to be initialized one-at-a-time which
1250        // means that intermediate state is visible with respect to traps. If
1251        // anything isn't statically known to not trap it's pessimistically
1252        // assumed to trap meaning all further segment initializers must be
1253        // applied manually at instantiation time.
1254        while let Some(segment) = segments.peek() {
1255            let defined_index = match self.module.defined_table_index(segment.table_index) {
1256                Some(index) => index,
1257                // Skip imported tables: we can't provide a preconstructed
1258                // table for them, because their values depend on the
1259                // imported table overlaid with whatever segments we have.
1260                None => break,
1261            };
1262
1263            // If the base of this segment is dynamic, then we can't
1264            // include it in the statically-built array of initial
1265            // contents.
1266            let offset = match segment.offset.ops() {
1267                &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
1268                &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
1269                _ => break,
1270            };
1271
1272            // Get the end of this segment. If out-of-bounds, or too
1273            // large for our dense table representation, then skip the
1274            // segment.
1275            let top = match offset.checked_add(segment.elements.len()) {
1276                Some(top) => top,
1277                None => break,
1278            };
1279            let table_size = self.module.tables[segment.table_index].limits.min;
1280            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1281                break;
1282            }
1283
1284            match self.module.tables[segment.table_index]
1285                .ref_type
1286                .heap_type
1287                .top()
1288            {
1289                WasmHeapTopType::Func => {}
1290                // If this is not a funcref table, then we can't support a
1291                // pre-computed table of function indices. Technically this
1292                // initializer won't trap so we could continue processing
1293                // segments, but that's left as a future optimization if
1294                // necessary.
1295                WasmHeapTopType::Any
1296                | WasmHeapTopType::Extern
1297                | WasmHeapTopType::Cont
1298                | WasmHeapTopType::Exn => break,
1299            }
1300
1301            // Function indices can be optimized here, but fully general
1302            // expressions are deferred to get evaluated at runtime.
1303            let function_elements = match &segment.elements {
1304                TableSegmentElements::Functions(indices) => indices,
1305                TableSegmentElements::Expressions(_) => break,
1306            };
1307
1308            let precomputed =
1309                match &mut self.module.table_initialization.initial_values[defined_index] {
1310                    TableInitialValue::Null { precomputed } => precomputed,
1311
1312                    // If this table is still listed as an initial value here
1313                    // then that means the initial size of the table doesn't
1314                    // support a precomputed function list, so skip this.
1315                    // Technically this won't trap so it's possible to process
1316                    // further initializers, but that's left as a future
1317                    // optimization.
1318                    TableInitialValue::Expr(_) => break,
1319                };
1320
1321            // At this point we're committing to pre-initializing the table
1322            // with the `segment` that's being iterated over. This segment is
1323            // applied to the `precomputed` list for the table by ensuring
1324            // it's large enough to hold the segment and then copying the
1325            // segment into the precomputed list.
1326            if precomputed.len() < top as usize {
1327                precomputed
1328                    .resize(top as usize, FuncIndex::reserved_value())
1329                    .panic_on_oom();
1330            }
1331            let dst = &mut precomputed[offset as usize..top as usize];
1332            dst.copy_from_slice(&function_elements);
1333
1334            // advance the iterator to see the next segment
1335            let _ = segments.next();
1336        }
1337        self.module.table_initialization.segments = segments.try_collect().panic_on_oom();
1338    }
1339}