wasmtime_environ/compile/
module_environ.rs

1use crate::module::{
2    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
3    TableSegmentElements,
4};
5use crate::{
6    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
7    EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,
8    ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
9    TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, Unsigned,
10    WasmError, WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
11};
12use crate::{StaticModuleIndex, prelude::*};
13use anyhow::{Result, bail};
14use cranelift_entity::SecondaryMap;
15use cranelift_entity::packed_option::ReservedValue;
16use std::borrow::Cow;
17use std::collections::HashMap;
18use std::mem;
19use std::path::PathBuf;
20use std::sync::Arc;
21use wasmparser::{
22    CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
23    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
24    Validator, ValidatorResources, types::Types,
25};
26
/// Object containing the standalone environment information.
///
/// An environment is created with [`ModuleEnvironment::new`] and consumed by
/// [`ModuleEnvironment::translate`], which hands back the accumulated
/// [`ModuleTranslation`].
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    //
    // `validator` is fed every payload before it is translated; `tunables`
    // is consulted for the debuginfo-related settings
    // (`generate_native_debuginfo`, `parse_wasm_debuginfo`).
    validator: &'a mut Validator,
    tunables: &'a Tunables,
}
39
40/// The result of translating via `ModuleEnvironment`. Function bodies are not
41/// yet translated, and data initializers have not yet been copied out of the
42/// original buffer.
43#[derive(Default)]
44pub struct ModuleTranslation<'data> {
45    /// Module information.
46    pub module: Module,
47
48    /// The input wasm binary.
49    ///
50    /// This can be useful, for example, when modules are parsed from a
51    /// component and the embedder wants access to the raw wasm modules
52    /// themselves.
53    pub wasm: &'data [u8],
54
55    /// References to the function bodies.
56    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,
57
58    /// For each imported function, the single statically-known defined function
59    /// that satisfies that import, if any. This is used to turn what would
60    /// otherwise be indirect calls through the imports table into direct calls,
61    /// when possible.
62    pub known_imported_functions:
63        SecondaryMap<FuncIndex, Option<(StaticModuleIndex, DefinedFuncIndex)>>,
64
65    /// A list of type signatures which are considered exported from this
66    /// module, or those that can possibly be called. This list is sorted, and
67    /// trampolines for each of these signatures are required.
68    pub exported_signatures: Vec<ModuleInternedTypeIndex>,
69
70    /// DWARF debug information, if enabled, parsed from the module.
71    pub debuginfo: DebugInfoData<'data>,
72
73    /// Set if debuginfo was found but it was not parsed due to `Tunables`
74    /// configuration.
75    pub has_unparsed_debuginfo: bool,
76
77    /// List of data segments found in this module which should be concatenated
78    /// together for the final compiled artifact.
79    ///
80    /// These data segments, when concatenated, are indexed by the
81    /// `MemoryInitializer` type.
82    pub data: Vec<Cow<'data, [u8]>>,
83
84    /// The desired alignment of `data` in the final data section of the object
85    /// file that we'll emit.
86    ///
87    /// Note that this is 1 by default but `MemoryInitialization::Static` might
88    /// switch this to a higher alignment to facilitate mmap-ing data from
89    /// an object file into a linear memory.
90    pub data_align: Option<u64>,
91
92    /// Total size of all data pushed onto `data` so far.
93    total_data: u32,
94
95    /// List of passive element segments found in this module which will get
96    /// concatenated for the final artifact.
97    pub passive_data: Vec<&'data [u8]>,
98
99    /// Total size of all passive data pushed into `passive_data` so far.
100    total_passive_data: u32,
101
102    /// When we're parsing the code section this will be incremented so we know
103    /// which function is currently being defined.
104    code_index: u32,
105
106    /// The type information of the current module made available at the end of the
107    /// validation process.
108    types: Option<Types>,
109}
110
111impl<'data> ModuleTranslation<'data> {
112    /// Returns a reference to the type information of the current module.
113    pub fn get_types(&self) -> &Types {
114        self.types
115            .as_ref()
116            .expect("module type information to be available")
117    }
118}
119
/// Contains function data: byte code and its offset in the module.
///
/// One value is recorded for every code section entry encountered during
/// translation; the body itself is not translated at that point.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body, as handed out by the module
    /// validator when the corresponding code section entry was registered.
    pub validator: FuncToValidate<ValidatorResources>,
}
127
// Debug-related data collected while translating a module. The section
// readers below borrow directly from the custom sections of the input binary.
// (Plain comments are used on purpose so the `missing_docs` expectation below
// stays fulfilled.)
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    // Core DWARF sections (`.debug_abbrev`, `.debug_info`, `.debug_line`,
    // `.debug_str`, ...).
    pub dwarf: Dwarf<'a>,
    // Names gathered from the `name` custom section.
    pub name_section: NameSection<'a>,
    // Bookkeeping about the wasm file itself (code section offset, imported
    // function count, per-function metadata).
    pub wasm_file: WasmFileInfo,
    // Additional DWARF sections that are kept outside of `dwarf`.
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    // DWARF package index sections (`.debug_cu_index` / `.debug_tu_index`).
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}
141
// `gimli::Dwarf` specialised to the reader type used throughout this file.
#[expect(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

// Reader used for every DWARF section: a borrowed, little-endian byte slice.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
146
// Names borrowed from the input binary. NOTE(review): presumably filled in
// from the wasm `name` custom section; the code that populates these fields
// is outside this view -- confirm there.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    // Name of the module itself, if one was recorded.
    pub module_name: Option<&'a str>,
    // Function names keyed by function index.
    pub func_names: HashMap<FuncIndex, &'a str>,
    // Per-function maps of local names; the inner `u32` key is presumably the
    // local's index within the function -- TODO confirm.
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}
154
// Information about the wasm file that is recorded for debuginfo purposes.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    // Not written by the translation code visible here; presumably set by the
    // embedder/caller -- TODO confirm.
    pub path: Option<PathBuf>,
    // Start of the code section's byte range within the binary.
    pub code_section_offset: u64,
    // Number of function imports seen in the import section.
    pub imported_func_count: u32,
    // One entry per code section entry, only recorded when the
    // `generate_native_debuginfo` tunable is enabled.
    pub funcs: Vec<FunctionMetadata>,
}
163
// Per-function type metadata recorded for debuginfo generation.
#[derive(Debug)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    // Parameter types taken from the function's signature.
    pub params: Box<[WasmValType]>,
    // `(count, type)` pairs as yielded by the function body's locals reader.
    pub locals: Box<[(u32, WasmValType)]>,
}
170
171impl<'a, 'data> ModuleEnvironment<'a, 'data> {
172    /// Allocates the environment data structures.
173    pub fn new(
174        tunables: &'a Tunables,
175        validator: &'a mut Validator,
176        types: &'a mut ModuleTypesBuilder,
177    ) -> Self {
178        Self {
179            result: ModuleTranslation::default(),
180            types,
181            tunables,
182            validator,
183        }
184    }
185
186    /// Translate a wasm module using this environment.
187    ///
188    /// This function will translate the `data` provided with `parser`,
189    /// validating everything along the way with this environment's validator.
190    ///
191    /// The result of translation, [`ModuleTranslation`], contains everything
192    /// necessary to compile functions afterwards as well as learn type
193    /// information about the module at runtime.
194    pub fn translate(
195        mut self,
196        parser: Parser,
197        data: &'data [u8],
198    ) -> Result<ModuleTranslation<'data>> {
199        self.result.wasm = data;
200
201        for payload in parser.parse_all(data) {
202            self.translate_payload(payload?)?;
203        }
204
205        Ok(self.result)
206    }
207
208    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
209        match payload {
210            Payload::Version {
211                num,
212                encoding,
213                range,
214            } => {
215                self.validator.version(num, encoding, &range)?;
216                match encoding {
217                    Encoding::Module => {}
218                    Encoding::Component => {
219                        bail!("expected a WebAssembly module but was given a WebAssembly component")
220                    }
221                }
222            }
223
224            Payload::End(offset) => {
225                self.result.types = Some(self.validator.end(offset)?);
226
227                // With the `escaped_funcs` set of functions finished
228                // we can calculate the set of signatures that are exported as
229                // the set of exported functions' signatures.
230                self.result.exported_signatures = self
231                    .result
232                    .module
233                    .functions
234                    .iter()
235                    .filter_map(|(_, func)| {
236                        if func.is_escaping() {
237                            Some(func.signature.unwrap_module_type_index())
238                        } else {
239                            None
240                        }
241                    })
242                    .collect();
243                self.result.exported_signatures.sort_unstable();
244                self.result.exported_signatures.dedup();
245            }
246
247            Payload::TypeSection(types) => {
248                self.validator.type_section(&types)?;
249
250                let count = self.validator.types(0).unwrap().core_type_count_in_module();
251                log::trace!("interning {count} Wasm types");
252
253                let capacity = usize::try_from(count).unwrap();
254                self.result.module.types.reserve(capacity);
255                self.types.reserve_wasm_signatures(capacity);
256
257                // Iterate over each *rec group* -- not type -- defined in the
258                // types section. Rec groups are the unit of canonicalization
259                // and therefore the unit at which we need to process at a
260                // time. `wasmparser` has already done the hard work of
261                // de-duplicating and canonicalizing the rec groups within the
262                // module for us, we just need to translate them into our data
263                // structures. Note that, if the Wasm defines duplicate rec
264                // groups, we need copy the duplicates over (shallowly) as well,
265                // so that our types index space doesn't have holes.
266                let mut type_index = 0;
267                while type_index < count {
268                    let validator_types = self.validator.types(0).unwrap();
269
270                    // Get the rec group for the current type index, which is
271                    // always the first type defined in a rec group.
272                    log::trace!("looking up wasmparser type for index {type_index}");
273                    let core_type_id = validator_types.core_type_at_in_module(type_index);
274                    log::trace!(
275                        "  --> {core_type_id:?} = {:?}",
276                        validator_types[core_type_id],
277                    );
278                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
279                    debug_assert_eq!(
280                        validator_types
281                            .rec_group_elements(rec_group_id)
282                            .position(|id| id == core_type_id),
283                        Some(0)
284                    );
285
286                    // Intern the rec group and then fill in this module's types
287                    // index space.
288                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
289                    let elems = self.types.rec_group_elements(interned);
290                    let len = elems.len();
291                    self.result.module.types.reserve(len);
292                    for ty in elems {
293                        self.result.module.types.push(ty.into());
294                    }
295
296                    // Advance `type_index` to the start of the next rec group.
297                    type_index += u32::try_from(len).unwrap();
298                }
299            }
300
301            Payload::ImportSection(imports) => {
302                self.validator.import_section(&imports)?;
303
304                let cnt = usize::try_from(imports.count()).unwrap();
305                self.result.module.initializers.reserve(cnt);
306
307                for entry in imports {
308                    let import = entry?;
309                    let ty = match import.ty {
310                        TypeRef::Func(index) => {
311                            let index = TypeIndex::from_u32(index);
312                            let interned_index = self.result.module.types[index];
313                            self.result.module.num_imported_funcs += 1;
314                            self.result.debuginfo.wasm_file.imported_func_count += 1;
315                            EntityType::Function(interned_index)
316                        }
317                        TypeRef::Memory(ty) => {
318                            self.result.module.num_imported_memories += 1;
319                            EntityType::Memory(ty.into())
320                        }
321                        TypeRef::Global(ty) => {
322                            self.result.module.num_imported_globals += 1;
323                            EntityType::Global(self.convert_global_type(&ty)?)
324                        }
325                        TypeRef::Table(ty) => {
326                            self.result.module.num_imported_tables += 1;
327                            EntityType::Table(self.convert_table_type(&ty)?)
328                        }
329                        TypeRef::Tag(ty) => {
330                            let index = TypeIndex::from_u32(ty.func_type_idx);
331                            let signature = self.result.module.types[index];
332                            let tag = Tag { signature };
333                            self.result.module.num_imported_tags += 1;
334                            EntityType::Tag(tag)
335                        }
336                    };
337                    self.declare_import(import.module, import.name, ty);
338                }
339            }
340
341            Payload::FunctionSection(functions) => {
342                self.validator.function_section(&functions)?;
343
344                let cnt = usize::try_from(functions.count()).unwrap();
345                self.result.module.functions.reserve_exact(cnt);
346
347                for entry in functions {
348                    let sigindex = entry?;
349                    let ty = TypeIndex::from_u32(sigindex);
350                    let interned_index = self.result.module.types[ty];
351                    self.result.module.push_function(interned_index);
352                }
353            }
354
355            Payload::TableSection(tables) => {
356                self.validator.table_section(&tables)?;
357                let cnt = usize::try_from(tables.count()).unwrap();
358                self.result.module.tables.reserve_exact(cnt);
359
360                for entry in tables {
361                    let wasmparser::Table { ty, init } = entry?;
362                    let table = self.convert_table_type(&ty)?;
363                    self.result.module.tables.push(table);
364                    let init = match init {
365                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
366                            precomputed: Vec::new(),
367                        },
368                        wasmparser::TableInit::Expr(expr) => {
369                            let (init, escaped) = ConstExpr::from_wasmparser(expr)?;
370                            for f in escaped {
371                                self.flag_func_escaped(f);
372                            }
373                            TableInitialValue::Expr(init)
374                        }
375                    };
376                    self.result
377                        .module
378                        .table_initialization
379                        .initial_values
380                        .push(init);
381                }
382            }
383
384            Payload::MemorySection(memories) => {
385                self.validator.memory_section(&memories)?;
386
387                let cnt = usize::try_from(memories.count()).unwrap();
388                self.result.module.memories.reserve_exact(cnt);
389
390                for entry in memories {
391                    let memory = entry?;
392                    self.result.module.memories.push(memory.into());
393                }
394            }
395
396            Payload::TagSection(tags) => {
397                self.validator.tag_section(&tags)?;
398
399                for entry in tags {
400                    let sigindex = entry?.func_type_idx;
401                    let ty = TypeIndex::from_u32(sigindex);
402                    let interned_index = self.result.module.types[ty];
403                    self.result.module.push_tag(interned_index);
404                }
405            }
406
407            Payload::GlobalSection(globals) => {
408                self.validator.global_section(&globals)?;
409
410                let cnt = usize::try_from(globals.count()).unwrap();
411                self.result.module.globals.reserve_exact(cnt);
412
413                for entry in globals {
414                    let wasmparser::Global { ty, init_expr } = entry?;
415                    let (initializer, escaped) = ConstExpr::from_wasmparser(init_expr)?;
416                    for f in escaped {
417                        self.flag_func_escaped(f);
418                    }
419                    let ty = self.convert_global_type(&ty)?;
420                    self.result.module.globals.push(ty);
421                    self.result.module.global_initializers.push(initializer);
422                }
423            }
424
425            Payload::ExportSection(exports) => {
426                self.validator.export_section(&exports)?;
427
428                let cnt = usize::try_from(exports.count()).unwrap();
429                self.result.module.exports.reserve(cnt);
430
431                for entry in exports {
432                    let wasmparser::Export { name, kind, index } = entry?;
433                    let entity = match kind {
434                        ExternalKind::Func => {
435                            let index = FuncIndex::from_u32(index);
436                            self.flag_func_escaped(index);
437                            EntityIndex::Function(index)
438                        }
439                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
440                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
441                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
442                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
443                    };
444                    self.result
445                        .module
446                        .exports
447                        .insert(String::from(name), entity);
448                }
449            }
450
451            Payload::StartSection { func, range } => {
452                self.validator.start_section(func, &range)?;
453
454                let func_index = FuncIndex::from_u32(func);
455                self.flag_func_escaped(func_index);
456                debug_assert!(self.result.module.start_func.is_none());
457                self.result.module.start_func = Some(func_index);
458            }
459
460            Payload::ElementSection(elements) => {
461                self.validator.element_section(&elements)?;
462
463                for (index, entry) in elements.into_iter().enumerate() {
464                    let wasmparser::Element {
465                        kind,
466                        items,
467                        range: _,
468                    } = entry?;
469
470                    // Build up a list of `FuncIndex` corresponding to all the
471                    // entries listed in this segment. Note that it's not
472                    // possible to create anything other than a `ref.null
473                    // extern` for externref segments, so those just get
474                    // translated to the reserved value of `FuncIndex`.
475                    let elements = match items {
476                        ElementItems::Functions(funcs) => {
477                            let mut elems =
478                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
479                            for func in funcs {
480                                let func = FuncIndex::from_u32(func?);
481                                self.flag_func_escaped(func);
482                                elems.push(func);
483                            }
484                            TableSegmentElements::Functions(elems.into())
485                        }
486                        ElementItems::Expressions(_ty, items) => {
487                            let mut exprs =
488                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
489                            for expr in items {
490                                let (expr, escaped) = ConstExpr::from_wasmparser(expr?)?;
491                                exprs.push(expr);
492                                for func in escaped {
493                                    self.flag_func_escaped(func);
494                                }
495                            }
496                            TableSegmentElements::Expressions(exprs.into())
497                        }
498                    };
499
500                    match kind {
501                        ElementKind::Active {
502                            table_index,
503                            offset_expr,
504                        } => {
505                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
506                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
507                            debug_assert!(escaped.is_empty());
508
509                            self.result
510                                .module
511                                .table_initialization
512                                .segments
513                                .push(TableSegment {
514                                    table_index,
515                                    offset,
516                                    elements,
517                                });
518                        }
519
520                        ElementKind::Passive => {
521                            let elem_index = ElemIndex::from_u32(index as u32);
522                            let index = self.result.module.passive_elements.len();
523                            self.result.module.passive_elements.push(elements);
524                            self.result
525                                .module
526                                .passive_elements_map
527                                .insert(elem_index, index);
528                        }
529
530                        ElementKind::Declared => {}
531                    }
532                }
533            }
534
535            Payload::CodeSectionStart { count, range, .. } => {
536                self.validator.code_section_start(&range)?;
537                let cnt = usize::try_from(count).unwrap();
538                self.result.function_body_inputs.reserve_exact(cnt);
539                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
540            }
541
542            Payload::CodeSectionEntry(body) => {
543                let validator = self.validator.code_section_entry(&body)?;
544                let func_index =
545                    self.result.code_index + self.result.module.num_imported_funcs as u32;
546                let func_index = FuncIndex::from_u32(func_index);
547
548                if self.tunables.generate_native_debuginfo {
549                    let sig_index = self.result.module.functions[func_index]
550                        .signature
551                        .unwrap_module_type_index();
552                    let sig = self.types[sig_index].unwrap_func();
553                    let mut locals = Vec::new();
554                    for pair in body.get_locals_reader()? {
555                        let (cnt, ty) = pair?;
556                        let ty = self.convert_valtype(ty)?;
557                        locals.push((cnt, ty));
558                    }
559                    self.result
560                        .debuginfo
561                        .wasm_file
562                        .funcs
563                        .push(FunctionMetadata {
564                            locals: locals.into_boxed_slice(),
565                            params: sig.params().into(),
566                        });
567                }
568                self.result
569                    .function_body_inputs
570                    .push(FunctionBodyData { validator, body });
571                self.result.code_index += 1;
572            }
573
574            Payload::DataSection(data) => {
575                self.validator.data_section(&data)?;
576
577                let initializers = match &mut self.result.module.memory_initialization {
578                    MemoryInitialization::Segmented(i) => i,
579                    _ => unreachable!(),
580                };
581
582                let cnt = usize::try_from(data.count()).unwrap();
583                initializers.reserve_exact(cnt);
584                self.result.data.reserve_exact(cnt);
585
586                for (index, entry) in data.into_iter().enumerate() {
587                    let wasmparser::Data {
588                        kind,
589                        data,
590                        range: _,
591                    } = entry?;
592                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
593                        let range = u32::try_from(data.len())
594                            .ok()
595                            .and_then(|size| {
596                                let start = *total;
597                                let end = start.checked_add(size)?;
598                                Some(start..end)
599                            })
600                            .ok_or_else(|| {
601                                WasmError::Unsupported(format!(
602                                    "more than 4 gigabytes of data in wasm module",
603                                ))
604                            })?;
605                        *total += range.end - range.start;
606                        Ok(range)
607                    };
608                    match kind {
609                        DataKind::Active {
610                            memory_index,
611                            offset_expr,
612                        } => {
613                            let range = mk_range(&mut self.result.total_data)?;
614                            let memory_index = MemoryIndex::from_u32(memory_index);
615                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
616                            debug_assert!(escaped.is_empty());
617
618                            initializers.push(MemoryInitializer {
619                                memory_index,
620                                offset,
621                                data: range,
622                            });
623                            self.result.data.push(data.into());
624                        }
625                        DataKind::Passive => {
626                            let data_index = DataIndex::from_u32(index as u32);
627                            let range = mk_range(&mut self.result.total_passive_data)?;
628                            self.result.passive_data.push(data);
629                            self.result
630                                .module
631                                .passive_data_map
632                                .insert(data_index, range);
633                        }
634                    }
635                }
636            }
637
638            Payload::DataCountSection { count, range } => {
639                self.validator.data_count_section(count, &range)?;
640
641                // Note: the count passed in here is the *total* segment count
642                // There is no way to reserve for just the passive segments as
643                // they are discovered when iterating the data section entries
644                // Given that the total segment count might be much larger than
645                // the passive count, do not reserve anything here.
646            }
647
648            Payload::CustomSection(s)
649                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
650            {
651                bail!(
652                    "\
653Support for interface types has temporarily been removed from `wasmtime`.
654
655For more information about this temporary change you can read on the issue online:
656
657    https://github.com/bytecodealliance/wasmtime/issues/1271
658
659and for re-adding support for interface types you can see this issue:
660
661    https://github.com/bytecodealliance/wasmtime/issues/677
662"
663                )
664            }
665
666            Payload::CustomSection(s) => {
667                self.register_custom_section(&s);
668            }
669
670            // It's expected that validation will probably reject other
671            // payloads such as `UnknownSection` or those related to the
672            // component model. If, however, something gets past validation then
673            // that's a bug in Wasmtime as we forgot to implement something.
674            other => {
675                self.validator.payload(&other)?;
676                panic!("unimplemented section in wasm file {other:?}");
677            }
678        }
679        Ok(())
680    }
681
682    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
683        match section.as_known() {
684            KnownCustom::Name(name) => {
685                let result = self.name_section(name);
686                if let Err(e) = result {
687                    log::warn!("failed to parse name section {e:?}");
688                }
689            }
690            _ => {
691                let name = section.name().trim_end_matches(".dwo");
692                if name.starts_with(".debug_") {
693                    self.dwarf_section(name, section);
694                }
695            }
696        }
697    }
698
699    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
700        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
701            self.result.has_unparsed_debuginfo = true;
702            return;
703        }
704        let info = &mut self.result.debuginfo;
705        let dwarf = &mut info.dwarf;
706        let endian = gimli::LittleEndian;
707        let data = section.data();
708        let slice = gimli::EndianSlice::new(data, endian);
709
710        match name {
711            // `gimli::Dwarf` fields.
712            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
713            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
714            ".debug_info" => {
715                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
716            }
717            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
718            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
719            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
720            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
721            ".debug_str_sup" => {
722                let mut dwarf_sup: Dwarf<'data> = Default::default();
723                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
724                dwarf.sup = Some(Arc::new(dwarf_sup));
725            }
726            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),
727
728            // Additional fields.
729            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
730            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
731            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
732            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),
733
734            // DWARF package fields
735            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
736            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),
737
738            // We don't use these at the moment.
739            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
740            other => {
741                log::warn!("unknown debug section `{other}`");
742                return;
743            }
744        }
745
746        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
747        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
748    }
749
750    /// Declares a new import with the `module` and `field` names, importing the
751    /// `ty` specified.
752    ///
753    /// Note that this method is somewhat tricky due to the implementation of
754    /// the module linking proposal. In the module linking proposal two-level
755    /// imports are recast as single-level imports of instances. That recasting
756    /// happens here by recording an import of an instance for the first time
757    /// we see a two-level import.
758    ///
759    /// When the module linking proposal is disabled, however, disregard this
760    /// logic and instead work directly with two-level imports since no
761    /// instances are defined.
762    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
763        let index = self.push_type(ty);
764        self.result.module.initializers.push(Initializer::Import {
765            name: module.to_owned(),
766            field: field.to_owned(),
767            index,
768        });
769    }
770
771    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
772        match ty {
773            EntityType::Function(ty) => EntityIndex::Function({
774                let func_index = self
775                    .result
776                    .module
777                    .push_function(ty.unwrap_module_type_index());
778                // Imported functions can escape; in fact, they've already done
779                // so to get here.
780                self.flag_func_escaped(func_index);
781                func_index
782            }),
783            EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
784            EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
785            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
786            EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
787        }
788    }
789
790    fn flag_func_escaped(&mut self, func: FuncIndex) {
791        let ty = &mut self.result.module.functions[func];
792        // If this was already assigned a funcref index no need to re-assign it.
793        if ty.is_escaping() {
794            return;
795        }
796        let index = self.result.module.num_escaped_funcs as u32;
797        ty.func_ref = FuncRefIndex::from_u32(index);
798        self.result.module.num_escaped_funcs += 1;
799    }
800
801    /// Parses the Name section of the wasm module.
802    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
803        for subsection in names {
804            match subsection? {
805                wasmparser::Name::Function(names) => {
806                    for name in names {
807                        let Naming { index, name } = name?;
808                        // Skip this naming if it's naming a function that
809                        // doesn't actually exist.
810                        if (index as usize) >= self.result.module.functions.len() {
811                            continue;
812                        }
813
814                        // Store the name unconditionally, regardless of
815                        // whether we're parsing debuginfo, since function
816                        // names are almost always present in the
817                        // final compilation artifact.
818                        let index = FuncIndex::from_u32(index);
819                        self.result
820                            .debuginfo
821                            .name_section
822                            .func_names
823                            .insert(index, name);
824                    }
825                }
826                wasmparser::Name::Module { name, .. } => {
827                    self.result.module.name = Some(name.to_string());
828                    if self.tunables.generate_native_debuginfo {
829                        self.result.debuginfo.name_section.module_name = Some(name);
830                    }
831                }
832                wasmparser::Name::Local(reader) => {
833                    if !self.tunables.generate_native_debuginfo {
834                        continue;
835                    }
836                    for f in reader {
837                        let f = f?;
838                        // Skip this naming if it's naming a function that
839                        // doesn't actually exist.
840                        if (f.index as usize) >= self.result.module.functions.len() {
841                            continue;
842                        }
843                        for name in f.names {
844                            let Naming { index, name } = name?;
845
846                            self.result
847                                .debuginfo
848                                .name_section
849                                .locals_names
850                                .entry(FuncIndex::from_u32(f.index))
851                                .or_insert(HashMap::new())
852                                .insert(index, name);
853                        }
854                    }
855                }
856                wasmparser::Name::Label(_)
857                | wasmparser::Name::Type(_)
858                | wasmparser::Name::Table(_)
859                | wasmparser::Name::Global(_)
860                | wasmparser::Name::Memory(_)
861                | wasmparser::Name::Element(_)
862                | wasmparser::Name::Data(_)
863                | wasmparser::Name::Tag(_)
864                | wasmparser::Name::Field(_)
865                | wasmparser::Name::Unknown { .. } => {}
866            }
867        }
868        Ok(())
869    }
870}
871
872impl TypeConvert for ModuleEnvironment<'_, '_> {
873    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
874        WasmparserTypeConverter::new(&self.types, |idx| {
875            self.result.module.types[idx].unwrap_module_type_index()
876        })
877        .lookup_heap_type(index)
878    }
879
880    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
881        WasmparserTypeConverter::new(&self.types, |idx| {
882            self.result.module.types[idx].unwrap_module_type_index()
883        })
884        .lookup_type_index(index)
885    }
886}
887
888impl ModuleTranslation<'_> {
889    /// Attempts to convert segmented memory initialization into static
890    /// initialization for the module that this translation represents.
891    ///
892    /// If this module's memory initialization is not compatible with paged
893    /// initialization then this won't change anything. Otherwise if it is
894    /// compatible then the `memory_initialization` field will be updated.
895    ///
896    /// Takes a `page_size` argument in order to ensure that all
897    /// initialization is page-aligned for mmap-ability, and
898    /// `max_image_size_always_allowed` to control how we decide
899    /// whether to use static init.
900    ///
901    /// We will try to avoid generating very sparse images, which are
902    /// possible if e.g. a module has an initializer at offset 0 and a
903    /// very high offset (say, 1 GiB). To avoid this, we use a dual
904    /// condition: we always allow images less than
905    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
906    /// can set this if desired to ensure that static init should
907    /// always be done if the size of the module or its heaps is
908    /// otherwise bounded by the system. We also allow images with
909    /// static init data bigger than that, but only if it is "dense",
910    /// defined as having at least half (50%) of its pages with some
911    /// data.
912    ///
913    /// We could do something slightly better by building a dense part
914    /// and keeping a sparse list of outlier/leftover segments (see
915    /// issue #3820). This would also allow mostly-static init of
916    /// modules that have some dynamically-placed data segments. But,
917    /// for now, this is sufficient to allow a system that "knows what
918    /// it's doing" to always get static init.
919    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
920        // This method only attempts to transform a `Segmented` memory init
921        // into a `Static` one, no other state.
922        if !self.module.memory_initialization.is_segmented() {
923            return;
924        }
925
926        // First a dry run of memory initialization is performed. This
927        // collects information about the extent of memory initialized for each
928        // memory as well as the size of all data segments being copied in.
929        struct Memory {
930            data_size: u64,
931            min_addr: u64,
932            max_addr: u64,
933            // The `usize` here is a pointer into `self.data` which is the list
934            // of data segments corresponding to what was found in the original
935            // wasm module.
936            segments: Vec<(usize, StaticMemoryInitializer)>,
937        }
938        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
939        for _ in 0..self.module.memories.len() {
940            info.push(Memory {
941                data_size: 0,
942                min_addr: u64::MAX,
943                max_addr: 0,
944                segments: Vec::new(),
945            });
946        }
947
948        struct InitMemoryAtCompileTime<'a> {
949            module: &'a Module,
950            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
951            idx: usize,
952        }
953        impl InitMemory for InitMemoryAtCompileTime<'_> {
954            fn memory_size_in_bytes(
955                &mut self,
956                memory_index: MemoryIndex,
957            ) -> Result<u64, SizeOverflow> {
958                self.module.memories[memory_index].minimum_byte_size()
959            }
960
961            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
962                match (expr.ops(), self.module.memories[memory_index].idx_type) {
963                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
964                        Some(offset.unsigned().into())
965                    }
966                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.unsigned()),
967                    _ => None,
968                }
969            }
970
971            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
972                // Currently `Static` only applies to locally-defined memories,
973                // so if a data segment references an imported memory then
974                // transitioning to a `Static` memory initializer is not
975                // possible.
976                if self.module.defined_memory_index(memory).is_none() {
977                    return false;
978                };
979                let info = &mut self.info[memory];
980                let data_len = u64::from(init.data.end - init.data.start);
981                if data_len > 0 {
982                    info.data_size += data_len;
983                    info.min_addr = info.min_addr.min(init.offset);
984                    info.max_addr = info.max_addr.max(init.offset + data_len);
985                    info.segments.push((self.idx, init.clone()));
986                }
987                self.idx += 1;
988                true
989            }
990        }
991        let ok = self
992            .module
993            .memory_initialization
994            .init_memory(&mut InitMemoryAtCompileTime {
995                idx: 0,
996                module: &self.module,
997                info: &mut info,
998            });
999        if !ok {
1000            return;
1001        }
1002
1003        // Validate that the memory information collected is indeed valid for
1004        // static memory initialization.
1005        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
1006            let image_size = info.max_addr - info.min_addr;
1007
1008            // Simplify things for now by bailing out entirely if any memory has
1009            // a page size smaller than the host's page size. This fixes a case
1010            // where currently initializers are created in host-page-size units
1011            // of length which means that a larger-than-the-entire-memory
1012            // initializer can be created. This can be handled technically but
1013            // would require some more changes to help fix the assert elsewhere
1014            // that this protects against.
1015            if self.module.memories[i].page_size() < page_size {
1016                return;
1017            }
1018
1019            // If the range of memory being initialized is less than twice the
1020            // total size of the data itself then it's assumed that static
1021            // initialization is ok. This means we'll at most double memory
1022            // consumption during the memory image creation process, which is
1023            // currently assumed to "probably be ok" but this will likely need
1024            // tweaks over time.
1025            if image_size < info.data_size.saturating_mul(2) {
1026                continue;
1027            }
1028
1029            // If the memory initialization image is larger than the size of all
1030            // data, then we still allow memory initialization if the image will
1031            // be of a relatively modest size, such as 1MB here.
1032            if image_size < max_image_size_always_allowed {
1033                continue;
1034            }
1035
1036            // At this point memory initialization is concluded to be too
1037            // expensive to do at compile time so it's entirely deferred to
1038            // happen at runtime.
1039            return;
1040        }
1041
1042        // Here's where we've now committed to changing to static memory. The
1043        // memory initialization image is built here from the page data and then
1044        // it's converted to a single initializer.
1045        let data = mem::replace(&mut self.data, Vec::new());
1046        let mut map = PrimaryMap::with_capacity(info.len());
1047        let mut module_data_size = 0u32;
1048        for (memory, info) in info.iter() {
1049            // Create the in-memory `image` which is the initialized contents of
1050            // this linear memory.
1051            let extent = if info.segments.len() > 0 {
1052                (info.max_addr - info.min_addr) as usize
1053            } else {
1054                0
1055            };
1056            let mut image = Vec::with_capacity(extent);
1057            for (idx, init) in info.segments.iter() {
1058                let data = &data[*idx];
1059                assert_eq!(data.len(), init.data.len());
1060                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
1061                if image.len() < offset {
1062                    image.resize(offset, 0u8);
1063                    image.extend_from_slice(data);
1064                } else {
1065                    image.splice(
1066                        offset..(offset + data.len()).min(image.len()),
1067                        data.iter().copied(),
1068                    );
1069                }
1070            }
1071            assert_eq!(image.len(), extent);
1072            assert_eq!(image.capacity(), extent);
1073            let mut offset = if info.segments.len() > 0 {
1074                info.min_addr
1075            } else {
1076                0
1077            };
1078
1079            // Chop off trailing zeros from the image as memory is already
1080            // zero-initialized. Note that `i` is the position of a nonzero
1081            // entry here, so to not lose it we truncate to `i + 1`.
1082            if let Some(i) = image.iter().rposition(|i| *i != 0) {
1083                image.truncate(i + 1);
1084            }
1085
1086            // Also chop off leading zeros, if any.
1087            if let Some(i) = image.iter().position(|i| *i != 0) {
1088                offset += i as u64;
1089                image.drain(..i);
1090            }
1091            let mut len = u64::try_from(image.len()).unwrap();
1092
1093            // The goal is to enable mapping this image directly into memory, so
1094            // the offset into linear memory must be a multiple of the page
1095            // size. If that's not already the case then the image is padded at
1096            // the front and back with extra zeros as necessary
1097            if offset % page_size != 0 {
1098                let zero_padding = offset % page_size;
1099                self.data.push(vec![0; zero_padding as usize].into());
1100                offset -= zero_padding;
1101                len += zero_padding;
1102            }
1103            self.data.push(image.into());
1104            if len % page_size != 0 {
1105                let zero_padding = page_size - (len % page_size);
1106                self.data.push(vec![0; zero_padding as usize].into());
1107                len += zero_padding;
1108            }
1109
1110            // Offset/length should now always be page-aligned.
1111            assert!(offset % page_size == 0);
1112            assert!(len % page_size == 0);
1113
1114            // Create the `StaticMemoryInitializer` which describes this image,
1115            // only needed if the image is actually present and has a nonzero
1116            // length. The `offset` has been calculates above, originally
1117            // sourced from `info.min_addr`. The `data` field is the extent
1118            // within the final data segment we'll emit to an ELF image, which
1119            // is the concatenation of `self.data`, so here it's the size of
1120            // the section-so-far plus the current segment we're appending.
1121            let len = u32::try_from(len).unwrap();
1122            let init = if len > 0 {
1123                Some(StaticMemoryInitializer {
1124                    offset,
1125                    data: module_data_size..module_data_size + len,
1126                })
1127            } else {
1128                None
1129            };
1130            let idx = map.push(init);
1131            assert_eq!(idx, memory);
1132            module_data_size += len;
1133        }
1134        self.data_align = Some(page_size);
1135        self.module.memory_initialization = MemoryInitialization::Static { map };
1136    }
1137
1138    /// Attempts to convert the module's table initializers to
1139    /// FuncTable form where possible. This enables lazy table
1140    /// initialization later by providing a one-to-one map of initial
1141    /// table values, without having to parse all segments.
1142    pub fn try_func_table_init(&mut self) {
1143        // This should be large enough to support very large Wasm
1144        // modules with huge funcref tables, but small enough to avoid
1145        // OOMs or DoS on truly sparse tables.
1146        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1147
1148        // First convert any element-initialized tables to images of just that
1149        // single function if the minimum size of the table allows doing so.
1150        for ((_, init), (_, table)) in self
1151            .module
1152            .table_initialization
1153            .initial_values
1154            .iter_mut()
1155            .zip(
1156                self.module
1157                    .tables
1158                    .iter()
1159                    .skip(self.module.num_imported_tables),
1160            )
1161        {
1162            let table_size = table.limits.min;
1163            if table_size > MAX_FUNC_TABLE_SIZE {
1164                continue;
1165            }
1166            if let TableInitialValue::Expr(expr) = init {
1167                if let [ConstOp::RefFunc(f)] = expr.ops() {
1168                    *init = TableInitialValue::Null {
1169                        precomputed: vec![*f; table_size as usize],
1170                    };
1171                }
1172            }
1173        }
1174
1175        let mut segments = mem::take(&mut self.module.table_initialization.segments)
1176            .into_iter()
1177            .peekable();
1178
1179        // The goal of this loop is to interpret a table segment and apply it
1180        // "statically" to a local table. This will iterate over segments and
1181        // apply them one-by-one to each table.
1182        //
1183        // If any segment can't be applied, however, then this loop exits and
1184        // all remaining segments are placed back into the segment list. This is
1185        // because segments are supposed to be initialized one-at-a-time which
1186        // means that intermediate state is visible with respect to traps. If
1187        // anything isn't statically known to not trap it's pessimistically
1188        // assumed to trap meaning all further segment initializers must be
1189        // applied manually at instantiation time.
1190        while let Some(segment) = segments.peek() {
1191            let defined_index = match self.module.defined_table_index(segment.table_index) {
1192                Some(index) => index,
1193                // Skip imported tables: we can't provide a preconstructed
1194                // table for them, because their values depend on the
1195                // imported table overlaid with whatever segments we have.
1196                None => break,
1197            };
1198
1199            // If the base of this segment is dynamic, then we can't
1200            // include it in the statically-built array of initial
1201            // contents.
1202            let offset = match segment.offset.ops() {
1203                &[ConstOp::I32Const(offset)] => u64::from(offset.unsigned()),
1204                &[ConstOp::I64Const(offset)] => offset.unsigned(),
1205                _ => break,
1206            };
1207
1208            // Get the end of this segment. If out-of-bounds, or too
1209            // large for our dense table representation, then skip the
1210            // segment.
1211            let top = match offset.checked_add(segment.elements.len()) {
1212                Some(top) => top,
1213                None => break,
1214            };
1215            let table_size = self.module.tables[segment.table_index].limits.min;
1216            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1217                break;
1218            }
1219
1220            match self.module.tables[segment.table_index]
1221                .ref_type
1222                .heap_type
1223                .top()
1224            {
1225                WasmHeapTopType::Func => {}
1226                // If this is not a funcref table, then we can't support a
1227                // pre-computed table of function indices. Technically this
1228                // initializer won't trap so we could continue processing
1229                // segments, but that's left as a future optimization if
1230                // necessary.
1231                WasmHeapTopType::Any
1232                | WasmHeapTopType::Extern
1233                | WasmHeapTopType::Cont
1234                | WasmHeapTopType::Exn => break,
1235            }
1236
1237            // Function indices can be optimized here, but fully general
1238            // expressions are deferred to get evaluated at runtime.
1239            let function_elements = match &segment.elements {
1240                TableSegmentElements::Functions(indices) => indices,
1241                TableSegmentElements::Expressions(_) => break,
1242            };
1243
1244            let precomputed =
1245                match &mut self.module.table_initialization.initial_values[defined_index] {
1246                    TableInitialValue::Null { precomputed } => precomputed,
1247
1248                    // If this table is still listed as an initial value here
1249                    // then that means the initial size of the table doesn't
1250                    // support a precomputed function list, so skip this.
1251                    // Technically this won't trap so it's possible to process
1252                    // further initializers, but that's left as a future
1253                    // optimization.
1254                    TableInitialValue::Expr(_) => break,
1255                };
1256
1257            // At this point we're committing to pre-initializing the table
1258            // with the `segment` that's being iterated over. This segment is
1259            // applied to the `precomputed` list for the table by ensuring
1260            // it's large enough to hold the segment and then copying the
1261            // segment into the precomputed list.
1262            if precomputed.len() < top as usize {
1263                precomputed.resize(top as usize, FuncIndex::reserved_value());
1264            }
1265            let dst = &mut precomputed[offset as usize..top as usize];
1266            dst.copy_from_slice(&function_elements);
1267
1268            // advance the iterator to see the next segment
1269            let _ = segments.next();
1270        }
1271        self.module.table_initialization.segments = segments.collect();
1272    }
1273}