wasmtime_environ/compile/
module_environ.rs

1use crate::module::{
2    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
3    TableSegmentElements,
4};
5use crate::prelude::*;
6use crate::{
7    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
8    EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,
9    ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
10    TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, Unsigned,
11    WasmError, WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
12};
13use anyhow::{bail, Result};
14use cranelift_entity::packed_option::ReservedValue;
15use std::borrow::Cow;
16use std::collections::HashMap;
17use std::mem;
18use std::path::PathBuf;
19use std::sync::Arc;
20use wasmparser::{
21    types::Types, CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
22    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
23    Validator, ValidatorResources,
24};
25
/// Object containing the standalone environment information.
///
/// Bundles the in-progress [`ModuleTranslation`] with the borrowed type
/// interner, validator, and tunables that are consulted while payloads are
/// being translated.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    /// Validator that section payloads are passed to before they are
    /// translated.
    validator: &'a mut Validator,
    /// Configuration knobs; in this file they are read to decide whether
    /// debug information is collected.
    tunables: &'a Tunables,
}
38
/// The result of translating via `ModuleEnvironment`. Function bodies are not
/// yet translated, and data initializers have not yet been copied out of the
/// original buffer.
#[derive(Default)]
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted and
    /// de-duplicated, and trampolines for each of these signatures are
    /// required.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    pub data_align: Option<u64>,

    /// Total size of all data pushed onto `data` so far.
    total_data: u32,

    /// List of passive data segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size of all passive data pushed into `passive_data` so far.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    types: Option<Types>,
}
102
103impl<'data> ModuleTranslation<'data> {
104    /// Returns a reference to the type information of the current module.
105    pub fn get_types(&self) -> &Types {
106        self.types
107            .as_ref()
108            .expect("module type information to be available")
109    }
110}
111
/// Contains function data: byte code and its offset in the module.
///
/// One of these is recorded for every entry of the code section; the body is
/// stored as-is and is not translated at that point.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body, as handed back by the module
    /// validator when the code section entry was registered.
    pub validator: FuncToValidate<ValidatorResources>,
}
119
/// Debug information gathered from a module's custom sections while it is
/// being translated.
///
/// The `.debug_*` fields hold borrowed views of the matching custom sections
/// of the input binary; sections that were not present keep their default
/// (empty) value.
#[derive(Debug, Default)]
#[allow(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    // Sections that live directly inside `gimli::Dwarf`.
    pub dwarf: Dwarf<'a>,
    // Names parsed from the `name` custom section.
    pub name_section: NameSection<'a>,
    // Per-function and whole-file metadata recorded during translation.
    pub wasm_file: WasmFileInfo,
    // Location and range sections are kept here as well because
    // `dwarf.locations` and `dwarf.ranges` are rebuilt from them each time a
    // recognized debug section is registered.
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    // DWARF package index sections.
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}
133
// `gimli::Dwarf` specialized to the little-endian, borrowed-slice reader used
// for all debug sections in this module.
#[allow(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;
136
/// Reader over debug section bytes borrowed from the input binary, always
/// interpreted as little-endian.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
138
/// Names borrowed from the input binary.
///
/// NOTE(review): presumably filled in from the wasm `name` custom section by
/// `name_section`, which is not visible here — confirm against that method.
#[derive(Debug, Default)]
#[allow(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    // Name of the module itself, if one was recorded.
    pub module_name: Option<&'a str>,
    // Function names keyed by function index.
    pub func_names: HashMap<FuncIndex, &'a str>,
    // Local names: outer key is the function index; the inner `u32` key is
    // presumably the local's index within that function — TODO confirm.
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}
146
/// Facts about the wasm binary that are recorded alongside the debug
/// sections during translation.
#[derive(Debug, Default)]
#[allow(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    // Not assigned anywhere in the translation code visible here; stays
    // `None` unless set elsewhere.
    pub path: Option<PathBuf>,
    // Start of the code section's reported range within the binary.
    pub code_section_offset: u64,
    // Incremented once per imported function.
    pub imported_func_count: u32,
    // One entry per code section entry, pushed only when
    // `Tunables::generate_native_debuginfo` is enabled.
    pub funcs: Vec<FunctionMetadata>,
}
155
/// Parameter and local types of one defined function, recorded for native
/// debuginfo generation.
#[derive(Debug)]
#[allow(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    // Parameter types taken from the function's signature.
    pub params: Box<[WasmValType]>,
    // `(count, type)` pairs exactly as yielded by the body's locals reader.
    pub locals: Box<[(u32, WasmValType)]>,
}
162
163impl<'a, 'data> ModuleEnvironment<'a, 'data> {
164    /// Allocates the environment data structures.
165    pub fn new(
166        tunables: &'a Tunables,
167        validator: &'a mut Validator,
168        types: &'a mut ModuleTypesBuilder,
169    ) -> Self {
170        Self {
171            result: ModuleTranslation::default(),
172            types,
173            tunables,
174            validator,
175        }
176    }
177
178    /// Translate a wasm module using this environment.
179    ///
180    /// This function will translate the `data` provided with `parser`,
181    /// validating everything along the way with this environment's validator.
182    ///
183    /// The result of translation, [`ModuleTranslation`], contains everything
184    /// necessary to compile functions afterwards as well as learn type
185    /// information about the module at runtime.
186    pub fn translate(
187        mut self,
188        parser: Parser,
189        data: &'data [u8],
190    ) -> Result<ModuleTranslation<'data>> {
191        self.result.wasm = data;
192
193        for payload in parser.parse_all(data) {
194            self.translate_payload(payload?)?;
195        }
196
197        Ok(self.result)
198    }
199
200    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
201        match payload {
202            Payload::Version {
203                num,
204                encoding,
205                range,
206            } => {
207                self.validator.version(num, encoding, &range)?;
208                match encoding {
209                    Encoding::Module => {}
210                    Encoding::Component => {
211                        bail!("expected a WebAssembly module but was given a WebAssembly component")
212                    }
213                }
214            }
215
216            Payload::End(offset) => {
217                self.result.types = Some(self.validator.end(offset)?);
218
219                // With the `escaped_funcs` set of functions finished
220                // we can calculate the set of signatures that are exported as
221                // the set of exported functions' signatures.
222                self.result.exported_signatures = self
223                    .result
224                    .module
225                    .functions
226                    .iter()
227                    .filter_map(|(_, func)| {
228                        if func.is_escaping() {
229                            Some(func.signature.unwrap_module_type_index())
230                        } else {
231                            None
232                        }
233                    })
234                    .collect();
235                self.result.exported_signatures.sort_unstable();
236                self.result.exported_signatures.dedup();
237            }
238
239            Payload::TypeSection(types) => {
240                self.validator.type_section(&types)?;
241
242                let count = self.validator.types(0).unwrap().core_type_count_in_module();
243                log::trace!("interning {count} Wasm types");
244
245                let capacity = usize::try_from(count).unwrap();
246                self.result.module.types.reserve(capacity);
247                self.types.reserve_wasm_signatures(capacity);
248
249                // Iterate over each *rec group* -- not type -- defined in the
250                // types section. Rec groups are the unit of canonicalization
251                // and therefore the unit at which we need to process at a
252                // time. `wasmparser` has already done the hard work of
253                // de-duplicating and canonicalizing the rec groups within the
254                // module for us, we just need to translate them into our data
255                // structures. Note that, if the Wasm defines duplicate rec
256                // groups, we need copy the duplicates over (shallowly) as well,
257                // so that our types index space doesn't have holes.
258                let mut type_index = 0;
259                while type_index < count {
260                    let validator_types = self.validator.types(0).unwrap();
261
262                    // Get the rec group for the current type index, which is
263                    // always the first type defined in a rec group.
264                    log::trace!("looking up wasmparser type for index {type_index}");
265                    let core_type_id = validator_types.core_type_at_in_module(type_index);
266                    log::trace!(
267                        "  --> {core_type_id:?} = {:?}",
268                        validator_types[core_type_id],
269                    );
270                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
271                    debug_assert_eq!(
272                        validator_types
273                            .rec_group_elements(rec_group_id)
274                            .position(|id| id == core_type_id),
275                        Some(0)
276                    );
277
278                    // Intern the rec group and then fill in this module's types
279                    // index space.
280                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
281                    let elems = self.types.rec_group_elements(interned);
282                    let len = elems.len();
283                    self.result.module.types.reserve(len);
284                    for ty in elems {
285                        self.result.module.types.push(ty.into());
286                    }
287
288                    // Advance `type_index` to the start of the next rec group.
289                    type_index += u32::try_from(len).unwrap();
290                }
291            }
292
293            Payload::ImportSection(imports) => {
294                self.validator.import_section(&imports)?;
295
296                let cnt = usize::try_from(imports.count()).unwrap();
297                self.result.module.initializers.reserve(cnt);
298
299                for entry in imports {
300                    let import = entry?;
301                    let ty = match import.ty {
302                        TypeRef::Func(index) => {
303                            let index = TypeIndex::from_u32(index);
304                            let interned_index = self.result.module.types[index];
305                            self.result.module.num_imported_funcs += 1;
306                            self.result.debuginfo.wasm_file.imported_func_count += 1;
307                            EntityType::Function(interned_index)
308                        }
309                        TypeRef::Memory(ty) => {
310                            self.result.module.num_imported_memories += 1;
311                            EntityType::Memory(ty.into())
312                        }
313                        TypeRef::Global(ty) => {
314                            self.result.module.num_imported_globals += 1;
315                            EntityType::Global(self.convert_global_type(&ty))
316                        }
317                        TypeRef::Table(ty) => {
318                            self.result.module.num_imported_tables += 1;
319                            EntityType::Table(self.convert_table_type(&ty)?)
320                        }
321                        TypeRef::Tag(ty) => {
322                            let index = TypeIndex::from_u32(ty.func_type_idx);
323                            let signature = self.result.module.types[index];
324                            let tag = Tag { signature };
325                            self.result.module.num_imported_tags += 1;
326                            EntityType::Tag(tag)
327                        }
328                    };
329                    self.declare_import(import.module, import.name, ty);
330                }
331            }
332
333            Payload::FunctionSection(functions) => {
334                self.validator.function_section(&functions)?;
335
336                let cnt = usize::try_from(functions.count()).unwrap();
337                self.result.module.functions.reserve_exact(cnt);
338
339                for entry in functions {
340                    let sigindex = entry?;
341                    let ty = TypeIndex::from_u32(sigindex);
342                    let interned_index = self.result.module.types[ty];
343                    self.result.module.push_function(interned_index);
344                }
345            }
346
347            Payload::TableSection(tables) => {
348                self.validator.table_section(&tables)?;
349                let cnt = usize::try_from(tables.count()).unwrap();
350                self.result.module.tables.reserve_exact(cnt);
351
352                for entry in tables {
353                    let wasmparser::Table { ty, init } = entry?;
354                    let table = self.convert_table_type(&ty)?;
355                    self.result.module.tables.push(table);
356                    let init = match init {
357                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
358                            precomputed: Vec::new(),
359                        },
360                        wasmparser::TableInit::Expr(expr) => {
361                            let (init, escaped) = ConstExpr::from_wasmparser(expr)?;
362                            for f in escaped {
363                                self.flag_func_escaped(f);
364                            }
365                            TableInitialValue::Expr(init)
366                        }
367                    };
368                    self.result
369                        .module
370                        .table_initialization
371                        .initial_values
372                        .push(init);
373                }
374            }
375
376            Payload::MemorySection(memories) => {
377                self.validator.memory_section(&memories)?;
378
379                let cnt = usize::try_from(memories.count()).unwrap();
380                self.result.module.memories.reserve_exact(cnt);
381
382                for entry in memories {
383                    let memory = entry?;
384                    self.result.module.memories.push(memory.into());
385                }
386            }
387
388            Payload::TagSection(tags) => {
389                self.validator.tag_section(&tags)?;
390
391                for entry in tags {
392                    let sigindex = entry?.func_type_idx;
393                    let ty = TypeIndex::from_u32(sigindex);
394                    let interned_index = self.result.module.types[ty];
395                    self.result.module.push_tag(interned_index);
396                }
397            }
398
399            Payload::GlobalSection(globals) => {
400                self.validator.global_section(&globals)?;
401
402                let cnt = usize::try_from(globals.count()).unwrap();
403                self.result.module.globals.reserve_exact(cnt);
404
405                for entry in globals {
406                    let wasmparser::Global { ty, init_expr } = entry?;
407                    let (initializer, escaped) = ConstExpr::from_wasmparser(init_expr)?;
408                    for f in escaped {
409                        self.flag_func_escaped(f);
410                    }
411                    let ty = self.convert_global_type(&ty);
412                    self.result.module.globals.push(ty);
413                    self.result.module.global_initializers.push(initializer);
414                }
415            }
416
417            Payload::ExportSection(exports) => {
418                self.validator.export_section(&exports)?;
419
420                let cnt = usize::try_from(exports.count()).unwrap();
421                self.result.module.exports.reserve(cnt);
422
423                for entry in exports {
424                    let wasmparser::Export { name, kind, index } = entry?;
425                    let entity = match kind {
426                        ExternalKind::Func => {
427                            let index = FuncIndex::from_u32(index);
428                            self.flag_func_escaped(index);
429                            EntityIndex::Function(index)
430                        }
431                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
432                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
433                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
434                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
435                    };
436                    self.result
437                        .module
438                        .exports
439                        .insert(String::from(name), entity);
440                }
441            }
442
443            Payload::StartSection { func, range } => {
444                self.validator.start_section(func, &range)?;
445
446                let func_index = FuncIndex::from_u32(func);
447                self.flag_func_escaped(func_index);
448                debug_assert!(self.result.module.start_func.is_none());
449                self.result.module.start_func = Some(func_index);
450            }
451
452            Payload::ElementSection(elements) => {
453                self.validator.element_section(&elements)?;
454
455                for (index, entry) in elements.into_iter().enumerate() {
456                    let wasmparser::Element {
457                        kind,
458                        items,
459                        range: _,
460                    } = entry?;
461
462                    // Build up a list of `FuncIndex` corresponding to all the
463                    // entries listed in this segment. Note that it's not
464                    // possible to create anything other than a `ref.null
465                    // extern` for externref segments, so those just get
466                    // translated to the reserved value of `FuncIndex`.
467                    let elements = match items {
468                        ElementItems::Functions(funcs) => {
469                            let mut elems =
470                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
471                            for func in funcs {
472                                let func = FuncIndex::from_u32(func?);
473                                self.flag_func_escaped(func);
474                                elems.push(func);
475                            }
476                            TableSegmentElements::Functions(elems.into())
477                        }
478                        ElementItems::Expressions(_ty, items) => {
479                            let mut exprs =
480                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
481                            for expr in items {
482                                let (expr, escaped) = ConstExpr::from_wasmparser(expr?)?;
483                                exprs.push(expr);
484                                for func in escaped {
485                                    self.flag_func_escaped(func);
486                                }
487                            }
488                            TableSegmentElements::Expressions(exprs.into())
489                        }
490                    };
491
492                    match kind {
493                        ElementKind::Active {
494                            table_index,
495                            offset_expr,
496                        } => {
497                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
498                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
499                            debug_assert!(escaped.is_empty());
500
501                            self.result
502                                .module
503                                .table_initialization
504                                .segments
505                                .push(TableSegment {
506                                    table_index,
507                                    offset,
508                                    elements: elements.into(),
509                                });
510                        }
511
512                        ElementKind::Passive => {
513                            let elem_index = ElemIndex::from_u32(index as u32);
514                            let index = self.result.module.passive_elements.len();
515                            self.result.module.passive_elements.push(elements.into());
516                            self.result
517                                .module
518                                .passive_elements_map
519                                .insert(elem_index, index);
520                        }
521
522                        ElementKind::Declared => {}
523                    }
524                }
525            }
526
527            Payload::CodeSectionStart { count, range, .. } => {
528                self.validator.code_section_start(count, &range)?;
529                let cnt = usize::try_from(count).unwrap();
530                self.result.function_body_inputs.reserve_exact(cnt);
531                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
532            }
533
534            Payload::CodeSectionEntry(body) => {
535                let validator = self.validator.code_section_entry(&body)?;
536                let func_index =
537                    self.result.code_index + self.result.module.num_imported_funcs as u32;
538                let func_index = FuncIndex::from_u32(func_index);
539
540                if self.tunables.generate_native_debuginfo {
541                    let sig_index = self.result.module.functions[func_index]
542                        .signature
543                        .unwrap_module_type_index();
544                    let sig = self.types[sig_index].unwrap_func();
545                    let mut locals = Vec::new();
546                    for pair in body.get_locals_reader()? {
547                        let (cnt, ty) = pair?;
548                        let ty = self.convert_valtype(ty);
549                        locals.push((cnt, ty));
550                    }
551                    self.result
552                        .debuginfo
553                        .wasm_file
554                        .funcs
555                        .push(FunctionMetadata {
556                            locals: locals.into_boxed_slice(),
557                            params: sig.params().into(),
558                        });
559                }
560                self.result
561                    .function_body_inputs
562                    .push(FunctionBodyData { validator, body });
563                self.result.code_index += 1;
564            }
565
566            Payload::DataSection(data) => {
567                self.validator.data_section(&data)?;
568
569                let initializers = match &mut self.result.module.memory_initialization {
570                    MemoryInitialization::Segmented(i) => i,
571                    _ => unreachable!(),
572                };
573
574                let cnt = usize::try_from(data.count()).unwrap();
575                initializers.reserve_exact(cnt);
576                self.result.data.reserve_exact(cnt);
577
578                for (index, entry) in data.into_iter().enumerate() {
579                    let wasmparser::Data {
580                        kind,
581                        data,
582                        range: _,
583                    } = entry?;
584                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
585                        let range = u32::try_from(data.len())
586                            .ok()
587                            .and_then(|size| {
588                                let start = *total;
589                                let end = start.checked_add(size)?;
590                                Some(start..end)
591                            })
592                            .ok_or_else(|| {
593                                WasmError::Unsupported(format!(
594                                    "more than 4 gigabytes of data in wasm module",
595                                ))
596                            })?;
597                        *total += range.end - range.start;
598                        Ok(range)
599                    };
600                    match kind {
601                        DataKind::Active {
602                            memory_index,
603                            offset_expr,
604                        } => {
605                            let range = mk_range(&mut self.result.total_data)?;
606                            let memory_index = MemoryIndex::from_u32(memory_index);
607                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
608                            debug_assert!(escaped.is_empty());
609
610                            initializers.push(MemoryInitializer {
611                                memory_index,
612                                offset,
613                                data: range,
614                            });
615                            self.result.data.push(data.into());
616                        }
617                        DataKind::Passive => {
618                            let data_index = DataIndex::from_u32(index as u32);
619                            let range = mk_range(&mut self.result.total_passive_data)?;
620                            self.result.passive_data.push(data);
621                            self.result
622                                .module
623                                .passive_data_map
624                                .insert(data_index, range);
625                        }
626                    }
627                }
628            }
629
630            Payload::DataCountSection { count, range } => {
631                self.validator.data_count_section(count, &range)?;
632
633                // Note: the count passed in here is the *total* segment count
634                // There is no way to reserve for just the passive segments as
635                // they are discovered when iterating the data section entries
636                // Given that the total segment count might be much larger than
637                // the passive count, do not reserve anything here.
638            }
639
640            Payload::CustomSection(s)
641                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
642            {
643                bail!(
644                    "\
645Support for interface types has temporarily been removed from `wasmtime`.
646
647For more information about this temporary change you can read on the issue online:
648
649    https://github.com/bytecodealliance/wasmtime/issues/1271
650
651and for re-adding support for interface types you can see this issue:
652
653    https://github.com/bytecodealliance/wasmtime/issues/677
654"
655                )
656            }
657
658            Payload::CustomSection(s) => {
659                self.register_custom_section(&s);
660            }
661
662            // It's expected that validation will probably reject other
663            // payloads such as `UnknownSection` or those related to the
664            // component model. If, however, something gets past validation then
665            // that's a bug in Wasmtime as we forgot to implement something.
666            other => {
667                self.validator.payload(&other)?;
668                panic!("unimplemented section in wasm file {other:?}");
669            }
670        }
671        Ok(())
672    }
673
674    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
675        match section.as_known() {
676            KnownCustom::Name(name) => {
677                let result = self.name_section(name);
678                if let Err(e) = result {
679                    log::warn!("failed to parse name section {:?}", e);
680                }
681            }
682            _ => {
683                let name = section.name().trim_end_matches(".dwo");
684                if name.starts_with(".debug_") {
685                    self.dwarf_section(name, section);
686                }
687            }
688        }
689    }
690
691    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
692        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
693            self.result.has_unparsed_debuginfo = true;
694            return;
695        }
696        let info = &mut self.result.debuginfo;
697        let dwarf = &mut info.dwarf;
698        let endian = gimli::LittleEndian;
699        let data = section.data();
700        let slice = gimli::EndianSlice::new(data, endian);
701
702        match name {
703            // `gimli::Dwarf` fields.
704            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
705            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
706            ".debug_info" => {
707                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
708            }
709            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
710            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
711            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
712            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
713            ".debug_str_sup" => {
714                let mut dwarf_sup: Dwarf<'data> = Default::default();
715                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
716                dwarf.sup = Some(Arc::new(dwarf_sup));
717            }
718            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),
719
720            // Additional fields.
721            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
722            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
723            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
724            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),
725
726            // DWARF package fields
727            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
728            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),
729
730            // We don't use these at the moment.
731            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
732            other => {
733                log::warn!("unknown debug section `{}`", other);
734                return;
735            }
736        }
737
738        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
739        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
740    }
741
742    /// Declares a new import with the `module` and `field` names, importing the
743    /// `ty` specified.
744    ///
745    /// Note that this method is somewhat tricky due to the implementation of
746    /// the module linking proposal. In the module linking proposal two-level
747    /// imports are recast as single-level imports of instances. That recasting
748    /// happens here by recording an import of an instance for the first time
749    /// we see a two-level import.
750    ///
751    /// When the module linking proposal is disabled, however, disregard this
752    /// logic and instead work directly with two-level imports since no
753    /// instances are defined.
754    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
755        let index = self.push_type(ty);
756        self.result.module.initializers.push(Initializer::Import {
757            name: module.to_owned(),
758            field: field.to_owned(),
759            index,
760        });
761    }
762
763    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
764        match ty {
765            EntityType::Function(ty) => EntityIndex::Function({
766                let func_index = self
767                    .result
768                    .module
769                    .push_function(ty.unwrap_module_type_index());
770                // Imported functions can escape; in fact, they've already done
771                // so to get here.
772                self.flag_func_escaped(func_index);
773                func_index
774            }),
775            EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
776            EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
777            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
778            EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
779        }
780    }
781
782    fn flag_func_escaped(&mut self, func: FuncIndex) {
783        let ty = &mut self.result.module.functions[func];
784        // If this was already assigned a funcref index no need to re-assign it.
785        if ty.is_escaping() {
786            return;
787        }
788        let index = self.result.module.num_escaped_funcs as u32;
789        ty.func_ref = FuncRefIndex::from_u32(index);
790        self.result.module.num_escaped_funcs += 1;
791    }
792
793    /// Parses the Name section of the wasm module.
794    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
795        for subsection in names {
796            match subsection? {
797                wasmparser::Name::Function(names) => {
798                    for name in names {
799                        let Naming { index, name } = name?;
800                        // Skip this naming if it's naming a function that
801                        // doesn't actually exist.
802                        if (index as usize) >= self.result.module.functions.len() {
803                            continue;
804                        }
805
806                        // Store the name unconditionally, regardless of
807                        // whether we're parsing debuginfo, since function
808                        // names are almost always present in the
809                        // final compilation artifact.
810                        let index = FuncIndex::from_u32(index);
811                        self.result
812                            .debuginfo
813                            .name_section
814                            .func_names
815                            .insert(index, name);
816                    }
817                }
818                wasmparser::Name::Module { name, .. } => {
819                    self.result.module.name = Some(name.to_string());
820                    if self.tunables.generate_native_debuginfo {
821                        self.result.debuginfo.name_section.module_name = Some(name);
822                    }
823                }
824                wasmparser::Name::Local(reader) => {
825                    if !self.tunables.generate_native_debuginfo {
826                        continue;
827                    }
828                    for f in reader {
829                        let f = f?;
830                        // Skip this naming if it's naming a function that
831                        // doesn't actually exist.
832                        if (f.index as usize) >= self.result.module.functions.len() {
833                            continue;
834                        }
835                        for name in f.names {
836                            let Naming { index, name } = name?;
837
838                            self.result
839                                .debuginfo
840                                .name_section
841                                .locals_names
842                                .entry(FuncIndex::from_u32(f.index))
843                                .or_insert(HashMap::new())
844                                .insert(index, name);
845                        }
846                    }
847                }
848                wasmparser::Name::Label(_)
849                | wasmparser::Name::Type(_)
850                | wasmparser::Name::Table(_)
851                | wasmparser::Name::Global(_)
852                | wasmparser::Name::Memory(_)
853                | wasmparser::Name::Element(_)
854                | wasmparser::Name::Data(_)
855                | wasmparser::Name::Tag(_)
856                | wasmparser::Name::Field(_)
857                | wasmparser::Name::Unknown { .. } => {}
858            }
859        }
860        Ok(())
861    }
862}
863
864impl TypeConvert for ModuleEnvironment<'_, '_> {
865    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
866        WasmparserTypeConverter::new(&self.types, |idx| {
867            self.result.module.types[idx].unwrap_module_type_index()
868        })
869        .lookup_heap_type(index)
870    }
871
872    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
873        WasmparserTypeConverter::new(&self.types, |idx| {
874            self.result.module.types[idx].unwrap_module_type_index()
875        })
876        .lookup_type_index(index)
877    }
878}
879
880impl ModuleTranslation<'_> {
881    /// Attempts to convert segmented memory initialization into static
882    /// initialization for the module that this translation represents.
883    ///
884    /// If this module's memory initialization is not compatible with paged
885    /// initialization then this won't change anything. Otherwise if it is
886    /// compatible then the `memory_initialization` field will be updated.
887    ///
888    /// Takes a `page_size` argument in order to ensure that all
889    /// initialization is page-aligned for mmap-ability, and
890    /// `max_image_size_always_allowed` to control how we decide
891    /// whether to use static init.
892    ///
893    /// We will try to avoid generating very sparse images, which are
894    /// possible if e.g. a module has an initializer at offset 0 and a
895    /// very high offset (say, 1 GiB). To avoid this, we use a dual
896    /// condition: we always allow images less than
897    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
898    /// can set this if desired to ensure that static init should
899    /// always be done if the size of the module or its heaps is
900    /// otherwise bounded by the system. We also allow images with
901    /// static init data bigger than that, but only if it is "dense",
902    /// defined as having at least half (50%) of its pages with some
903    /// data.
904    ///
905    /// We could do something slightly better by building a dense part
906    /// and keeping a sparse list of outlier/leftover segments (see
907    /// issue #3820). This would also allow mostly-static init of
908    /// modules that have some dynamically-placed data segments. But,
909    /// for now, this is sufficient to allow a system that "knows what
910    /// it's doing" to always get static init.
911    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
912        // This method only attempts to transform a `Segmented` memory init
913        // into a `Static` one, no other state.
914        if !self.module.memory_initialization.is_segmented() {
915            return;
916        }
917
918        // First a dry run of memory initialization is performed. This
919        // collects information about the extent of memory initialized for each
920        // memory as well as the size of all data segments being copied in.
921        struct Memory {
922            data_size: u64,
923            min_addr: u64,
924            max_addr: u64,
925            // The `usize` here is a pointer into `self.data` which is the list
926            // of data segments corresponding to what was found in the original
927            // wasm module.
928            segments: Vec<(usize, StaticMemoryInitializer)>,
929        }
930        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
931        for _ in 0..self.module.memories.len() {
932            info.push(Memory {
933                data_size: 0,
934                min_addr: u64::MAX,
935                max_addr: 0,
936                segments: Vec::new(),
937            });
938        }
939
940        struct InitMemoryAtCompileTime<'a> {
941            module: &'a Module,
942            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
943            idx: usize,
944        }
945        impl InitMemory for InitMemoryAtCompileTime<'_> {
946            fn memory_size_in_bytes(
947                &mut self,
948                memory_index: MemoryIndex,
949            ) -> Result<u64, SizeOverflow> {
950                self.module.memories[memory_index].minimum_byte_size()
951            }
952
953            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
954                match (expr.ops(), self.module.memories[memory_index].idx_type) {
955                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
956                        Some(offset.unsigned().into())
957                    }
958                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.unsigned()),
959                    _ => None,
960                }
961            }
962
963            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
964                // Currently `Static` only applies to locally-defined memories,
965                // so if a data segment references an imported memory then
966                // transitioning to a `Static` memory initializer is not
967                // possible.
968                if self.module.defined_memory_index(memory).is_none() {
969                    return false;
970                };
971                let info = &mut self.info[memory];
972                let data_len = u64::from(init.data.end - init.data.start);
973                if data_len > 0 {
974                    info.data_size += data_len;
975                    info.min_addr = info.min_addr.min(init.offset);
976                    info.max_addr = info.max_addr.max(init.offset + data_len);
977                    info.segments.push((self.idx, init.clone()));
978                }
979                self.idx += 1;
980                true
981            }
982        }
983        let ok = self
984            .module
985            .memory_initialization
986            .init_memory(&mut InitMemoryAtCompileTime {
987                idx: 0,
988                module: &self.module,
989                info: &mut info,
990            });
991        if !ok {
992            return;
993        }
994
995        // Validate that the memory information collected is indeed valid for
996        // static memory initialization.
997        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
998            let image_size = info.max_addr - info.min_addr;
999
1000            // Simplify things for now by bailing out entirely if any memory has
1001            // a page size smaller than the host's page size. This fixes a case
1002            // where currently initializers are created in host-page-size units
1003            // of length which means that a larger-than-the-entire-memory
1004            // initializer can be created. This can be handled technically but
1005            // would require some more changes to help fix the assert elsewhere
1006            // that this protects against.
1007            if self.module.memories[i].page_size() < page_size {
1008                return;
1009            }
1010
1011            // If the range of memory being initialized is less than twice the
1012            // total size of the data itself then it's assumed that static
1013            // initialization is ok. This means we'll at most double memory
1014            // consumption during the memory image creation process, which is
1015            // currently assumed to "probably be ok" but this will likely need
1016            // tweaks over time.
1017            if image_size < info.data_size.saturating_mul(2) {
1018                continue;
1019            }
1020
1021            // If the memory initialization image is larger than the size of all
1022            // data, then we still allow memory initialization if the image will
1023            // be of a relatively modest size, such as 1MB here.
1024            if image_size < max_image_size_always_allowed {
1025                continue;
1026            }
1027
1028            // At this point memory initialization is concluded to be too
1029            // expensive to do at compile time so it's entirely deferred to
1030            // happen at runtime.
1031            return;
1032        }
1033
1034        // Here's where we've now committed to changing to static memory. The
1035        // memory initialization image is built here from the page data and then
1036        // it's converted to a single initializer.
1037        let data = mem::replace(&mut self.data, Vec::new());
1038        let mut map = PrimaryMap::with_capacity(info.len());
1039        let mut module_data_size = 0u32;
1040        for (memory, info) in info.iter() {
1041            // Create the in-memory `image` which is the initialized contents of
1042            // this linear memory.
1043            let extent = if info.segments.len() > 0 {
1044                (info.max_addr - info.min_addr) as usize
1045            } else {
1046                0
1047            };
1048            let mut image = Vec::with_capacity(extent);
1049            for (idx, init) in info.segments.iter() {
1050                let data = &data[*idx];
1051                assert_eq!(data.len(), init.data.len());
1052                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
1053                if image.len() < offset {
1054                    image.resize(offset, 0u8);
1055                    image.extend_from_slice(data);
1056                } else {
1057                    image.splice(
1058                        offset..(offset + data.len()).min(image.len()),
1059                        data.iter().copied(),
1060                    );
1061                }
1062            }
1063            assert_eq!(image.len(), extent);
1064            assert_eq!(image.capacity(), extent);
1065            let mut offset = if info.segments.len() > 0 {
1066                info.min_addr
1067            } else {
1068                0
1069            };
1070
1071            // Chop off trailing zeros from the image as memory is already
1072            // zero-initialized. Note that `i` is the position of a nonzero
1073            // entry here, so to not lose it we truncate to `i + 1`.
1074            if let Some(i) = image.iter().rposition(|i| *i != 0) {
1075                image.truncate(i + 1);
1076            }
1077
1078            // Also chop off leading zeros, if any.
1079            if let Some(i) = image.iter().position(|i| *i != 0) {
1080                offset += i as u64;
1081                image.drain(..i);
1082            }
1083            let mut len = u64::try_from(image.len()).unwrap();
1084
1085            // The goal is to enable mapping this image directly into memory, so
1086            // the offset into linear memory must be a multiple of the page
1087            // size. If that's not already the case then the image is padded at
1088            // the front and back with extra zeros as necessary
1089            if offset % page_size != 0 {
1090                let zero_padding = offset % page_size;
1091                self.data.push(vec![0; zero_padding as usize].into());
1092                offset -= zero_padding;
1093                len += zero_padding;
1094            }
1095            self.data.push(image.into());
1096            if len % page_size != 0 {
1097                let zero_padding = page_size - (len % page_size);
1098                self.data.push(vec![0; zero_padding as usize].into());
1099                len += zero_padding;
1100            }
1101
1102            // Offset/length should now always be page-aligned.
1103            assert!(offset % page_size == 0);
1104            assert!(len % page_size == 0);
1105
1106            // Create the `StaticMemoryInitializer` which describes this image,
1107            // only needed if the image is actually present and has a nonzero
1108            // length. The `offset` has been calculates above, originally
1109            // sourced from `info.min_addr`. The `data` field is the extent
1110            // within the final data segment we'll emit to an ELF image, which
1111            // is the concatenation of `self.data`, so here it's the size of
1112            // the section-so-far plus the current segment we're appending.
1113            let len = u32::try_from(len).unwrap();
1114            let init = if len > 0 {
1115                Some(StaticMemoryInitializer {
1116                    offset,
1117                    data: module_data_size..module_data_size + len,
1118                })
1119            } else {
1120                None
1121            };
1122            let idx = map.push(init);
1123            assert_eq!(idx, memory);
1124            module_data_size += len;
1125        }
1126        self.data_align = Some(page_size);
1127        self.module.memory_initialization = MemoryInitialization::Static { map };
1128    }
1129
1130    /// Attempts to convert the module's table initializers to
1131    /// FuncTable form where possible. This enables lazy table
1132    /// initialization later by providing a one-to-one map of initial
1133    /// table values, without having to parse all segments.
1134    pub fn try_func_table_init(&mut self) {
1135        // This should be large enough to support very large Wasm
1136        // modules with huge funcref tables, but small enough to avoid
1137        // OOMs or DoS on truly sparse tables.
1138        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1139
1140        // First convert any element-initialized tables to images of just that
1141        // single function if the minimum size of the table allows doing so.
1142        for ((_, init), (_, table)) in self
1143            .module
1144            .table_initialization
1145            .initial_values
1146            .iter_mut()
1147            .zip(
1148                self.module
1149                    .tables
1150                    .iter()
1151                    .skip(self.module.num_imported_tables),
1152            )
1153        {
1154            let table_size = table.limits.min;
1155            if table_size > MAX_FUNC_TABLE_SIZE {
1156                continue;
1157            }
1158            if let TableInitialValue::Expr(expr) = init {
1159                if let [ConstOp::RefFunc(f)] = expr.ops() {
1160                    *init = TableInitialValue::Null {
1161                        precomputed: vec![*f; table_size as usize],
1162                    };
1163                }
1164            }
1165        }
1166
1167        let mut segments = mem::take(&mut self.module.table_initialization.segments)
1168            .into_iter()
1169            .peekable();
1170
1171        // The goal of this loop is to interpret a table segment and apply it
1172        // "statically" to a local table. This will iterate over segments and
1173        // apply them one-by-one to each table.
1174        //
1175        // If any segment can't be applied, however, then this loop exits and
1176        // all remaining segments are placed back into the segment list. This is
1177        // because segments are supposed to be initialized one-at-a-time which
1178        // means that intermediate state is visible with respect to traps. If
1179        // anything isn't statically known to not trap it's pessimistically
1180        // assumed to trap meaning all further segment initializers must be
1181        // applied manually at instantiation time.
1182        while let Some(segment) = segments.peek() {
1183            let defined_index = match self.module.defined_table_index(segment.table_index) {
1184                Some(index) => index,
1185                // Skip imported tables: we can't provide a preconstructed
1186                // table for them, because their values depend on the
1187                // imported table overlaid with whatever segments we have.
1188                None => break,
1189            };
1190
1191            // If the base of this segment is dynamic, then we can't
1192            // include it in the statically-built array of initial
1193            // contents.
1194            let offset = match segment.offset.ops() {
1195                &[ConstOp::I32Const(offset)] => u64::from(offset.unsigned()),
1196                &[ConstOp::I64Const(offset)] => offset.unsigned(),
1197                _ => break,
1198            };
1199
1200            // Get the end of this segment. If out-of-bounds, or too
1201            // large for our dense table representation, then skip the
1202            // segment.
1203            let top = match offset.checked_add(segment.elements.len()) {
1204                Some(top) => top,
1205                None => break,
1206            };
1207            let table_size = self.module.tables[segment.table_index].limits.min;
1208            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1209                break;
1210            }
1211
1212            match self.module.tables[segment.table_index]
1213                .ref_type
1214                .heap_type
1215                .top()
1216            {
1217                WasmHeapTopType::Func => {}
1218                // If this is not a funcref table, then we can't support a
1219                // pre-computed table of function indices. Technically this
1220                // initializer won't trap so we could continue processing
1221                // segments, but that's left as a future optimization if
1222                // necessary.
1223                WasmHeapTopType::Any | WasmHeapTopType::Extern | WasmHeapTopType::Cont => break,
1224            }
1225
1226            // Function indices can be optimized here, but fully general
1227            // expressions are deferred to get evaluated at runtime.
1228            let function_elements = match &segment.elements {
1229                TableSegmentElements::Functions(indices) => indices,
1230                TableSegmentElements::Expressions(_) => break,
1231            };
1232
1233            let precomputed =
1234                match &mut self.module.table_initialization.initial_values[defined_index] {
1235                    TableInitialValue::Null { precomputed } => precomputed,
1236
1237                    // If this table is still listed as an initial value here
1238                    // then that means the initial size of the table doesn't
1239                    // support a precomputed function list, so skip this.
1240                    // Technically this won't trap so it's possible to process
1241                    // further initializers, but that's left as a future
1242                    // optimization.
1243                    TableInitialValue::Expr(_) => break,
1244                };
1245
1246            // At this point we're committing to pre-initializing the table
1247            // with the `segment` that's being iterated over. This segment is
1248            // applied to the `precomputed` list for the table by ensuring
1249            // it's large enough to hold the segment and then copying the
1250            // segment into the precomputed list.
1251            if precomputed.len() < top as usize {
1252                precomputed.resize(top as usize, FuncIndex::reserved_value());
1253            }
1254            let dst = &mut precomputed[offset as usize..top as usize];
1255            dst.copy_from_slice(&function_elements);
1256
1257            // advance the iterator to see the next segment
1258            let _ = segments.next();
1259        }
1260        self.module.table_initialization.segments = segments.collect();
1261    }
1262}