wasmtime/runtime/
instantiate.rs

1//! Define the `instantiate` function, which takes a byte array containing an
2//! encoded wasm module and returns a live wasm instance. Also, define
3//! `CompiledModule` to allow compiling and instantiating to be done as separate
4//! steps.
5
6use crate::prelude::*;
7use crate::runtime::vm::{CompiledModuleId, MmapVec};
8use crate::{code_memory::CodeMemory, profiling_agent::ProfilingAgent};
9use alloc::sync::Arc;
10use core::str;
11use wasmtime_environ::{
12    CompiledFunctionInfo, CompiledModuleInfo, DefinedFuncIndex, FilePos, FuncIndex, FunctionLoc,
13    FunctionName, Metadata, Module, ModuleInternedTypeIndex, PrimaryMap,
14};
15
/// A compiled wasm module, ready to be instantiated.
pub struct CompiledModule {
    /// Shared description of the module, exposed through `module()`.
    module: Arc<Module>,
    /// Per-function compilation info (code location, optional array-to-Wasm
    /// trampoline location and source position), indexed by defined function.
    funcs: PrimaryMap<DefinedFuncIndex, CompiledFunctionInfo>,
    /// Wasm-to-array trampoline locations paired with their signature.
    /// Lookups binary-search on the signature, so entries are expected to be
    /// sorted by it.
    wasm_to_array_trampolines: Vec<(ModuleInternedTypeIndex, FunctionLoc)>,
    /// Compilation metadata: debuginfo flags, DWARF section ranges and the
    /// offset of the code section in the original wasm file.
    meta: Metadata,
    /// The code memory holding this module's compiled image.
    code_memory: Arc<CodeMemory>,
    /// A unique ID used to register this module with the engine.
    unique_id: CompiledModuleId,
    /// Function name entries, each an offset/length into the code memory's
    /// function-name data. Lookups binary-search on the function index, so
    /// entries are expected to be sorted by it.
    func_names: Vec<FunctionName>,
}
27
28impl CompiledModule {
29    /// Creates `CompiledModule` directly from a precompiled artifact.
30    ///
31    /// The `code_memory` argument is expected to be the result of a previous
32    /// call to `ObjectBuilder::finish` above. This is an ELF image, at this
33    /// time, which contains all necessary information to create a
34    /// `CompiledModule` from a compilation.
35    ///
36    /// This method also takes `info`, an optionally-provided deserialization
37    /// of the artifacts' compilation metadata section. If this information is
38    /// not provided then the information will be
39    /// deserialized from the image of the compilation artifacts. Otherwise it
40    /// will be assumed to be what would otherwise happen if the section were
41    /// to be deserialized.
42    ///
43    /// The `profiler` argument here is used to inform JIT profiling runtimes
44    /// about new code that is loaded.
45    pub fn from_artifacts(
46        code_memory: Arc<CodeMemory>,
47        info: CompiledModuleInfo,
48        profiler: &dyn ProfilingAgent,
49    ) -> Result<Self> {
50        let mut ret = Self {
51            module: Arc::new(info.module),
52            funcs: info.funcs,
53            wasm_to_array_trampolines: info.wasm_to_array_trampolines,
54            code_memory,
55            meta: info.meta,
56            unique_id: CompiledModuleId::new(),
57            func_names: info.func_names,
58        };
59        ret.register_profiling(profiler)?;
60
61        Ok(ret)
62    }
63
64    fn register_profiling(&mut self, profiler: &dyn ProfilingAgent) -> Result<()> {
65        // TODO-Bug?: "code_memory" is not exclusive for this module in the case of components,
66        // so we may be registering the same code range multiple times here.
67        profiler.register_module(&self.code_memory.mmap()[..], &|addr| {
68            let (idx, _) = self.func_by_text_offset(addr)?;
69            let idx = self.module.func_index(idx);
70            let name = self.func_name(idx)?;
71            let mut demangled = String::new();
72            wasmtime_environ::demangle_function_name(&mut demangled, name).unwrap();
73            Some(demangled)
74        });
75        Ok(())
76    }
77
78    /// Get this module's unique ID. It is unique with respect to a
79    /// single allocator (which is ordinarily held on a Wasm engine).
80    pub fn unique_id(&self) -> CompiledModuleId {
81        self.unique_id
82    }
83
84    /// Returns the underlying memory which contains the compiled module's
85    /// image.
86    pub fn mmap(&self) -> &MmapVec {
87        self.code_memory.mmap()
88    }
89
90    /// Returns the underlying owned mmap of this compiled image.
91    pub fn code_memory(&self) -> &Arc<CodeMemory> {
92        &self.code_memory
93    }
94
95    /// Returns the text section of the ELF image for this compiled module.
96    ///
97    /// This memory should have the read/execute permissions.
98    #[inline]
99    pub fn text(&self) -> &[u8] {
100        self.code_memory.text()
101    }
102
103    /// Return a reference-counting pointer to a module.
104    pub fn module(&self) -> &Arc<Module> {
105        &self.module
106    }
107
108    /// Looks up the `name` section name for the function index `idx`, if one
109    /// was specified in the original wasm module.
110    pub fn func_name(&self, idx: FuncIndex) -> Option<&str> {
111        // Find entry for `idx`, if present.
112        let i = self.func_names.binary_search_by_key(&idx, |n| n.idx).ok()?;
113        let name = &self.func_names[i];
114
115        // Here we `unwrap` the `from_utf8` but this can theoretically be a
116        // `from_utf8_unchecked` if we really wanted since this section is
117        // guaranteed to only have valid utf-8 data. Until it's a problem it's
118        // probably best to double-check this though.
119        let data = self.code_memory().func_name_data();
120        Some(str::from_utf8(&data[name.offset as usize..][..name.len as usize]).unwrap())
121    }
122
123    /// Return a reference to a mutable module (if possible).
124    pub fn module_mut(&mut self) -> Option<&mut Module> {
125        Arc::get_mut(&mut self.module)
126    }
127
128    /// Returns an iterator over all functions defined within this module with
129    /// their index and their body in memory.
130    #[inline]
131    pub fn finished_functions(
132        &self,
133    ) -> impl ExactSizeIterator<Item = (DefinedFuncIndex, &[u8])> + '_ {
134        self.funcs
135            .iter()
136            .map(move |(i, _)| (i, self.finished_function(i)))
137    }
138
139    /// Returns the body of the function that `index` points to.
140    #[inline]
141    pub fn finished_function(&self, index: DefinedFuncIndex) -> &[u8] {
142        let loc = self.funcs[index].wasm_func_loc;
143        &self.text()[loc.start as usize..][..loc.length as usize]
144    }
145
146    /// Get the array-to-Wasm trampoline for the function `index` points to.
147    ///
148    /// If the function `index` points to does not escape, then `None` is
149    /// returned.
150    ///
151    /// These trampolines are used for array callers (e.g. `Func::new`)
152    /// calling Wasm callees.
153    pub fn array_to_wasm_trampoline(&self, index: DefinedFuncIndex) -> Option<&[u8]> {
154        let loc = self.funcs[index].array_to_wasm_trampoline?;
155        Some(&self.text()[loc.start as usize..][..loc.length as usize])
156    }
157
158    /// Get the Wasm-to-array trampoline for the given signature.
159    ///
160    /// These trampolines are used for filling in
161    /// `VMFuncRef::wasm_call` for `Func::wrap`-style host funcrefs
162    /// that don't have access to a compiler when created.
163    pub fn wasm_to_array_trampoline(&self, signature: ModuleInternedTypeIndex) -> &[u8] {
164        let idx = match self
165            .wasm_to_array_trampolines
166            .binary_search_by_key(&signature, |entry| entry.0)
167        {
168            Ok(idx) => idx,
169            Err(_) => panic!("missing trampoline for {signature:?}"),
170        };
171
172        let (_, loc) = self.wasm_to_array_trampolines[idx];
173        &self.text()[loc.start as usize..][..loc.length as usize]
174    }
175
176    /// Lookups a defined function by a program counter value.
177    ///
178    /// Returns the defined function index and the relative address of
179    /// `text_offset` within the function itself.
180    pub fn func_by_text_offset(&self, text_offset: usize) -> Option<(DefinedFuncIndex, u32)> {
181        let text_offset = u32::try_from(text_offset).unwrap();
182
183        let index = match self.funcs.binary_search_values_by_key(&text_offset, |e| {
184            debug_assert!(e.wasm_func_loc.length > 0);
185            // Return the inclusive "end" of the function
186            e.wasm_func_loc.start + e.wasm_func_loc.length - 1
187        }) {
188            Ok(k) => {
189                // Exact match, pc is at the end of this function
190                k
191            }
192            Err(k) => {
193                // Not an exact match, k is where `pc` would be "inserted"
194                // Since we key based on the end, function `k` might contain `pc`,
195                // so we'll validate on the range check below
196                k
197            }
198        };
199
200        let CompiledFunctionInfo { wasm_func_loc, .. } = self.funcs.get(index)?;
201        let start = wasm_func_loc.start;
202        let end = wasm_func_loc.start + wasm_func_loc.length;
203
204        if text_offset < start || end < text_offset {
205            return None;
206        }
207
208        Some((index, text_offset - wasm_func_loc.start))
209    }
210
211    /// Gets the function location information for a given function index.
212    pub fn func_loc(&self, index: DefinedFuncIndex) -> &FunctionLoc {
213        &self
214            .funcs
215            .get(index)
216            .expect("defined function should be present")
217            .wasm_func_loc
218    }
219
220    /// Returns the original binary offset in the file that `index` was defined
221    /// at.
222    pub fn func_start_srcloc(&self, index: DefinedFuncIndex) -> FilePos {
223        self.funcs[index].start_srcloc
224    }
225
226    /// Creates a new symbolication context which can be used to further
227    /// symbolicate stack traces.
228    ///
229    /// Basically this makes a thing which parses debuginfo and can tell you
230    /// what filename and line number a wasm pc comes from.
231    #[cfg(feature = "addr2line")]
232    pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext<'_>>> {
233        use gimli::EndianSlice;
234        if !self.meta.has_wasm_debuginfo {
235            return Ok(None);
236        }
237        let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
238            // Lookup the `id` in the `dwarf` array prepared for this module
239            // during module serialization where it's sorted by the `id` key. If
240            // found this is a range within the general module's concatenated
241            // dwarf section which is extracted here, otherwise it's just an
242            // empty list to represent that it's not present.
243            let data = self
244                .meta
245                .dwarf
246                .binary_search_by_key(&(id as u8), |(id, _)| *id)
247                .ok()
248                .and_then(|i| {
249                    let (_, range) = &self.meta.dwarf[i];
250                    let start = range.start.try_into().ok()?;
251                    let end = range.end.try_into().ok()?;
252                    self.code_memory().wasm_dwarf().get(start..end)
253                })
254                .unwrap_or(&[]);
255            Ok(EndianSlice::new(data, gimli::LittleEndian))
256        })?;
257        let cx = addr2line::Context::from_dwarf(dwarf)
258            .context("failed to create addr2line dwarf mapping context")?;
259        Ok(Some(SymbolizeContext {
260            inner: cx,
261            code_section_offset: self.meta.code_section_offset,
262        }))
263    }
264
265    /// Returns whether the original wasm module had unparsed debug information
266    /// based on the tunables configuration.
267    pub fn has_unparsed_debuginfo(&self) -> bool {
268        self.meta.has_unparsed_debuginfo
269    }
270
271    /// Indicates whether this module came with n address map such that lookups
272    /// via `wasmtime_environ::lookup_file_pos` will succeed.
273    ///
274    /// If this function returns `false` then `lookup_file_pos` will always
275    /// return `None`.
276    pub fn has_address_map(&self) -> bool {
277        !self.code_memory.address_map_data().is_empty()
278    }
279}
280
/// Shorthand for the `addr2line::Context` specialization, reading from
/// little-endian `gimli::EndianSlice` data, that `SymbolizeContext` wraps.
#[cfg(feature = "addr2line")]
type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::LittleEndian>>;
283
/// A context which contains dwarf debug information to translate program
/// counters back to filenames and line numbers.
#[cfg(feature = "addr2line")]
pub struct SymbolizeContext<'a> {
    /// The `addr2line` context used to query frame information.
    inner: Addr2LineContext<'a>,
    /// Offset of the code section in the original wasm file, used to
    /// calculate lookup values into the DWARF.
    code_section_offset: u64,
}
291
#[cfg(feature = "addr2line")]
impl<'a> SymbolizeContext<'a> {
    /// Gives access to the wrapped [`addr2line::Context`], which can be used
    /// to query frame information.
    pub fn addr2line(&self) -> &Addr2LineContext<'a> {
        let Self { inner, .. } = self;
        inner
    }

    /// Returns the offset of the code section within the original wasm file.
    ///
    /// This value is used to calculate lookup values into the DWARF.
    pub fn code_section_offset(&self) -> u64 {
        let Self {
            code_section_offset,
            ..
        } = self;
        *code_section_offset
    }
}