Skip to main content

wasmtime/runtime/module/
registry.rs

1//! Implements a registry of modules for a store.
2
3use crate::code::{EngineCode, EngineCodePC, ModuleWithCode, StoreCode, StoreCodePC};
4#[cfg(feature = "component-model")]
5use crate::component::Component;
6use crate::runtime::vm::VMWasmCallFunction;
7use crate::sync::{OnceLock, RwLock};
8use crate::vm::CompiledModuleId;
9use crate::{Engine, FrameInfo, Module, code_memory::CodeMemory, prelude::*};
10use alloc::sync::Arc;
11#[cfg(not(feature = "debug"))]
12use core::marker::PhantomData;
13use core::ops::Range;
14use core::ptr::NonNull;
15use wasmtime_environ::{
16    CompiledFunctionsTable, FuncKey, StaticModuleIndex, VMSharedTypeIndex,
17    collections::btree_map::Entry,
18};
19
/// Used for registering modules with a store.
///
/// There are two basic purposes that this registry serves:
///
/// - It keeps all modules and their metadata alive as long as the
///   store exists.
/// - It owns the [`StoreCode`], i.e. possibly-private-copy of machine
///   code, for all modules that execute in this store.
///
/// The registry allows for translation of EngineCode to StoreCode,
/// deduplicating by the start address of the EngineCode; and allows
/// for looking up modules by "registered module ID", and looking up
/// StoreCode and Modules by PC.
///
/// Note that multiple modules may be backed by a single
/// `StoreCode`. This is specifically the case for components in
/// general. When a component is first instantiated, the component
/// itself is registered (which loads the StoreCode into the
/// registry), then each individual module within that component is
/// registered and added to the data structures.
///
/// A brief overview of the kinds of compiled object and their
/// relationships:
///
/// - `Module` is a Wasm module. It owns a `CompiledModule`.
/// - `CompiledModule` contains metadata about the module (e.g., a map
///   from Wasm function indices to locations in the machine code),
///   and also owns an `EngineCode`.
/// - `EngineCode` holds an `Arc` to a `CodeMemory` with the canonical
///   copy of machine code, as well as some lower-level metadata
///   (signatures and types). It is instantiated by this registry into
///   `StoreCode`.
/// - `StoreCode` owns either another `Arc` to the same `CodeMemory`
///   as `EngineCode`, or if guest debugging is enabled and causes us
///   to clone private copies of code for patching per store, owns its
///   own private `CodeMemory` at a different address.
/// - Instances hold a `RegisteredModuleId` to be able to look up their modules.
#[derive(Default)]
pub struct ModuleRegistry {
    /// StoreCode and Modules associated with it.
    ///
    /// Keyed by the start address of the `StoreCode`. We maintain the
    /// invariant of no overlaps on insertion (see `assert_no_overlap`).
    /// We use a range query to find the StoreCode for a given PC: take
    /// the range `..=pc`, then take the last element of the range. That
    /// picks the highest start address <= the query, and we can then
    /// check whether that entry's text range contains the address.
    loaded_code: TryBTreeMap<StoreCodePC, LoadedCode>,

    /// Map from EngineCodePC start to StoreCodePC start. We use this
    /// to memoize the store-code creation process: each EngineCode is
    /// instantiated to a StoreCode only once per store.
    ///
    /// Every value in this map is also a key of `loaded_code`.
    store_code: TryBTreeMap<EngineCodePC, StoreCodePC>,

    /// Modules registered with this registry, keyed by their id.
    ///
    /// Every module is placed in this map, but not every module will
    /// be in a LoadedCode entry, because the module may have no text.
    modules: TryBTreeMap<RegisteredModuleId, Module>,
}
80
/// One `StoreCode` held by a [`ModuleRegistry`], together with the metadata
/// needed to map program counters inside it back to registered modules.
struct LoadedCode {
    /// The StoreCode in this range.
    code: StoreCode,

    /// The index of compiled functions in `code`.
    ///
    /// This index is used to map from text offsets back to a `FuncKey`.
    /// It is currently consulted by [`ModuleRegistry::lookup_frame_info`].
    index: Arc<CompiledFunctionsTable>,

    /// A mapping from the [`StaticModuleIndex`] values returned by the `index`
    /// field above to a module within this registry.
    ///
    /// This is lazily populated as modules are registered and instantiated from
    /// a component; a slot stays `None` until the corresponding module has been
    /// registered.
    modules: TrySecondaryMap<StaticModuleIndex, Option<RegisteredModuleId>>,
}
99
/// An identifier of a module that has previously been inserted into a
/// `ModuleRegistry`.
///
/// This is just a newtype around `CompiledModuleId`, which is unique
/// within the Engine. The wrapped id is private to this module, so other
/// code can only obtain a `RegisteredModuleId` from the registry itself.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RegisteredModuleId(CompiledModuleId);
107
108fn assert_no_overlap(
109    loaded_code: &TryBTreeMap<StoreCodePC, LoadedCode>,
110    range: Range<StoreCodePC>,
111) {
112    if let Some((start, _)) = loaded_code.range(range.start..).next() {
113        assert!(start >= range.end);
114    }
115    if let Some((_, code)) = loaded_code.range(..range.end).next_back() {
116        assert!(code.code.text_range().end <= range.start);
117    }
118}
119
120#[cfg(feature = "debug")]
121pub struct RegisterBreakpointState<'a>(pub(crate) &'a crate::runtime::debug::BreakpointState);
122#[cfg(not(feature = "debug"))]
123pub struct RegisterBreakpointState<'a>(pub(crate) PhantomData<&'a ()>);
124
125impl<'a> RegisterBreakpointState<'a> {
126    #[cfg(feature = "debug")]
127    fn update(&self, code: &mut StoreCode, module: &Module) -> Result<()> {
128        self.0.patch_new_module(code, module)
129    }
130    #[cfg(not(feature = "debug"))]
131    fn update(&self, _code: &mut StoreCode, _module: &Module) -> Result<()> {
132        Ok(())
133    }
134}
135
/// The thing being registered with a [`ModuleRegistry`]: either a core module
/// or (with the `component-model` feature) a component.
///
/// Both variants hold shared references, so `ModuleRegistry::register` can
/// match on the same value several times.
enum ModuleOrComponent<'a> {
    /// A core Wasm module.
    Module(&'a Module),
    /// A component; only available with the `component-model` feature.
    #[cfg(feature = "component-model")]
    Component(&'a Component),
}
141
142impl ModuleRegistry {
143    /// Get a previously-registered module by id.
144    pub fn module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
145        self.modules.get(id)
146    }
147
148    /// Get a module by CompiledModuleId, if present.
149    pub fn module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module> {
150        self.modules.get(RegisteredModuleId(id))
151    }
152
153    /// Looks up `pc`, an absolute program counter address, to see if it
154    /// corresponds to any loaded code within this [`ModuleRegistry`].
155    ///
156    /// Returns `None` if nothing is found, and otherwise returns the
157    /// corresponding [`LoadedCode`] as well as a relative pc from the start of
158    /// the code's text section if found.
159    fn loaded_code_by_pc(&self, pc: usize) -> Option<(&LoadedCode, usize)> {
160        let (_, code) = self
161            .loaded_code
162            .range(..=StoreCodePC::from_raw(pc))
163            .next_back()?;
164        let offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
165        Some((code, offset))
166    }
167
168    /// Consults this [`ModuleRegistry`] to see if `pc` corrseponds to any
169    /// previously-registered block of code.
170    ///
171    /// Upon success returns the [`StoreCode`] as well as a relative pc from the
172    /// start of the text section.
173    pub fn store_code_by_pc(&self, pc: usize) -> Option<(&StoreCode, usize)> {
174        let (code, pc) = self.loaded_code_by_pc(pc)?;
175        Some((&code.code, pc))
176    }
177
178    /// Fetches the `StoreCode` for a given `EngineCode`.
179    pub fn store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode> {
180        let store_code_pc = self.store_code_base(engine_code)?;
181        let (_, code) = self.loaded_code.range(store_code_pc..).next()?;
182        Some(&code.code)
183    }
184
185    /// Fetches the base `StoreCodePC` for a given `EngineCode`.
186    pub fn store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC> {
187        self.store_code.get(engine_code.text_range().start).cloned()
188    }
189
190    /// Fetches the base `StoreCodePC` for a given `EngineCode` with
191    /// `Module`, registering the module if not already registered.
192    pub fn store_code_base_or_register(
193        &mut self,
194        module: &Module,
195        breakpoint_state: RegisterBreakpointState,
196    ) -> Result<StoreCodePC> {
197        let key = module.engine_code().text_range().start;
198        if !self.store_code.contains_key(key) {
199            let engine = module.engine().clone();
200            self.register_module(module, &engine, breakpoint_state)?;
201        }
202        Ok(*self.store_code.get(key).unwrap())
203    }
204
205    /// Fetches a mutable `StoreCode` for a given base `StoreCodePC`.
206    pub fn store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode> {
207        let (_, code) = self.loaded_code.range_mut(store_code_base..).next()?;
208        assert_eq!(code.code.text_range().start, store_code_base);
209        Some(&mut code.code)
210    }
211
212    /// Gets an iterator over all modules in the registry.
213    #[cfg(any(feature = "coredump", feature = "debug"))]
214    pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
215        self.modules.values()
216    }
217
218    /// Registers a new module with the registry.
219    pub fn register_module(
220        &mut self,
221        module: &Module,
222        engine: &Engine,
223        breakpoint_state: RegisterBreakpointState,
224    ) -> Result<RegisteredModuleId> {
225        self.register(ModuleOrComponent::Module(module), engine, breakpoint_state)
226            .map(|id| id.unwrap())
227    }
228
229    #[cfg(feature = "component-model")]
230    pub fn register_component(
231        &mut self,
232        component: &Component,
233        engine: &Engine,
234        breakpoint_state: RegisterBreakpointState,
235    ) -> Result<()> {
236        self.register(
237            ModuleOrComponent::Component(component),
238            engine,
239            breakpoint_state,
240        )?;
241        Ok(())
242    }
243
244    /// Registers a new module with the registry.
245    fn register(
246        &mut self,
247        module_or_component: ModuleOrComponent<'_>,
248        engine: &Engine,
249        breakpoint_state: RegisterBreakpointState,
250    ) -> Result<Option<RegisteredModuleId>> {
251        let compiled_id = match module_or_component {
252            ModuleOrComponent::Module(module) => module.id(),
253            #[cfg(feature = "component-model")]
254            ModuleOrComponent::Component(component) => component.id(),
255        };
256        let code = match module_or_component {
257            ModuleOrComponent::Module(module) => module.engine_code(),
258            #[cfg(feature = "component-model")]
259            ModuleOrComponent::Component(component) => component.engine_code(),
260        };
261        // Register the module, if any.
262        let id = match module_or_component {
263            ModuleOrComponent::Module(module) => {
264                let id = RegisteredModuleId(compiled_id);
265                self.modules.entry(id).or_insert_with(|| module.clone())?;
266                Some(id)
267            }
268            #[cfg(feature = "component-model")]
269            ModuleOrComponent::Component(_) => None,
270        };
271
272        // Create a StoreCode if one does not already exist.
273        let store_code_pc = match self.store_code.entry(code.text_range().start) {
274            Entry::Vacant(v) => {
275                let store_code = StoreCode::new(engine, code)?;
276                let store_code_pc = store_code.text_range().start;
277                assert_no_overlap(&self.loaded_code, store_code.text_range());
278                let index = match module_or_component {
279                    ModuleOrComponent::Module(module) => module.index(),
280                    #[cfg(feature = "component-model")]
281                    ModuleOrComponent::Component(component) => component.index(),
282                };
283                self.loaded_code.insert(
284                    store_code_pc,
285                    LoadedCode {
286                        code: store_code,
287                        index: index.clone(),
288                        modules: Default::default(),
289                    },
290                )?;
291                *v.insert(store_code_pc)?
292            }
293            Entry::Occupied(o) => *o.get(),
294        };
295
296        // Add this module to the LoadedCode if not present.
297        if let (ModuleOrComponent::Module(module), Some(id)) = (module_or_component, id) {
298            let loaded_code = self
299                .loaded_code
300                .get_mut(store_code_pc)
301                .expect("loaded_code must have entry for StoreCodePC");
302            loaded_code
303                .modules
304                .insert(module.env_module().module_index, Some(id))?;
305            breakpoint_state.update(&mut loaded_code.code, module)?;
306        }
307
308        Ok(id)
309    }
310
311    /// Fetches frame information about a program counter in a backtrace.
312    ///
313    /// Returns an object if this `pc` is known to some previously registered
314    /// module, or returns `None` if no information can be found. The first
315    /// boolean returned indicates whether the original module has unparsed
316    /// debug information due to the compiler's configuration. The second
317    /// boolean indicates whether the engine used to compile this module is
318    /// using environment variables to control debuginfo parsing.
319    pub(crate) fn lookup_frame_info<'a>(
320        &'a self,
321        pc: usize,
322    ) -> Option<(FrameInfo, ModuleWithCode<'a>)> {
323        let (code, text_offset) = self.loaded_code_by_pc(pc)?;
324        let module_index = match code
325            .index
326            .func_by_text_offset(u32::try_from(text_offset).ok()?)?
327        {
328            FuncKey::DefinedWasmFunction(module, _) => module,
329            _ => return None,
330        };
331        let module_id = (*code.modules.get(module_index)?)?;
332        let module = self
333            .modules
334            .get(module_id)
335            .expect("referenced module ID not found");
336        let info = FrameInfo::new(module.clone(), text_offset)?;
337        let module_with_code = ModuleWithCode::from_raw(module, &code.code);
338        Some((info, module_with_code))
339    }
340
341    pub fn wasm_to_array_trampoline(
342        &self,
343        sig: VMSharedTypeIndex,
344    ) -> Option<NonNull<VMWasmCallFunction>> {
345        // TODO: We are doing a linear search over each module. This is fine for
346        // now because we typically have very few modules per store (almost
347        // always one, in fact). If this linear search ever becomes a
348        // bottleneck, we could avoid it by incrementally and lazily building a
349        // `VMSharedSignatureIndex` to `SignatureIndex` map.
350        //
351        // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
352        for module in self.modules.values() {
353            if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
354                return Some(trampoline);
355            }
356        }
357        None
358    }
359}
360
361// This is the global code registry that stores information for all loaded code
362// objects that are currently in use by any `Store` in the current process.
363//
364// The purpose of this map is to be called from signal handlers to determine
365// whether a program counter is a wasm trap or not. Specifically macOS has
366// no contextual information about the thread available, hence the necessity
367// for global state rather than using thread local state.
368//
369// This is similar to `ModuleRegistry` except that it has less information and
370// supports removal. Any time anything is registered with a `ModuleRegistry`
371// it is also automatically registered with the singleton global module
372// registry. When a `ModuleRegistry` is destroyed then all of its entries
373// are removed from the global registry.
374fn global_code() -> &'static RwLock<GlobalRegistry> {
375    static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
376    GLOBAL_CODE.get_or_init(Default::default)
377}
378
/// Process-wide table of registered code regions.
///
/// Each key is the address of the last byte of a region (its inclusive end).
/// Each value pairs the region's start address with the `CodeMemory` image
/// that was registered for it.
type GlobalRegistry = TryBTreeMap<usize, (usize, Arc<CodeMemory>)>;
380
381/// Find which registered region of code contains the given program counter, and
382/// what offset that PC is within that module's code.
383pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
384    let all_modules = global_code().read();
385    let (_end, (start, module)) = all_modules.range(pc..).next()?;
386    let text_offset = pc.checked_sub(*start)?;
387    Some((module.clone(), text_offset))
388}
389
390/// Registers a new region of code.
391///
392/// Must not have been previously registered and must be `unregister`'d to
393/// prevent leaking memory.
394///
395/// This is required to enable traps to work correctly since the signal handler
396/// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
397/// is a trap or not.
398pub fn register_code(image: &Arc<CodeMemory>, address: Range<usize>) -> Result<(), OutOfMemory> {
399    if address.is_empty() {
400        return Ok(());
401    }
402    let start = address.start;
403    let end = address.end - 1;
404    let prev = global_code().write().insert(end, (start, image.clone()))?;
405    assert!(prev.is_none());
406    Ok(())
407}
408
409/// Unregisters a code mmap from the global map.
410///
411/// Must have been previously registered with `register`.
412pub fn unregister_code(address: Range<usize>) {
413    if address.is_empty() {
414        return;
415    }
416    let end = address.end - 1;
417    let code = global_code().write().remove(end);
418    assert!(code.is_some());
419}
420
/// Checks that every pc inside every compiled function of a small module
/// resolves, via the store's registry, to frame info for that function.
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), crate::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Create an instance to ensure the frame information is registered.
    Instance::new(&mut store, &module, &[])?;

    // Look for frame info for each function. Assume that StoreCode
    // does not actually clone in the default configuration, so addresses
    // derived from the engine code are valid pcs for the store.
    let text_base = module.engine_code().text_range().start.raw();
    for (func_index, func_range) in module.compiled_module().finished_function_ranges() {
        let expected = func_index.as_u32();
        let first_pc = text_base + func_range.start;
        let past_last_pc = text_base + func_range.end;
        for pc in first_pc..past_last_pc {
            let registry = store.as_context().0.modules();
            let (frame, _) = registry.lookup_frame_info(pc).unwrap();
            assert!(
                frame.func_index() == expected,
                "lookup of {:#x} returned {}, expected {}",
                pc,
                frame.func_index(),
                expected
            );
        }
    }
    Ok(())
}