wasmtime/runtime/module/
registry.rs

1//! Implements a registry of modules for a store.
2
3use crate::code::{EngineCode, EngineCodePC, ModuleWithCode, StoreCode, StoreCodePC};
4#[cfg(feature = "component-model")]
5use crate::component::Component;
6use crate::runtime::vm::VMWasmCallFunction;
7use crate::sync::{OnceLock, RwLock};
8use crate::vm::CompiledModuleId;
9use crate::{Engine, prelude::*};
10use crate::{FrameInfo, Module, code_memory::CodeMemory};
11use alloc::collections::btree_map::{BTreeMap, Entry};
12use alloc::sync::Arc;
13use core::ops::Range;
14use core::ptr::NonNull;
15use wasmtime_environ::VMSharedTypeIndex;
16
17/// Used for registering modules with a store.
18///
19/// There are two basic purposes that this registry serves:
20///
21/// - It keeps all modules and their metadata alive as long as the
22///   store exists.
23/// - It owns the [`StoreCode`], i.e. possibly-private-copy of machine
24///   code, for all modules that execute in this store.
25///
26/// The registry allows for translation of EngineCode to StoreCode,
27/// deduplicating by the start address of the EngineCode; and allows
28/// for looking up modules by "registered module ID", and looking up
29/// StoreCode and Modules by PC.
30///
31/// Note that multiple modules may be backed by a single
32/// `StoreCode`. This is specifically the case for components in
33/// general. When a component is first instantiated, the component
34/// itself is registered (which loads the StoreCode into the
35/// registry), then each individual module within that component is
36/// registered and added to the data structures.
37///
38/// A brief overview of the kinds of compiled object and their
39/// relationships:
40///
41/// - `Module` is a Wasm module. It owns a `CompiledModule`.
42/// - `CompiledModule` contains metadata about the module (e.g., a map
43///   from Wasm function indices to locations in the machine code),
44///   and also owns an `EngineCode`.
45/// - `EngineCode` holds an `Arc` to a `CodeMemory` with the canonical
46///   copy of machine code, as well as some lower-level metadata
47///   (signatures and types). It is instantiated by this registry into
48///   `StoreCode`.
49/// - `StoreCode` owns either another `Arc` to the same `CodeMemory`
50///   as `EngineCode`, or if guest debugging is enabled and causes us
51///   to clone private copies of code for patching per store, owns its
52///   own private `CodeMemory` at a different address.
53/// - Instances hold a `RegisteredModuleId` to be able to look up their modules.
54#[derive(Default)]
55pub struct ModuleRegistry {
56    /// StoreCode and Modules associated with it.
57    ///
58    /// Keyed by the start address of the `StoreCode`. We maintain the
59    /// invariant of no overlaps on insertion. We use a range query to
60    /// find the StoreCode for a given PC: take the range `0..=pc`,
61    /// then take the last element of the range. That picks the
62    /// highest start address <= the query, and we can check whether
63    /// it contains the address.
64    loaded_code: BTreeMap<StoreCodePC, LoadedCode>,
65
66    /// Map from EngineCodePC start to StoreCodePC start. We use this
67    /// to memoize the store-code creation process: each EngineCode is
68    /// instantiated to a StoreCode only once per store.
69    store_code: BTreeMap<EngineCodePC, StoreCodePC>,
70
71    /// Modules instantiated in this registry.
72    ///
73    /// Every module is placed in this map, but not every module will
74    /// be in a LoadedCode entry, because the module may have no text.
75    modules: BTreeMap<RegisteredModuleId, Module>,
76}
77
78struct LoadedCode {
79    /// The StoreCode in this range.
80    code: StoreCode,
81
82    /// Map by starting text offset of Modules in this code region.
83    modules: BTreeMap<usize, RegisteredModuleId>,
84}
85
86/// An identifier of a module that has previously been inserted into a
87/// `ModuleRegistry`.
88///
89/// This is just a newtype around `CompiledModuleId`, which is unique
90/// within the Engine.
91#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
92pub struct RegisteredModuleId(CompiledModuleId);
93
94fn assert_no_overlap(loaded_code: &BTreeMap<StoreCodePC, LoadedCode>, range: Range<StoreCodePC>) {
95    if let Some((start, _)) = loaded_code.range(range.start..).next() {
96        assert!(*start >= range.end);
97    }
98    if let Some((_, code)) = loaded_code.range(..range.end).next_back() {
99        assert!(code.code.text_range().end <= range.start);
100    }
101}
102
103impl ModuleRegistry {
104    /// Get a previously-registered module by id.
105    pub fn module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
106        self.modules.get(&id)
107    }
108
109    /// Get a module by CompiledModuleId, if present.
110    pub fn module_by_compiled_id(&self, id: CompiledModuleId) -> Option<&Module> {
111        self.modules.get(&RegisteredModuleId(id))
112    }
113
114    /// Fetches a registered StoreCode and module and an offset within
115    /// it given a program counter value.
116    pub fn module_and_code_by_pc<'a>(&'a self, pc: usize) -> Option<(ModuleWithCode<'a>, usize)> {
117        let (_, code) = self
118            .loaded_code
119            .range(..=StoreCodePC::from_raw(pc))
120            .next_back()?;
121        let offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
122        let (_, module_id) = code.modules.range(..=offset).next_back()?;
123        let module = self.modules.get(&module_id)?;
124        Some((ModuleWithCode::from_raw(module, &code.code), offset))
125    }
126
127    /// Fetches the `StoreCode` for a given `EngineCode`.
128    pub fn store_code(&self, engine_code: &EngineCode) -> Option<&StoreCode> {
129        let store_code_pc = self.store_code_base(engine_code)?;
130        let (_, code) = self.loaded_code.range(store_code_pc..).next()?;
131        Some(&code.code)
132    }
133
134    /// Fetches the base `StoreCodePC` for a given `EngineCode`.
135    pub fn store_code_base(&self, engine_code: &EngineCode) -> Option<StoreCodePC> {
136        self.store_code
137            .get(&engine_code.text_range().start)
138            .cloned()
139    }
140
141    /// Fetches the base `StoreCodePC` for a given `EngineCode` with
142    /// `Module`, registering the module if not already registered.
143    pub fn store_code_base_or_register(&mut self, module: &Module) -> Result<StoreCodePC> {
144        let key = module.engine_code().text_range().start;
145        if !self.store_code.contains_key(&key) {
146            let engine = module.engine().clone();
147            self.register_module(module, &engine)?;
148        }
149        Ok(*self.store_code.get(&key).unwrap())
150    }
151
152    /// Fetches a mutable `StoreCode` for a given base `StoreCodePC`.
153    pub fn store_code_mut(&mut self, store_code_base: StoreCodePC) -> Option<&mut StoreCode> {
154        let (_, code) = self.loaded_code.range_mut(store_code_base..).next()?;
155        assert_eq!(code.code.text_range().start, store_code_base);
156        Some(&mut code.code)
157    }
158
159    /// Gets an iterator over all modules in the registry.
160    #[cfg(any(feature = "coredump", feature = "debug"))]
161    pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
162        self.modules.values()
163    }
164
165    /// Registers a new module with the registry.
166    pub fn register_module(
167        &mut self,
168        module: &Module,
169        engine: &Engine,
170    ) -> Result<RegisteredModuleId> {
171        self.register(module.id(), module.engine_code(), Some(module), engine)
172            .map(|id| id.unwrap())
173    }
174
175    #[cfg(feature = "component-model")]
176    pub fn register_component(&mut self, component: &Component, engine: &Engine) -> Result<()> {
177        self.register(component.id(), component.engine_code(), None, engine)?;
178        Ok(())
179    }
180
181    /// Registers a new module with the registry.
182    fn register(
183        &mut self,
184        compiled_id: CompiledModuleId,
185        code: &Arc<EngineCode>,
186        module: Option<&Module>,
187        engine: &Engine,
188    ) -> Result<Option<RegisteredModuleId>> {
189        // Register the module, if any.
190        let id = module.map(|module| {
191            let id = RegisteredModuleId(compiled_id);
192            self.modules.entry(id).or_insert_with(|| module.clone());
193            id
194        });
195
196        // Create a StoreCode if one does not already exist.
197        let store_code_pc = match self.store_code.entry(code.text_range().start) {
198            Entry::Vacant(v) => {
199                let store_code = StoreCode::new(engine, code)?;
200                let store_code_pc = store_code.text_range().start;
201                assert_no_overlap(&self.loaded_code, store_code.text_range());
202                self.loaded_code.insert(
203                    store_code_pc,
204                    LoadedCode {
205                        code: store_code,
206                        modules: BTreeMap::default(),
207                    },
208                );
209                *v.insert(store_code_pc)
210            }
211            Entry::Occupied(o) => *o.get(),
212        };
213
214        // Add this module to the LoadedCode if not present.
215        if let (Some(module), Some(id)) = (module, id) {
216            if let Some((_, range)) = module.compiled_module().finished_function_ranges().next() {
217                let loaded_code = self
218                    .loaded_code
219                    .get_mut(&store_code_pc)
220                    .expect("loaded_code must have entry for StoreCodePC");
221                loaded_code.modules.insert(range.start, id);
222            }
223        }
224
225        Ok(id)
226    }
227
228    /// Fetches frame information about a program counter in a backtrace.
229    ///
230    /// Returns an object if this `pc` is known to some previously registered
231    /// module, or returns `None` if no information can be found. The first
232    /// boolean returned indicates whether the original module has unparsed
233    /// debug information due to the compiler's configuration. The second
234    /// boolean indicates whether the engine used to compile this module is
235    /// using environment variables to control debuginfo parsing.
236    pub(crate) fn lookup_frame_info<'a>(
237        &'a self,
238        pc: usize,
239    ) -> Option<(FrameInfo, ModuleWithCode<'a>)> {
240        let (_, code) = self
241            .loaded_code
242            .range(..=StoreCodePC::from_raw(pc))
243            .next_back()?;
244        let text_offset = StoreCodePC::offset_of(code.code.text_range(), pc)?;
245        let (_, module_id) = code.modules.range(..=text_offset).next_back()?;
246        let module = self
247            .modules
248            .get(&module_id)
249            .expect("referenced module ID not found");
250        let info = FrameInfo::new(module.clone(), text_offset)?;
251        let module_with_code = ModuleWithCode::from_raw(module, &code.code);
252        Some((info, module_with_code))
253    }
254
255    pub fn wasm_to_array_trampoline(
256        &self,
257        sig: VMSharedTypeIndex,
258    ) -> Option<NonNull<VMWasmCallFunction>> {
259        // TODO: We are doing a linear search over each module. This is fine for
260        // now because we typically have very few modules per store (almost
261        // always one, in fact). If this linear search ever becomes a
262        // bottleneck, we could avoid it by incrementally and lazily building a
263        // `VMSharedSignatureIndex` to `SignatureIndex` map.
264        //
265        // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
266        for module in self.modules.values() {
267            if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
268                return Some(trampoline);
269            }
270        }
271        None
272    }
273}
274
275// This is the global code registry that stores information for all loaded code
276// objects that are currently in use by any `Store` in the current process.
277//
278// The purpose of this map is to be called from signal handlers to determine
279// whether a program counter is a wasm trap or not. Specifically macOS has
280// no contextual information about the thread available, hence the necessity
281// for global state rather than using thread local state.
282//
283// This is similar to `ModuleRegistry` except that it has less information and
284// supports removal. Any time anything is registered with a `ModuleRegistry`
285// it is also automatically registered with the singleton global module
286// registry. When a `ModuleRegistry` is destroyed then all of its entries
287// are removed from the global registry.
288fn global_code() -> &'static RwLock<GlobalRegistry> {
289    static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
290    GLOBAL_CODE.get_or_init(Default::default)
291}
292
293type GlobalRegistry = BTreeMap<usize, (usize, Arc<CodeMemory>)>;
294
295/// Find which registered region of code contains the given program counter, and
296/// what offset that PC is within that module's code.
297pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
298    let all_modules = global_code().read();
299    let (_end, (start, module)) = all_modules.range(pc..).next()?;
300    let text_offset = pc.checked_sub(*start)?;
301    Some((module.clone(), text_offset))
302}
303
304/// Registers a new region of code.
305///
306/// Must not have been previously registered and must be `unregister`'d to
307/// prevent leaking memory.
308///
309/// This is required to enable traps to work correctly since the signal handler
310/// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
311/// is a trap or not.
312pub fn register_code(image: &Arc<CodeMemory>, address: Range<usize>) {
313    if address.is_empty() {
314        return;
315    }
316    let start = address.start;
317    let end = address.end - 1;
318    let prev = global_code().write().insert(end, (start, image.clone()));
319    assert!(prev.is_none());
320}
321
322/// Unregisters a code mmap from the global map.
323///
324/// Must have been previously registered with `register`.
325pub fn unregister_code(address: Range<usize>) {
326    if address.is_empty() {
327        return;
328    }
329    let end = address.end - 1;
330    let code = global_code().write().remove(&end);
331    assert!(code.is_some());
332}
333
/// Checks that every PC inside every compiled function of a small module
/// resolves, through the store's registry, to frame info naming that same
/// function.
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), anyhow::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Instantiating the module is what registers its frame information
    // with the store.
    Instance::new(&mut store, &module, &[])?;

    // Function ranges are offsets into the text section. This relies on
    // StoreCode not actually cloning the code in the default
    // configuration, so the engine code's base address is also the base
    // address seen by the store.
    let base = module.engine_code().text_range().start.raw();
    for (func, range) in module.compiled_module().finished_function_ranges() {
        let first_pc = base + range.start;
        let past_last_pc = base + range.end;
        for pc in first_pc..past_last_pc {
            let registry = store.as_context().0.modules();
            let (frame, _) = registry.lookup_frame_info(pc).unwrap();
            assert!(
                frame.func_index() == func.as_u32(),
                "lookup of {:#x} returned {}, expected {}",
                pc,
                frame.func_index(),
                func.as_u32()
            );
        }
    }
    Ok(())
}