wasmtime/runtime/module/
registry.rs

1//! Implements a registry of modules for a store.
2
3use crate::code::CodeObject;
4#[cfg(feature = "component-model")]
5use crate::component::Component;
6use crate::prelude::*;
7use crate::runtime::vm::VMWasmCallFunction;
8use crate::sync::{OnceLock, RwLock};
9use crate::{code_memory::CodeMemory, FrameInfo, Module};
10use alloc::collections::btree_map::{BTreeMap, Entry};
11use alloc::sync::Arc;
12use core::ptr::NonNull;
13use wasmtime_environ::VMSharedTypeIndex;
14
15/// Used for registering modules with a store.
16///
17/// Note that the primary reason for this registry is to ensure that everything
18/// in `Module` is kept alive for the duration of a `Store`. At this time we
19/// need "basically everything" within a `Module` to stay alive once it's
20/// instantiated within a store. While there's some smaller portions that could
21/// theoretically be omitted as they're not needed by the store they're
22/// currently small enough to not worry much about.
23#[derive(Default)]
24pub struct ModuleRegistry {
25    // Keyed by the end address of a `CodeObject`.
26    //
27    // The value here is the start address and the information about what's
28    // loaded at that address.
29    loaded_code: BTreeMap<usize, (usize, LoadedCode)>,
30
31    // Preserved for keeping data segments alive or similar
32    modules_without_code: Vec<Module>,
33}
34
35struct LoadedCode {
36    /// Kept alive here in the store to have a strong reference to keep the
37    /// relevant code mapped while the store is alive.
38    _code: Arc<CodeObject>,
39
40    /// Modules found within `self.code`, keyed by start address here of the
41    /// address of the first function in the module.
42    modules: BTreeMap<usize, Module>,
43}
44
/// Handle to a module that has previously been inserted into a
/// `ModuleRegistry`.
///
/// Pass it to `ModuleRegistry::lookup_module_by_id` to get the module back.
#[derive(Clone, Copy)]
pub enum RegisteredModuleId {
    /// Index into `ModuleRegistry::modules_without_code`.
    WithoutCode(usize),
    /// Start address of the code the module was registered with, so that the
    /// module can be found again via `ModuleRegistry::lookup_module_by_id`.
    LoadedCode(usize),
}
55
56impl ModuleRegistry {
57    /// Get a previously-registered module by id.
58    pub fn lookup_module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
59        match id {
60            RegisteredModuleId::WithoutCode(idx) => self.modules_without_code.get(idx),
61            RegisteredModuleId::LoadedCode(pc) => {
62                let (module, _) = self.module_and_offset(pc)?;
63                Some(module)
64            }
65        }
66    }
67
68    /// Fetches a registered module given a program counter value.
69    #[cfg(feature = "gc")]
70    pub fn lookup_module_by_pc(&self, pc: usize) -> Option<&Module> {
71        let (module, _) = self.module_and_offset(pc)?;
72        Some(module)
73    }
74
75    fn code(&self, pc: usize) -> Option<(&LoadedCode, usize)> {
76        let (end, (start, code)) = self.loaded_code.range(pc..).next()?;
77        if pc < *start || *end < pc {
78            return None;
79        }
80        Some((code, pc - *start))
81    }
82
83    fn module_and_offset(&self, pc: usize) -> Option<(&Module, usize)> {
84        let (code, offset) = self.code(pc)?;
85        Some((code.module(pc)?, offset))
86    }
87
88    /// Gets an iterator over all modules in the registry.
89    #[cfg(feature = "coredump")]
90    pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
91        self.loaded_code
92            .values()
93            .flat_map(|(_, code)| code.modules.values())
94            .chain(self.modules_without_code.iter())
95    }
96
97    /// Registers a new module with the registry.
98    pub fn register_module(&mut self, module: &Module) -> RegisteredModuleId {
99        self.register(module.code_object(), Some(module)).unwrap()
100    }
101
102    #[cfg(feature = "component-model")]
103    pub fn register_component(&mut self, component: &Component) {
104        self.register(component.code_object(), None);
105    }
106
107    /// Registers a new module with the registry.
108    fn register(
109        &mut self,
110        code: &Arc<CodeObject>,
111        module: Option<&Module>,
112    ) -> Option<RegisteredModuleId> {
113        let text = code.code_memory().text();
114
115        // If there's not actually any functions in this module then we may
116        // still need to preserve it for its data segments. Instances of this
117        // module will hold a pointer to the data stored in the module itself,
118        // and for schemes that perform lazy initialization which could use the
119        // module in the future. For that reason we continue to register empty
120        // modules and retain them.
121        if text.is_empty() {
122            return module.map(|module| {
123                let id = RegisteredModuleId::WithoutCode(self.modules_without_code.len());
124                self.modules_without_code.push(module.clone());
125                id
126            });
127        }
128
129        // The module code range is exclusive for end, so make it inclusive as
130        // it may be a valid PC value
131        let start_addr = text.as_ptr() as usize;
132        let end_addr = start_addr + text.len() - 1;
133        let id = module.map(|_| RegisteredModuleId::LoadedCode(start_addr));
134
135        // If this module is already present in the registry then that means
136        // it's either an overlapping image, for example for two modules
137        // found within a component, or it's a second instantiation of the same
138        // module. Delegate to `push_module` to find out.
139        if let Some((other_start, prev)) = self.loaded_code.get_mut(&end_addr) {
140            assert_eq!(*other_start, start_addr);
141            if let Some(module) = module {
142                prev.push_module(module);
143            }
144            return id;
145        }
146
147        // Assert that this module's code doesn't collide with any other
148        // registered modules
149        if let Some((_, (prev_start, _))) = self.loaded_code.range(start_addr..).next() {
150            assert!(*prev_start > end_addr);
151        }
152        if let Some((prev_end, _)) = self.loaded_code.range(..=start_addr).next_back() {
153            assert!(*prev_end < start_addr);
154        }
155
156        let mut item = LoadedCode {
157            _code: code.clone(),
158            modules: Default::default(),
159        };
160        if let Some(module) = module {
161            item.push_module(module);
162        }
163        let prev = self.loaded_code.insert(end_addr, (start_addr, item));
164        assert!(prev.is_none());
165        id
166    }
167
168    /// Fetches frame information about a program counter in a backtrace.
169    ///
170    /// Returns an object if this `pc` is known to some previously registered
171    /// module, or returns `None` if no information can be found. The first
172    /// boolean returned indicates whether the original module has unparsed
173    /// debug information due to the compiler's configuration. The second
174    /// boolean indicates whether the engine used to compile this module is
175    /// using environment variables to control debuginfo parsing.
176    pub(crate) fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, &Module)> {
177        let (module, offset) = self.module_and_offset(pc)?;
178        let info = FrameInfo::new(module.clone(), offset)?;
179        Some((info, module))
180    }
181
182    pub fn wasm_to_array_trampoline(
183        &self,
184        sig: VMSharedTypeIndex,
185    ) -> Option<NonNull<VMWasmCallFunction>> {
186        // TODO: We are doing a linear search over each module. This is fine for
187        // now because we typically have very few modules per store (almost
188        // always one, in fact). If this linear search ever becomes a
189        // bottleneck, we could avoid it by incrementally and lazily building a
190        // `VMSharedSignatureIndex` to `SignatureIndex` map.
191        //
192        // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
193        for (_, code) in self.loaded_code.values() {
194            for module in code.modules.values() {
195                if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
196                    return Some(trampoline);
197                }
198            }
199        }
200        None
201    }
202}
203
204impl LoadedCode {
205    fn push_module(&mut self, module: &Module) {
206        let func = match module.compiled_module().finished_functions().next() {
207            Some((_, func)) => func,
208            // There are no compiled functions in this module so there's no
209            // need to push onto `self.modules` which is only used for frame
210            // information lookup for a trap which only symbolicates defined
211            // functions.
212            None => return,
213        };
214        let start = func.as_ptr() as usize;
215
216        match self.modules.entry(start) {
217            // This module is already present, and it should be the same as
218            // `module`.
219            Entry::Occupied(m) => {
220                debug_assert!(Arc::ptr_eq(&module.inner, &m.get().inner));
221            }
222            // This module was not already present, so now it's time to insert.
223            Entry::Vacant(v) => {
224                v.insert(module.clone());
225            }
226        }
227    }
228
229    fn module(&self, pc: usize) -> Option<&Module> {
230        // The `modules` map is keyed on the start address of the first
231        // function in the module, so find the first module whose start address
232        // is less than the `pc`. That may be the wrong module but lookup
233        // within the module should fail in that case.
234        let (_start, module) = self.modules.range(..=pc).next_back()?;
235        Some(module)
236    }
237}
238
239// This is the global code registry that stores information for all loaded code
240// objects that are currently in use by any `Store` in the current process.
241//
242// The purpose of this map is to be called from signal handlers to determine
243// whether a program counter is a wasm trap or not. Specifically macOS has
244// no contextual information about the thread available, hence the necessity
245// for global state rather than using thread local state.
246//
247// This is similar to `ModuleRegistry` except that it has less information and
248// supports removal. Any time anything is registered with a `ModuleRegistry`
249// it is also automatically registered with the singleton global module
250// registry. When a `ModuleRegistry` is destroyed then all of its entries
251// are removed from the global registry.
252fn global_code() -> &'static RwLock<GlobalRegistry> {
253    static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
254    GLOBAL_CODE.get_or_init(Default::default)
255}
256
257type GlobalRegistry = BTreeMap<usize, (usize, Arc<CodeMemory>)>;
258
259/// Find which registered region of code contains the given program counter, and
260/// what offset that PC is within that module's code.
261pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
262    let all_modules = global_code().read();
263    let (_end, (start, module)) = all_modules.range(pc..).next()?;
264    let text_offset = pc.checked_sub(*start)?;
265    Some((module.clone(), text_offset))
266}
267
268/// Registers a new region of code.
269///
270/// Must not have been previously registered and must be `unregister`'d to
271/// prevent leaking memory.
272///
273/// This is required to enable traps to work correctly since the signal handler
274/// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
275/// is a trap or not.
276pub fn register_code(code: &Arc<CodeMemory>) {
277    let text = code.text();
278    if text.is_empty() {
279        return;
280    }
281    let start = text.as_ptr() as usize;
282    let end = start + text.len() - 1;
283    let prev = global_code().write().insert(end, (start, code.clone()));
284    assert!(prev.is_none());
285}
286
287/// Unregisters a code mmap from the global map.
288///
289/// Must have been previously registered with `register`.
290pub fn unregister_code(code: &Arc<CodeMemory>) {
291    let text = code.text();
292    if text.is_empty() {
293        return;
294    }
295    let end = (text.as_ptr() as usize) + text.len() - 1;
296    let code = global_code().write().remove(&end);
297    assert!(code.is_some());
298}
299
// Checks that every address inside every compiled function of a small module
// resolves, through the store's registry, to frame information for that
// function.
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), anyhow::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Instantiating the module ensures its frame information is registered.
    Instance::new(&mut store, &module, &[])?;

    for (i, alloc) in module.compiled_module().finished_functions() {
        // Half-open address range covered by this function's code.
        let start = alloc.as_ptr() as usize;
        let end = start + alloc.len();

        for pc in start..end {
            let (frame, _) = store
                .as_context()
                .0
                .modules()
                .lookup_frame_info(pc)
                .unwrap();
            assert!(
                frame.func_index() == i.as_u32(),
                "lookup of {:#x} returned {}, expected {}",
                pc,
                frame.func_index(),
                i.as_u32()
            );
        }
    }
    Ok(())
}