wasmtime/runtime/module/
registry.rs

1//! Implements a registry of modules for a store.
2
3use crate::code::CodeObject;
4#[cfg(feature = "component-model")]
5use crate::component::Component;
6use crate::prelude::*;
7use crate::runtime::vm::VMWasmCallFunction;
8use crate::sync::{OnceLock, RwLock};
9use crate::{FrameInfo, Module, code_memory::CodeMemory};
10use alloc::collections::btree_map::{BTreeMap, Entry};
11use alloc::sync::Arc;
12use core::ptr::NonNull;
13use wasmtime_environ::VMSharedTypeIndex;
14
/// Used for registering modules with a store.
///
/// Note that the primary reason for this registry is to ensure that everything
/// in `Module` is kept alive for the duration of a `Store`. At this time we
/// need "basically everything" within a `Module` to stay alive once it's
/// instantiated within a store. While there are some smaller portions that
/// could theoretically be omitted as they're not needed by the store they're
/// currently small enough to not worry much about.
#[derive(Default)]
pub struct ModuleRegistry {
    // Keyed by the inclusive end address (the address of the last byte) of
    // the text section of a `CodeObject`.
    //
    // The value here is the start address of that text section and the
    // information about what's loaded at that address. Keying on the end
    // address means a `range(pc..)` query yields the only region that could
    // contain `pc` as its first result.
    loaded_code: BTreeMap<usize, (usize, LoadedCode)>,

    // Modules whose code object has an empty text section. They are preserved
    // for keeping data segments alive or similar, and are addressed by their
    // index in this vector through `RegisteredModuleId::WithoutCode`.
    modules_without_code: Vec<Module>,
}
34
/// Everything the registry tracks for a single registered `CodeObject`.
struct LoadedCode {
    /// Kept alive here in the store to have a strong reference to keep the
    /// relevant code mapped while the store is alive.
    _code: Arc<CodeObject>,

    /// Modules found within `self._code`, keyed by the address of the first
    /// compiled function in each module.
    modules: BTreeMap<usize, Module>,

    /// These modules have no functions inside of them but they can still be
    /// used for trampoline lookup.
    modules_with_only_trampolines: Vec<Module>,
}
48
/// An identifier of a module that has previously been inserted into a
/// `ModuleRegistry`.
///
/// Pass it to `ModuleRegistry::lookup_module_by_id` to get the module back.
#[derive(Clone, Copy, Debug)]
pub enum RegisteredModuleId {
    /// Index into `ModuleRegistry::modules_without_code`.
    WithoutCode(usize),
    /// Start address of the text section of the code object the module was
    /// registered with, so that we can get it again via
    /// `ModuleRegistry::lookup_module_by_id`.
    LoadedCode(usize),
}
59
60impl ModuleRegistry {
61    /// Get a previously-registered module by id.
62    pub fn lookup_module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> {
63        match id {
64            RegisteredModuleId::WithoutCode(idx) => self.modules_without_code.get(idx),
65            RegisteredModuleId::LoadedCode(pc) => {
66                let (module, _) = self.module_and_offset(pc)?;
67                Some(module)
68            }
69        }
70    }
71
72    /// Fetches a registered module given a program counter value.
73    #[cfg(feature = "gc")]
74    pub fn lookup_module_by_pc(&self, pc: usize) -> Option<&Module> {
75        let (module, _) = self.module_and_offset(pc)?;
76        Some(module)
77    }
78
79    fn code(&self, pc: usize) -> Option<(&LoadedCode, usize)> {
80        let (end, (start, code)) = self.loaded_code.range(pc..).next()?;
81        if pc < *start || *end < pc {
82            return None;
83        }
84        Some((code, pc - *start))
85    }
86
87    fn module_and_offset(&self, pc: usize) -> Option<(&Module, usize)> {
88        let (code, offset) = self.code(pc)?;
89        Some((code.module(pc)?, offset))
90    }
91
92    /// Gets an iterator over all modules in the registry.
93    #[cfg(feature = "coredump")]
94    pub fn all_modules(&self) -> impl Iterator<Item = &'_ Module> + '_ {
95        self.loaded_code
96            .values()
97            .flat_map(|(_, code)| code.modules.values())
98            .chain(self.modules_without_code.iter())
99    }
100
101    /// Registers a new module with the registry.
102    pub fn register_module(&mut self, module: &Module) -> RegisteredModuleId {
103        self.register(module.code_object(), Some(module)).unwrap()
104    }
105
106    #[cfg(feature = "component-model")]
107    pub fn register_component(&mut self, component: &Component) {
108        self.register(component.code_object(), None);
109    }
110
111    /// Registers a new module with the registry.
112    fn register(
113        &mut self,
114        code: &Arc<CodeObject>,
115        module: Option<&Module>,
116    ) -> Option<RegisteredModuleId> {
117        let text = code.code_memory().text();
118
119        // If there's not actually any functions in this module then we may
120        // still need to preserve it for its data segments. Instances of this
121        // module will hold a pointer to the data stored in the module itself,
122        // and for schemes that perform lazy initialization which could use the
123        // module in the future. For that reason we continue to register empty
124        // modules and retain them.
125        if text.is_empty() {
126            return module.map(|module| {
127                let id = RegisteredModuleId::WithoutCode(self.modules_without_code.len());
128                self.modules_without_code.push(module.clone());
129                id
130            });
131        }
132
133        // The module code range is exclusive for end, so make it inclusive as
134        // it may be a valid PC value
135        let start_addr = text.as_ptr() as usize;
136        let end_addr = start_addr + text.len() - 1;
137        let id = module.map(|_| RegisteredModuleId::LoadedCode(start_addr));
138
139        // If this module is already present in the registry then that means
140        // it's either an overlapping image, for example for two modules
141        // found within a component, or it's a second instantiation of the same
142        // module. Delegate to `push_module` to find out.
143        if let Some((other_start, prev)) = self.loaded_code.get_mut(&end_addr) {
144            assert_eq!(*other_start, start_addr);
145            if let Some(module) = module {
146                prev.push_module(module);
147            }
148            return id;
149        }
150
151        // Assert that this module's code doesn't collide with any other
152        // registered modules
153        if let Some((_, (prev_start, _))) = self.loaded_code.range(start_addr..).next() {
154            assert!(*prev_start > end_addr);
155        }
156        if let Some((prev_end, _)) = self.loaded_code.range(..=start_addr).next_back() {
157            assert!(*prev_end < start_addr);
158        }
159
160        let mut item = LoadedCode {
161            _code: code.clone(),
162            modules: Default::default(),
163            modules_with_only_trampolines: Vec::new(),
164        };
165        if let Some(module) = module {
166            item.push_module(module);
167        }
168        let prev = self.loaded_code.insert(end_addr, (start_addr, item));
169        assert!(prev.is_none());
170        id
171    }
172
173    /// Fetches frame information about a program counter in a backtrace.
174    ///
175    /// Returns an object if this `pc` is known to some previously registered
176    /// module, or returns `None` if no information can be found. The first
177    /// boolean returned indicates whether the original module has unparsed
178    /// debug information due to the compiler's configuration. The second
179    /// boolean indicates whether the engine used to compile this module is
180    /// using environment variables to control debuginfo parsing.
181    pub(crate) fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, &Module)> {
182        let (module, offset) = self.module_and_offset(pc)?;
183        let info = FrameInfo::new(module.clone(), offset)?;
184        Some((info, module))
185    }
186
187    pub fn wasm_to_array_trampoline(
188        &self,
189        sig: VMSharedTypeIndex,
190    ) -> Option<NonNull<VMWasmCallFunction>> {
191        // TODO: We are doing a linear search over each module. This is fine for
192        // now because we typically have very few modules per store (almost
193        // always one, in fact). If this linear search ever becomes a
194        // bottleneck, we could avoid it by incrementally and lazily building a
195        // `VMSharedSignatureIndex` to `SignatureIndex` map.
196        //
197        // See also the comment in `ModuleInner::wasm_to_native_trampoline`.
198        for (_, code) in self.loaded_code.values() {
199            for module in code
200                .modules
201                .values()
202                .chain(&code.modules_with_only_trampolines)
203            {
204                if let Some(trampoline) = module.wasm_to_array_trampoline(sig) {
205                    return Some(trampoline);
206                }
207            }
208        }
209        None
210    }
211}
212
213impl LoadedCode {
214    fn push_module(&mut self, module: &Module) {
215        let func = match module.compiled_module().finished_functions().next() {
216            Some((_, func)) => func,
217            // There are no compiled functions in this module so there's no
218            // need to push onto `self.modules` which is only used for frame
219            // information lookup for a trap which only symbolicates defined
220            // functions.
221            None => {
222                self.modules_with_only_trampolines.push(module.clone());
223                return;
224            }
225        };
226        let start = func.as_ptr() as usize;
227
228        match self.modules.entry(start) {
229            // This module is already present, and it should be the same as
230            // `module`.
231            Entry::Occupied(m) => {
232                debug_assert!(Arc::ptr_eq(&module.inner, &m.get().inner));
233            }
234            // This module was not already present, so now it's time to insert.
235            Entry::Vacant(v) => {
236                v.insert(module.clone());
237            }
238        }
239    }
240
241    fn module(&self, pc: usize) -> Option<&Module> {
242        // The `modules` map is keyed on the start address of the first
243        // function in the module, so find the first module whose start address
244        // is less than the `pc`. That may be the wrong module but lookup
245        // within the module should fail in that case.
246        let (_start, module) = self.modules.range(..=pc).next_back()?;
247        Some(module)
248    }
249}
250
251// This is the global code registry that stores information for all loaded code
252// objects that are currently in use by any `Store` in the current process.
253//
254// The purpose of this map is to be called from signal handlers to determine
255// whether a program counter is a wasm trap or not. Specifically macOS has
256// no contextual information about the thread available, hence the necessity
257// for global state rather than using thread local state.
258//
259// This is similar to `ModuleRegistry` except that it has less information and
260// supports removal. Any time anything is registered with a `ModuleRegistry`
261// it is also automatically registered with the singleton global module
262// registry. When a `ModuleRegistry` is destroyed then all of its entries
263// are removed from the global registry.
264fn global_code() -> &'static RwLock<GlobalRegistry> {
265    static GLOBAL_CODE: OnceLock<RwLock<GlobalRegistry>> = OnceLock::new();
266    GLOBAL_CODE.get_or_init(Default::default)
267}
268
// Keyed by the inclusive end address of a region of code. The value is the
// start address of that region and the `CodeMemory` it belongs to.
type GlobalRegistry = BTreeMap<usize, (usize, Arc<CodeMemory>)>;
270
271/// Find which registered region of code contains the given program counter, and
272/// what offset that PC is within that module's code.
273pub fn lookup_code(pc: usize) -> Option<(Arc<CodeMemory>, usize)> {
274    let all_modules = global_code().read();
275    let (_end, (start, module)) = all_modules.range(pc..).next()?;
276    let text_offset = pc.checked_sub(*start)?;
277    Some((module.clone(), text_offset))
278}
279
280/// Registers a new region of code.
281///
282/// Must not have been previously registered and must be `unregister`'d to
283/// prevent leaking memory.
284///
285/// This is required to enable traps to work correctly since the signal handler
286/// will lookup in the `GLOBAL_CODE` list to determine which a particular pc
287/// is a trap or not.
288pub fn register_code(code: &Arc<CodeMemory>) {
289    let text = code.text();
290    if text.is_empty() {
291        return;
292    }
293    let start = text.as_ptr() as usize;
294    let end = start + text.len() - 1;
295    let prev = global_code().write().insert(end, (start, code.clone()));
296    assert!(prev.is_none());
297}
298
299/// Unregisters a code mmap from the global map.
300///
301/// Must have been previously registered with `register`.
302pub fn unregister_code(code: &Arc<CodeMemory>) {
303    let text = code.text();
304    if text.is_empty() {
305        return;
306    }
307    let end = (text.as_ptr() as usize) + text.len() - 1;
308    let code = global_code().write().remove(&end);
309    assert!(code.is_some());
310}
311
// Checks that every address inside each compiled function of a module
// resolves, through the store's registry, to frame information naming that
// same function.
#[test]
#[cfg_attr(miri, ignore)]
fn test_frame_info() -> Result<(), anyhow::Error> {
    use crate::*;

    let mut store = Store::<()>::default();
    let module = Module::new(
        store.engine(),
        r#"
            (module
                (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y)))
                (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y)))
                (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y)))
                (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y)))
                (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y)))
                (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y)))
                (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y)))
            )
         "#,
    )?;
    // Instantiating the module makes sure its frame information is
    // registered with the store.
    Instance::new(&mut store, &module, &[])?;

    for (i, alloc) in module.compiled_module().finished_functions() {
        // Half-open address range covered by this function's code.
        let start = alloc.as_ptr() as usize;
        let end = start + alloc.len();

        for pc in start..end {
            // Kept as a single statement so that the temporaries created by
            // the call chain live exactly as long as they need to.
            let (frame, _) = store
                .as_context()
                .0
                .modules()
                .lookup_frame_info(pc)
                .unwrap();
            assert!(
                frame.func_index() == i.as_u32(),
                "lookup of {:#x} returned {}, expected {}",
                pc,
                frame.func_index(),
                i.as_u32()
            );
        }
    }
    Ok(())
}