wasmtime/runtime/
profiling.rs

1#[cfg(feature = "component-model")]
2use crate::component::Component;
3use crate::instantiate::CompiledModule;
4use crate::prelude::*;
5use crate::runtime::vm::Backtrace;
6use crate::{AsContext, CallHook, Module};
7use core::cmp::Ordering;
8use fxprof_processed_profile::debugid::DebugId;
9use fxprof_processed_profile::{
10    CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocations, MarkerTiming,
11    Profile, ReferenceTimestamp, StaticSchemaMarker, StaticSchemaMarkerField, StringHandle, Symbol,
12    SymbolTable, Timestamp,
13};
14use std::ops::Range;
15use std::sync::Arc;
16use std::time::{Duration, Instant};
17use wasmtime_environ::demangle_function_name_or_index;
18
19// TODO: collect more data
20// - On non-Windows, measure thread-local CPU usage between events with
21//   rustix::time::clock_gettime(ClockId::ThreadCPUTime)
22// - Report which wasm module, and maybe instance, each frame came from
23
24/// Collects basic profiling data for a single WebAssembly guest.
25///
26/// This profiler can't provide measurements that are as accurate or detailed
27/// as a platform-specific profiler, such as `perf` on Linux. On the other
28/// hand, this profiler works on every platform that Wasmtime supports. Also,
29/// as an embedder you can use this profiler selectively on individual guest
30/// instances rather than profiling the entire process.
31///
32/// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
33/// regular intervals while the guest is on the stack. The most straightforward
34/// way to do that is to call it from a callback registered with
35/// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
36///
37/// # Accuracy
38///
39/// The data collection granularity is limited by the mechanism you use to
40/// interrupt guest execution and collect a profiling sample.
41///
42/// If you use epoch interruption, then samples will only be collected at
43/// function entry points and loop headers. This introduces some bias to the
44/// results. In addition, samples will only be taken at times when WebAssembly
45/// functions are running, not during host-calls.
46///
47/// It is technically possible to use fuel interruption instead. That
48/// introduces worse bias since samples occur after a certain number of
49/// WebAssembly instructions, which can take different amounts of time.
50///
51/// You may instead be able to use platform-specific methods, such as
52/// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
53/// a more accurate interval. The only current requirement is that the guest
54/// you wish to profile must be on the same stack where you call `sample`,
55/// and executing within the same thread. However, the `GuestProfiler::sample`
56/// method is not currently async-signal-safe, so doing this correctly is not
57/// easy.
58///
59/// # Security
60///
61/// Profiles produced using this profiler do not include any configuration
62/// details from the host, such as virtual memory addresses, or from any
63/// WebAssembly modules that you haven't specifically allowed. So for
64/// example, these profiles should be safe to share with untrusted users
65/// who have provided untrusted code that you are running in a multi-tenancy
66/// environment.
67///
68/// However, the profile does include byte offsets into the text section of
69/// the compiled module, revealing some information about the size of the code
70/// generated for each module. For user-provided modules, the user could get
71/// the same information by compiling the module for themself using a similar
72/// version of Wasmtime on the same target architecture, but for any module
73/// where they don't already have the WebAssembly module binary available this
74/// could theoretically lead to an undesirable information disclosure. So you
75/// should only include user-provided modules in profiles.
76#[derive(Debug)]
77pub struct GuestProfiler {
78    profile: Profile,
79    modules: Modules,
80    process: fxprof_processed_profile::ProcessHandle,
81    thread: fxprof_processed_profile::ThreadHandle,
82    start: Instant,
83    marker: CallMarker,
84}
85
86#[derive(Debug)]
87struct ProfiledModule {
88    module: Module,
89    fxprof_libhandle: fxprof_processed_profile::LibraryHandle,
90    text_range: Range<usize>,
91}
92
93type Modules = Vec<ProfiledModule>;
94
95impl GuestProfiler {
96    /// Begin profiling a new guest. When this function is called, the current
97    /// wall-clock time is recorded as the start time for the guest.
98    ///
99    /// The `module_name` parameter is recorded in the profile to help identify
100    /// where the profile came from.
101    ///
102    /// The `interval` parameter should match the rate at which you intend
103    /// to call `sample`. However, this is used as a hint and not required to
104    /// exactly match the real sample rate.
105    ///
106    /// Only modules which are present in the `modules` vector will appear in
107    /// stack traces in this profile. Any stack frames which were executing
108    /// host code or functions from other modules will be omitted. See the
109    /// "Security" section of the [`GuestProfiler`] documentation for guidance
110    /// on what modules should not be included in this list.
111    pub fn new(
112        module_name: &str,
113        interval: Duration,
114        modules: impl IntoIterator<Item = (String, Module)>,
115    ) -> Self {
116        let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
117        let mut profile = Profile::new(module_name, zero, interval.into());
118
119        // Past this point, we just need to think about modules as we pull out
120        // the disparate module information from components.
121        let mut modules: Vec<_> = modules
122            .into_iter()
123            .filter_map(|(name, module)| {
124                let compiled = module.compiled_module();
125                let text_range = {
126                    // Assumption: within text, the code for a given module is packed linearly and
127                    // is non-overlapping; if this is violated, it should be safe but might result
128                    // in incorrect profiling results.
129                    let start =
130                        compiled.finished_functions().next()?.1.as_ptr_range().start as usize;
131                    let end = compiled.finished_functions().last()?.1.as_ptr_range().end as usize;
132                    start..end
133                };
134
135                module_symbols(name, compiled).map(|lib| {
136                    let libhandle = profile.add_lib(lib);
137                    ProfiledModule {
138                        module,
139                        fxprof_libhandle: libhandle,
140                        text_range,
141                    }
142                })
143            })
144            .collect();
145
146        modules.sort_unstable_by_key(|m| m.text_range.start);
147
148        profile.set_reference_timestamp(std::time::SystemTime::now().into());
149        let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
150        let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
151        let start = Instant::now();
152        let marker = CallMarker::new(&mut profile);
153        Self {
154            profile,
155            modules,
156            process,
157            thread,
158            start,
159            marker,
160        }
161    }
162
163    /// Create a new profiler for the provided component
164    ///
165    /// See [`GuestProfiler::new`] for additional information; this function
166    /// works identically except that it takes a component and sets up
167    /// instrumentation to track calls in each of its constituent modules.
168    #[cfg(feature = "component-model")]
169    pub fn new_component(
170        component_name: &str,
171        interval: Duration,
172        component: Component,
173        extra_modules: impl IntoIterator<Item = (String, Module)>,
174    ) -> Self {
175        let modules = component
176            .static_modules()
177            .map(|m| (m.name().unwrap_or("<unknown>").to_string(), m.clone()))
178            .chain(extra_modules);
179        Self::new(component_name, interval, modules)
180    }
181
182    /// Add a sample to the profile. This function collects a backtrace from
183    /// any stack frames for allowed modules on the current stack. It should
184    /// typically be called from a callback registered using
185    /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
186    ///
187    /// The `delta` parameter is the amount of CPU time that was used by this
188    /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
189    /// here if recording CPU usage information is not needed.
190    pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
191        let now = Timestamp::from_nanos_since_reference(
192            self.start.elapsed().as_nanos().try_into().unwrap(),
193        );
194        let backtrace = Backtrace::new(store.as_context().0);
195        let frames = lookup_frames(&self.modules, &backtrace);
196        let stack = self
197            .profile
198            .intern_stack_frames(self.thread, frames.into_iter());
199        self.profile
200            .add_sample(self.thread, now, stack, delta.into(), 1);
201    }
202
203    /// Add a marker for transitions between guest and host to the profile.
204    /// This function should typically be called from a callback registered
205    /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind`
206    /// parameter should be the value of the same type passed into that hook.
207    pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) {
208        let now = Timestamp::from_nanos_since_reference(
209            self.start.elapsed().as_nanos().try_into().unwrap(),
210        );
211        match kind {
212            CallHook::CallingWasm | CallHook::ReturningFromWasm => {}
213            CallHook::CallingHost => {
214                let backtrace = Backtrace::new(store.as_context().0);
215                let frames = lookup_frames(&self.modules, &backtrace);
216                let marker = self.profile.add_marker(
217                    self.thread,
218                    MarkerTiming::IntervalStart(now),
219                    self.marker,
220                );
221                let stack = self
222                    .profile
223                    .intern_stack_frames(self.thread, frames.into_iter());
224                self.profile.set_marker_stack(self.thread, marker, stack);
225            }
226            CallHook::ReturningFromHost => {
227                self.profile
228                    .add_marker(self.thread, MarkerTiming::IntervalEnd(now), self.marker);
229            }
230        }
231    }
232
233    /// When the guest finishes running, call this function to write the
234    /// profile to the given `output`. The output is a JSON-formatted object in
235    /// the [Firefox "processed profile format"][fmt]. Files in this format may
236    /// be visualized at <https://profiler.firefox.com/>.
237    ///
238    /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
239    pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
240        let now = Timestamp::from_nanos_since_reference(
241            self.start.elapsed().as_nanos().try_into().unwrap(),
242        );
243        self.profile.set_thread_end_time(self.thread, now);
244        self.profile.set_process_end_time(self.process, now);
245
246        serde_json::to_writer(output, &self.profile)?;
247        Ok(())
248    }
249}
250
251fn module_symbols(name: String, compiled: &CompiledModule) -> Option<LibraryInfo> {
252    let symbols = Vec::from_iter(compiled.finished_functions().map(|(defined_idx, _)| {
253        let loc = compiled.func_loc(defined_idx);
254        let func_idx = compiled.module().func_index(defined_idx);
255        let mut name = String::new();
256        demangle_function_name_or_index(
257            &mut name,
258            compiled.func_name(func_idx),
259            defined_idx.as_u32() as usize,
260        )
261        .unwrap();
262        Symbol {
263            address: loc.start,
264            size: Some(loc.length),
265            name,
266        }
267    }));
268    if symbols.is_empty() {
269        return None;
270    }
271
272    Some(LibraryInfo {
273        name,
274        debug_name: String::new(),
275        path: String::new(),
276        debug_path: String::new(),
277        debug_id: DebugId::nil(),
278        code_id: None,
279        arch: None,
280        symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
281    })
282}
283
284fn lookup_frames<'a>(
285    modules: &'a Modules,
286    backtrace: &'a Backtrace,
287) -> impl Iterator<Item = FrameInfo> + 'a {
288    backtrace
289        .frames()
290        // Samply needs to see the oldest frame first, but we list the newest
291        // first, so iterate in reverse.
292        .rev()
293        .filter_map(|frame| {
294            let idx = modules
295                .binary_search_by(|probe| {
296                    if probe.text_range.contains(&frame.pc()) {
297                        Ordering::Equal
298                    } else {
299                        probe.text_range.start.cmp(&frame.pc())
300                    }
301                })
302                .ok()?;
303            let module = modules.get(idx)?;
304
305            // We need to point to the modules full text (not just its functions) as
306            // the offset for the final phase; these can be different for component
307            // model modules.
308            let module_text_start = module.module.text().as_ptr_range().start as usize;
309            return Some(FrameInfo {
310                frame: Frame::RelativeAddressFromReturnAddress(
311                    module.fxprof_libhandle,
312                    u32::try_from(frame.pc() - module_text_start).unwrap(),
313                ),
314                category_pair: CategoryHandle::OTHER.into(),
315                flags: FrameFlags::empty(),
316            });
317        })
318}
319
320#[derive(Debug, Clone, Copy)]
321struct CallMarker {
322    name: StringHandle,
323}
324
325impl CallMarker {
326    fn new(profile: &mut Profile) -> Self {
327        let name = profile.intern_string(Self::UNIQUE_MARKER_TYPE_NAME);
328        Self { name }
329    }
330}
331
332impl StaticSchemaMarker for CallMarker {
333    const UNIQUE_MARKER_TYPE_NAME: &'static str = "hostcall";
334    const FIELDS: &'static [StaticSchemaMarkerField] = &[];
335    const LOCATIONS: MarkerLocations = MarkerLocations::MARKER_CHART
336        .union(MarkerLocations::MARKER_TABLE.union(MarkerLocations::TIMELINE_OVERVIEW));
337
338    fn name(&self, _profile: &mut Profile) -> StringHandle {
339        self.name
340    }
341    fn category(&self, _profile: &mut Profile) -> CategoryHandle {
342        CategoryHandle::OTHER
343    }
344    fn string_field_value(&self, _field_index: u32) -> StringHandle {
345        unreachable!("no fields")
346    }
347    fn number_field_value(&self, _field_index: u32) -> f64 {
348        unreachable!("no fields")
349    }
350}