Skip to main content

wasmtime/runtime/
profiling.rs

1#[cfg(feature = "component-model")]
2use crate::component::Component;
3use crate::runtime::vm::Backtrace;
4use crate::{AsContext, CallHook, Module};
5use crate::{Engine, prelude::*};
6use core::cmp::Ordering;
7use fxprof_processed_profile::debugid::DebugId;
8use fxprof_processed_profile::{
9    CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocations, MarkerTiming,
10    Profile, ReferenceTimestamp, StaticSchemaMarker, StaticSchemaMarkerField, StringHandle, Symbol,
11    SymbolTable, Timestamp,
12};
13use std::ops::Range;
14use std::sync::Arc;
15use std::time::{Duration, Instant};
16use wasmtime_environ::{FuncKey, demangle_function_name_or_index};
17
18// TODO: collect more data
19// - On non-Windows, measure thread-local CPU usage between events with
20//   rustix::time::clock_gettime(ClockId::ThreadCPUTime)
21// - Report which wasm module, and maybe instance, each frame came from
22
23/// Collects basic profiling data for a single WebAssembly guest.
24///
25/// This profiler can't provide measurements that are as accurate or detailed
26/// as a platform-specific profiler, such as `perf` on Linux. On the other
27/// hand, this profiler works on every platform that Wasmtime supports. Also,
28/// as an embedder you can use this profiler selectively on individual guest
29/// instances rather than profiling the entire process.
30///
31/// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
32/// regular intervals while the guest is on the stack. The most straightforward
33/// way to do that is to call it from a callback registered with
34/// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
35///
36/// # Accuracy
37///
38/// The data collection granularity is limited by the mechanism you use to
39/// interrupt guest execution and collect a profiling sample.
40///
41/// If you use epoch interruption, then samples will only be collected at
42/// function entry points and loop headers. This introduces some bias to the
43/// results. In addition, samples will only be taken at times when WebAssembly
44/// functions are running, not during host-calls.
45///
46/// It is technically possible to use fuel interruption instead. That
47/// introduces worse bias since samples occur after a certain number of
48/// WebAssembly instructions, which can take different amounts of time.
49///
50/// You may instead be able to use platform-specific methods, such as
51/// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
52/// a more accurate interval. The only current requirement is that the guest
53/// you wish to profile must be on the same stack where you call `sample`,
54/// and executing within the same thread. However, the `GuestProfiler::sample`
55/// method is not currently async-signal-safe, so doing this correctly is not
56/// easy.
57///
58/// # Security
59///
60/// Profiles produced using this profiler do not include any configuration
61/// details from the host, such as virtual memory addresses, or from any
62/// WebAssembly modules that you haven't specifically allowed. So for
63/// example, these profiles should be safe to share with untrusted users
64/// who have provided untrusted code that you are running in a multi-tenancy
65/// environment.
66///
67/// However, the profile does include byte offsets into the text section of
68/// the compiled module, revealing some information about the size of the code
69/// generated for each module. For user-provided modules, the user could get
70/// the same information by compiling the module for themself using a similar
71/// version of Wasmtime on the same target architecture, but for any module
72/// where they don't already have the WebAssembly module binary available this
73/// could theoretically lead to an undesirable information disclosure. So you
74/// should only include user-provided modules in profiles.
75#[derive(Debug)]
76pub struct GuestProfiler {
77    profile: Profile,
78    modules: Modules,
79    process: fxprof_processed_profile::ProcessHandle,
80    thread: fxprof_processed_profile::ThreadHandle,
81    start: Instant,
82    marker: CallMarker,
83}
84
85#[derive(Debug)]
86struct ProfiledModule {
87    module: Module,
88    fxprof_libhandle: fxprof_processed_profile::LibraryHandle,
89    text_range: Range<usize>,
90}
91
92type Modules = Vec<ProfiledModule>;
93
94impl GuestProfiler {
95    /// Begin profiling a new guest. When this function is called, the current
96    /// wall-clock time is recorded as the start time for the guest.
97    ///
98    /// The `module_name` parameter is recorded in the profile to help identify
99    /// where the profile came from.
100    ///
101    /// The `interval` parameter should match the rate at which you intend
102    /// to call `sample`. However, this is used as a hint and not required to
103    /// exactly match the real sample rate.
104    ///
105    /// Only modules which are present in the `modules` vector will appear in
106    /// stack traces in this profile. Any stack frames which were executing
107    /// host code or functions from other modules will be omitted. See the
108    /// "Security" section of the [`GuestProfiler`] documentation for guidance
109    /// on what modules should not be included in this list.
110    pub fn new(
111        engine: &Engine,
112        module_name: &str,
113        interval: Duration,
114        modules: impl IntoIterator<Item = (String, Module)>,
115    ) -> Result<Self> {
116        // Check that guest debugging is not enabled. The
117        // instrumentation would make profiling results unreliable,
118        // but more fundamentally, it means that code is cloned per
119        // instantiation (for breakpoint patching) so the logic below
120        // is incorrect.
121        if engine.tunables().debug_guest {
122            crate::bail!("Profiling cannot be performed when guest-debugging is enabled.");
123        }
124
125        let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
126        let mut profile = Profile::new(module_name, zero, interval.into());
127
128        // Past this point, we just need to think about modules as we pull out
129        // the disparate module information from components.
130        let mut modules: Vec<_> = modules
131            .into_iter()
132            .filter_map(|(name, module)| {
133                assert!(Engine::same(module.engine(), engine));
134                let compiled = module.compiled_module();
135                let text_range = {
136                    // Assumption: within text, the code for a given module is packed linearly and
137                    // is non-overlapping; if this is violated, it should be safe but might result
138                    // in incorrect profiling results.
139                    let start = compiled.finished_function_ranges().next()?.1.start;
140                    let end = compiled.finished_function_ranges().last()?.1.end;
141
142                    let start = (module.engine_code().text_range().start + start).raw();
143                    let end = (module.engine_code().text_range().start + end).raw();
144                    start..end
145                };
146
147                module_symbols(name, &module).map(|lib| {
148                    let libhandle = profile.add_lib(lib);
149                    ProfiledModule {
150                        module,
151                        fxprof_libhandle: libhandle,
152                        text_range,
153                    }
154                })
155            })
156            .collect();
157
158        modules.sort_unstable_by_key(|m| m.text_range.start);
159
160        profile.set_reference_timestamp(std::time::SystemTime::now().into());
161        let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
162        let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
163        let start = Instant::now();
164        let marker = CallMarker::new(&mut profile);
165        Ok(Self {
166            profile,
167            modules,
168            process,
169            thread,
170            start,
171            marker,
172        })
173    }
174
175    /// Create a new profiler for the provided component
176    ///
177    /// See [`GuestProfiler::new`] for additional information; this function
178    /// works identically except that it takes a component and sets up
179    /// instrumentation to track calls in each of its constituent modules.
180    #[cfg(feature = "component-model")]
181    pub fn new_component(
182        engine: &Engine,
183        component_name: &str,
184        interval: Duration,
185        component: Component,
186        extra_modules: impl IntoIterator<Item = (String, Module)>,
187    ) -> Result<Self> {
188        let modules = component
189            .static_modules()
190            .map(|m| (m.name().unwrap_or("<unknown>").to_string(), m.clone()))
191            .chain(extra_modules);
192        Self::new(engine, component_name, interval, modules)
193    }
194
195    /// Add a sample to the profile. This function collects a backtrace from
196    /// any stack frames for allowed modules on the current stack. It should
197    /// typically be called from a callback registered using
198    /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
199    ///
200    /// The `delta` parameter is the amount of CPU time that was used by this
201    /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
202    /// here if recording CPU usage information is not needed.
203    pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
204        let now = Timestamp::from_nanos_since_reference(
205            self.start.elapsed().as_nanos().try_into().unwrap(),
206        );
207        let backtrace = Backtrace::new(store.as_context().0);
208        let frames = lookup_frames(&self.modules, &backtrace);
209        let stack = self
210            .profile
211            .intern_stack_frames(self.thread, frames.into_iter());
212        self.profile
213            .add_sample(self.thread, now, stack, delta.into(), 1);
214    }
215
216    /// Add a marker for transitions between guest and host to the profile.
217    /// This function should typically be called from a callback registered
218    /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind`
219    /// parameter should be the value of the same type passed into that hook.
220    pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) {
221        let now = Timestamp::from_nanos_since_reference(
222            self.start.elapsed().as_nanos().try_into().unwrap(),
223        );
224        match kind {
225            CallHook::CallingWasm | CallHook::ReturningFromWasm => {}
226            CallHook::CallingHost => {
227                let backtrace = Backtrace::new(store.as_context().0);
228                let frames = lookup_frames(&self.modules, &backtrace);
229                let marker = self.profile.add_marker(
230                    self.thread,
231                    MarkerTiming::IntervalStart(now),
232                    self.marker,
233                );
234                let stack = self
235                    .profile
236                    .intern_stack_frames(self.thread, frames.into_iter());
237                self.profile.set_marker_stack(self.thread, marker, stack);
238            }
239            CallHook::ReturningFromHost => {
240                self.profile
241                    .add_marker(self.thread, MarkerTiming::IntervalEnd(now), self.marker);
242            }
243        }
244    }
245
246    /// When the guest finishes running, call this function to write the
247    /// profile to the given `output`. The output is a JSON-formatted object in
248    /// the [Firefox "processed profile format"][fmt]. Files in this format may
249    /// be visualized at <https://profiler.firefox.com/>.
250    ///
251    /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
252    pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
253        let now = Timestamp::from_nanos_since_reference(
254            self.start.elapsed().as_nanos().try_into().unwrap(),
255        );
256        self.profile.set_thread_end_time(self.thread, now);
257        self.profile.set_process_end_time(self.process, now);
258
259        serde_json::to_writer(output, &self.profile)?;
260        Ok(())
261    }
262}
263
264fn module_symbols(name: String, module: &Module) -> Option<LibraryInfo> {
265    let compiled = module.compiled_module();
266    let symbols = Vec::from_iter(
267        module
268            .env_module()
269            .defined_func_indices()
270            .map(|defined_idx| {
271                let key =
272                    FuncKey::DefinedWasmFunction(module.env_module().module_index, defined_idx);
273                let loc = compiled.func_loc(key);
274                let func_idx = compiled.module().func_index(defined_idx);
275                let mut name = String::new();
276                demangle_function_name_or_index(
277                    &mut name,
278                    compiled.func_name(func_idx),
279                    defined_idx.as_u32() as usize,
280                )
281                .unwrap();
282                Symbol {
283                    address: loc.start,
284                    size: Some(loc.length),
285                    name,
286                }
287            }),
288    );
289    if symbols.is_empty() {
290        return None;
291    }
292
293    Some(LibraryInfo {
294        name,
295        debug_name: String::new(),
296        path: String::new(),
297        debug_path: String::new(),
298        debug_id: DebugId::nil(),
299        code_id: None,
300        arch: None,
301        symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
302    })
303}
304
305fn lookup_frames<'a>(
306    modules: &'a Modules,
307    backtrace: &'a Backtrace,
308) -> impl Iterator<Item = FrameInfo> + 'a {
309    backtrace
310        .frames()
311        // Samply needs to see the oldest frame first, but we list the newest
312        // first, so iterate in reverse.
313        .rev()
314        .filter_map(|frame| {
315            let idx = modules
316                .binary_search_by(|probe| {
317                    if probe.text_range.contains(&frame.pc()) {
318                        Ordering::Equal
319                    } else {
320                        probe.text_range.start.cmp(&frame.pc())
321                    }
322                })
323                .ok()?;
324            let module = modules.get(idx)?;
325
326            // We need to point to the modules full text (not just its functions) as
327            // the offset for the final phase; these can be different for component
328            // model modules.
329            let module_text_start = module.module.text().as_ptr_range().start as usize;
330            return Some(FrameInfo {
331                frame: Frame::RelativeAddressFromReturnAddress(
332                    module.fxprof_libhandle,
333                    u32::try_from(frame.pc() - module_text_start).unwrap(),
334                ),
335                category_pair: CategoryHandle::OTHER.into(),
336                flags: FrameFlags::empty(),
337            });
338        })
339}
340
341#[derive(Debug, Clone, Copy)]
342struct CallMarker {
343    name: StringHandle,
344}
345
346impl CallMarker {
347    fn new(profile: &mut Profile) -> Self {
348        let name = profile.intern_string(Self::UNIQUE_MARKER_TYPE_NAME);
349        Self { name }
350    }
351}
352
353impl StaticSchemaMarker for CallMarker {
354    const UNIQUE_MARKER_TYPE_NAME: &'static str = "hostcall";
355    const FIELDS: &'static [StaticSchemaMarkerField] = &[];
356    const LOCATIONS: MarkerLocations = MarkerLocations::MARKER_CHART
357        .union(MarkerLocations::MARKER_TABLE.union(MarkerLocations::TIMELINE_OVERVIEW));
358
359    fn name(&self, _profile: &mut Profile) -> StringHandle {
360        self.name
361    }
362    fn category(&self, _profile: &mut Profile) -> CategoryHandle {
363        CategoryHandle::OTHER
364    }
365    fn string_field_value(&self, _field_index: u32) -> StringHandle {
366        unreachable!("no fields")
367    }
368    fn number_field_value(&self, _field_index: u32) -> f64 {
369        unreachable!("no fields")
370    }
371}