wasmtime/runtime/
profiling.rs

1#[cfg(feature = "component-model")]
2use crate::component::Component;
3use crate::instantiate::CompiledModule;
4use crate::prelude::*;
5use crate::runtime::vm::Backtrace;
6use crate::{AsContext, CallHook, Module};
7use core::cmp::Ordering;
8use fxprof_processed_profile::debugid::DebugId;
9use fxprof_processed_profile::{
10    CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocation, MarkerSchema,
11    MarkerTiming, Profile, ProfilerMarker, ReferenceTimestamp, Symbol, SymbolTable, Timestamp,
12};
13use std::ops::Range;
14use std::sync::Arc;
15use std::time::{Duration, Instant};
16use wasmtime_environ::demangle_function_name_or_index;
17
18// TODO: collect more data
19// - On non-Windows, measure thread-local CPU usage between events with
20//   rustix::time::clock_gettime(ClockId::ThreadCPUTime)
21// - Report which wasm module, and maybe instance, each frame came from
22
23/// Collects basic profiling data for a single WebAssembly guest.
24///
25/// This profiler can't provide measurements that are as accurate or detailed
26/// as a platform-specific profiler, such as `perf` on Linux. On the other
27/// hand, this profiler works on every platform that Wasmtime supports. Also,
28/// as an embedder you can use this profiler selectively on individual guest
29/// instances rather than profiling the entire process.
30///
31/// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
32/// regular intervals while the guest is on the stack. The most straightforward
33/// way to do that is to call it from a callback registered with
34/// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
35///
36/// # Accuracy
37///
38/// The data collection granularity is limited by the mechanism you use to
39/// interrupt guest execution and collect a profiling sample.
40///
41/// If you use epoch interruption, then samples will only be collected at
42/// function entry points and loop headers. This introduces some bias to the
43/// results. In addition, samples will only be taken at times when WebAssembly
44/// functions are running, not during host-calls.
45///
46/// It is technically possible to use fuel interruption instead. That
47/// introduces worse bias since samples occur after a certain number of
48/// WebAssembly instructions, which can take different amounts of time.
49///
50/// You may instead be able to use platform-specific methods, such as
51/// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
52/// a more accurate interval. The only current requirement is that the guest
53/// you wish to profile must be on the same stack where you call `sample`,
54/// and executing within the same thread. However, the `GuestProfiler::sample`
55/// method is not currently async-signal-safe, so doing this correctly is not
56/// easy.
57///
58/// # Security
59///
60/// Profiles produced using this profiler do not include any configuration
61/// details from the host, such as virtual memory addresses, or from any
62/// WebAssembly modules that you haven't specifically allowed. So for
63/// example, these profiles should be safe to share with untrusted users
64/// who have provided untrusted code that you are running in a multi-tenancy
65/// environment.
66///
67/// However, the profile does include byte offsets into the text section of
68/// the compiled module, revealing some information about the size of the code
69/// generated for each module. For user-provided modules, the user could get
70/// the same information by compiling the module for themself using a similar
71/// version of Wasmtime on the same target architecture, but for any module
72/// where they don't already have the WebAssembly module binary available this
73/// could theoretically lead to an undesirable information disclosure. So you
74/// should only include user-provided modules in profiles.
75#[derive(Debug)]
76pub struct GuestProfiler {
77    profile: Profile,
78    modules: Modules,
79    process: fxprof_processed_profile::ProcessHandle,
80    thread: fxprof_processed_profile::ThreadHandle,
81    start: Instant,
82}
83
84#[derive(Debug)]
85struct ProfiledModule {
86    module: Module,
87    fxprof_libhandle: fxprof_processed_profile::LibraryHandle,
88    text_range: Range<usize>,
89}
90
91type Modules = Vec<ProfiledModule>;
92
93impl GuestProfiler {
94    /// Begin profiling a new guest. When this function is called, the current
95    /// wall-clock time is recorded as the start time for the guest.
96    ///
97    /// The `module_name` parameter is recorded in the profile to help identify
98    /// where the profile came from.
99    ///
100    /// The `interval` parameter should match the rate at which you intend
101    /// to call `sample`. However, this is used as a hint and not required to
102    /// exactly match the real sample rate.
103    ///
104    /// Only modules which are present in the `modules` vector will appear in
105    /// stack traces in this profile. Any stack frames which were executing
106    /// host code or functions from other modules will be omitted. See the
107    /// "Security" section of the [`GuestProfiler`] documentation for guidance
108    /// on what modules should not be included in this list.
109    pub fn new(
110        module_name: &str,
111        interval: Duration,
112        modules: impl IntoIterator<Item = (String, Module)>,
113    ) -> Self {
114        let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
115        let mut profile = Profile::new(module_name, zero, interval.into());
116
117        // Past this point, we just need to think about modules as we pull out
118        // the disparate module information from components.
119        let mut modules: Vec<_> = modules
120            .into_iter()
121            .filter_map(|(name, module)| {
122                let compiled = module.compiled_module();
123                let text_range = {
124                    // Assumption: within text, the code for a given module is packed linearly and
125                    // is non-overlapping; if this is violated, it should be safe but might result
126                    // in incorrect profiling results.
127                    let start =
128                        compiled.finished_functions().next()?.1.as_ptr_range().start as usize;
129                    let end = compiled.finished_functions().last()?.1.as_ptr_range().end as usize;
130                    start..end
131                };
132
133                module_symbols(name, compiled).map(|lib| {
134                    let libhandle = profile.add_lib(lib);
135                    ProfiledModule {
136                        module,
137                        fxprof_libhandle: libhandle,
138                        text_range,
139                    }
140                })
141            })
142            .collect();
143
144        modules.sort_unstable_by_key(|m| m.text_range.start);
145
146        profile.set_reference_timestamp(std::time::SystemTime::now().into());
147        let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
148        let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
149        let start = Instant::now();
150        Self {
151            profile,
152            modules,
153            process,
154            thread,
155            start,
156        }
157    }
158
159    /// Create a new profiler for the provided component
160    ///
161    /// See [`GuestProfiler::new`] for additional information; this function
162    /// works identically except that it takes a component and sets up
163    /// instrumentation to track calls in each of its constituent modules.
164    #[cfg(feature = "component-model")]
165    pub fn new_component(
166        component_name: &str,
167        interval: Duration,
168        component: Component,
169        extra_modules: impl IntoIterator<Item = (String, Module)>,
170    ) -> Self {
171        let modules = component
172            .static_modules()
173            .into_iter()
174            .map(|m| (m.name().unwrap_or("<unknown>").to_string(), m.clone()))
175            .chain(extra_modules);
176        Self::new(component_name, interval, modules)
177    }
178
179    /// Add a sample to the profile. This function collects a backtrace from
180    /// any stack frames for allowed modules on the current stack. It should
181    /// typically be called from a callback registered using
182    /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
183    ///
184    /// The `delta` parameter is the amount of CPU time that was used by this
185    /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
186    /// here if recording CPU usage information is not needed.
187    pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
188        let now = Timestamp::from_nanos_since_reference(
189            self.start.elapsed().as_nanos().try_into().unwrap(),
190        );
191        let backtrace = Backtrace::new(store.as_context().0);
192        let frames = lookup_frames(&self.modules, &backtrace);
193        self.profile
194            .add_sample(self.thread, now, frames, delta.into(), 1);
195    }
196
197    /// Add a marker for transitions between guest and host to the profile.
198    /// This function should typically be called from a callback registered
199    /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind`
200    /// parameter should be the value of the same type passed into that hook.
201    pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) {
202        let now = Timestamp::from_nanos_since_reference(
203            self.start.elapsed().as_nanos().try_into().unwrap(),
204        );
205        match kind {
206            CallHook::CallingWasm | CallHook::ReturningFromWasm => {}
207            CallHook::CallingHost => {
208                let backtrace = Backtrace::new(store.as_context().0);
209                let frames = lookup_frames(&self.modules, &backtrace);
210                self.profile.add_marker_with_stack(
211                    self.thread,
212                    "hostcall",
213                    CallMarker,
214                    MarkerTiming::IntervalStart(now),
215                    frames,
216                );
217            }
218            CallHook::ReturningFromHost => {
219                self.profile.add_marker(
220                    self.thread,
221                    "hostcall",
222                    CallMarker,
223                    MarkerTiming::IntervalEnd(now),
224                );
225            }
226        }
227    }
228
229    /// When the guest finishes running, call this function to write the
230    /// profile to the given `output`. The output is a JSON-formatted object in
231    /// the [Firefox "processed profile format"][fmt]. Files in this format may
232    /// be visualized at <https://profiler.firefox.com/>.
233    ///
234    /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
235    pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
236        let now = Timestamp::from_nanos_since_reference(
237            self.start.elapsed().as_nanos().try_into().unwrap(),
238        );
239        self.profile.set_thread_end_time(self.thread, now);
240        self.profile.set_process_end_time(self.process, now);
241
242        serde_json::to_writer(output, &self.profile)?;
243        Ok(())
244    }
245}
246
247fn module_symbols(name: String, compiled: &CompiledModule) -> Option<LibraryInfo> {
248    let symbols = Vec::from_iter(compiled.finished_functions().map(|(defined_idx, _)| {
249        let loc = compiled.func_loc(defined_idx);
250        let func_idx = compiled.module().func_index(defined_idx);
251        let mut name = String::new();
252        demangle_function_name_or_index(
253            &mut name,
254            compiled.func_name(func_idx),
255            defined_idx.as_u32() as usize,
256        )
257        .unwrap();
258        Symbol {
259            address: loc.start,
260            size: Some(loc.length),
261            name,
262        }
263    }));
264    if symbols.is_empty() {
265        return None;
266    }
267
268    Some(LibraryInfo {
269        name,
270        debug_name: String::new(),
271        path: String::new(),
272        debug_path: String::new(),
273        debug_id: DebugId::nil(),
274        code_id: None,
275        arch: None,
276        symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
277    })
278}
279
280fn lookup_frames<'a>(
281    modules: &'a Modules,
282    backtrace: &'a Backtrace,
283) -> impl Iterator<Item = FrameInfo> + 'a {
284    backtrace
285        .frames()
286        // Samply needs to see the oldest frame first, but we list the newest
287        // first, so iterate in reverse.
288        .rev()
289        .filter_map(|frame| {
290            let idx = modules
291                .binary_search_by(|probe| {
292                    if probe.text_range.contains(&frame.pc()) {
293                        Ordering::Equal
294                    } else {
295                        probe.text_range.start.cmp(&frame.pc())
296                    }
297                })
298                .ok()?;
299            let module = modules.get(idx)?;
300
301            // We need to point to the modules full text (not just its functions) as
302            // the offset for the final phase; these can be different for component
303            // model modules.
304            let module_text_start = module.module.text().as_ptr_range().start as usize;
305            return Some(FrameInfo {
306                frame: Frame::RelativeAddressFromReturnAddress(
307                    module.fxprof_libhandle,
308                    u32::try_from(frame.pc() - module_text_start).unwrap(),
309                ),
310                category_pair: CategoryHandle::OTHER.into(),
311                flags: FrameFlags::empty(),
312            });
313        })
314}
315
316struct CallMarker;
317
318impl ProfilerMarker for CallMarker {
319    const MARKER_TYPE_NAME: &'static str = "hostcall";
320
321    fn schema() -> MarkerSchema {
322        MarkerSchema {
323            type_name: Self::MARKER_TYPE_NAME,
324            locations: vec![
325                MarkerLocation::MarkerChart,
326                MarkerLocation::MarkerTable,
327                MarkerLocation::TimelineOverview,
328            ],
329            chart_label: None,
330            tooltip_label: None,
331            table_label: None,
332            fields: vec![],
333        }
334    }
335
336    fn json_marker_data(&self) -> serde_json::Value {
337        serde_json::json!({ "type": Self::MARKER_TYPE_NAME })
338    }
339}