wasmtime/runtime/profiling.rs
1use crate::instantiate::CompiledModule;
2use crate::prelude::*;
3use crate::runtime::vm::Backtrace;
4use crate::{AsContext, CallHook, Module};
5use fxprof_processed_profile::debugid::DebugId;
6use fxprof_processed_profile::{
7 CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, MarkerLocation, MarkerSchema,
8 MarkerTiming, Profile, ProfilerMarker, ReferenceTimestamp, Symbol, SymbolTable, Timestamp,
9};
10use std::ops::Range;
11use std::sync::Arc;
12use std::time::{Duration, Instant};
13use wasmtime_environ::demangle_function_name_or_index;
14
15// TODO: collect more data
16// - On non-Windows, measure thread-local CPU usage between events with
17// rustix::time::clock_gettime(ClockId::ThreadCPUTime)
18// - Report which wasm module, and maybe instance, each frame came from
19
20/// Collects basic profiling data for a single WebAssembly guest.
21///
22/// This profiler can't provide measurements that are as accurate or detailed
23/// as a platform-specific profiler, such as `perf` on Linux. On the other
24/// hand, this profiler works on every platform that Wasmtime supports. Also,
25/// as an embedder you can use this profiler selectively on individual guest
26/// instances rather than profiling the entire process.
27///
28/// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
29/// regular intervals while the guest is on the stack. The most straightforward
30/// way to do that is to call it from a callback registered with
31/// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
32///
33/// # Accuracy
34///
35/// The data collection granularity is limited by the mechanism you use to
36/// interrupt guest execution and collect a profiling sample.
37///
38/// If you use epoch interruption, then samples will only be collected at
39/// function entry points and loop headers. This introduces some bias to the
40/// results. In addition, samples will only be taken at times when WebAssembly
41/// functions are running, not during host-calls.
42///
43/// It is technically possible to use fuel interruption instead. That
44/// introduces worse bias since samples occur after a certain number of
45/// WebAssembly instructions, which can take different amounts of time.
46///
47/// You may instead be able to use platform-specific methods, such as
48/// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
49/// a more accurate interval. The only current requirement is that the guest
50/// you wish to profile must be on the same stack where you call `sample`,
51/// and executing within the same thread. However, the `GuestProfiler::sample`
52/// method is not currently async-signal-safe, so doing this correctly is not
53/// easy.
54///
55/// # Security
56///
57/// Profiles produced using this profiler do not include any configuration
58/// details from the host, such as virtual memory addresses, or from any
59/// WebAssembly modules that you haven't specifically allowed. So for
60/// example, these profiles should be safe to share with untrusted users
61/// who have provided untrusted code that you are running in a multi-tenancy
62/// environment.
63///
64/// However, the profile does include byte offsets into the text section of
65/// the compiled module, revealing some information about the size of the code
66/// generated for each module. For user-provided modules, the user could get
67/// the same information by compiling the module for themself using a similar
68/// version of Wasmtime on the same target architecture, but for any module
69/// where they don't already have the WebAssembly module binary available this
70/// could theoretically lead to an undesirable information disclosure. So you
71/// should only include user-provided modules in profiles.
72#[derive(Debug)]
73pub struct GuestProfiler {
74 profile: Profile,
75 modules: Modules,
76 process: fxprof_processed_profile::ProcessHandle,
77 thread: fxprof_processed_profile::ThreadHandle,
78 start: Instant,
79}
80
81type Modules = Vec<(Range<usize>, fxprof_processed_profile::LibraryHandle)>;
82
83impl GuestProfiler {
84 /// Begin profiling a new guest. When this function is called, the current
85 /// wall-clock time is recorded as the start time for the guest.
86 ///
87 /// The `module_name` parameter is recorded in the profile to help identify
88 /// where the profile came from.
89 ///
90 /// The `interval` parameter should match the rate at which you intend
91 /// to call `sample`. However, this is used as a hint and not required to
92 /// exactly match the real sample rate.
93 ///
94 /// Only modules which are present in the `modules` vector will appear in
95 /// stack traces in this profile. Any stack frames which were executing
96 /// host code or functions from other modules will be omitted. See the
97 /// "Security" section of the [`GuestProfiler`] documentation for guidance
98 /// on what modules should not be included in this list.
99 pub fn new(module_name: &str, interval: Duration, modules: Vec<(String, Module)>) -> Self {
100 let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
101 let mut profile = Profile::new(module_name, zero, interval.into());
102
103 let mut modules: Vec<_> = modules
104 .into_iter()
105 .filter_map(|(name, module)| {
106 let compiled = module.compiled_module();
107 let text = compiled.text().as_ptr_range();
108 let address_range = text.start as usize..text.end as usize;
109 module_symbols(name, compiled).map(|lib| (address_range, profile.add_lib(lib)))
110 })
111 .collect();
112
113 modules.sort_unstable_by_key(|(range, _)| range.start);
114
115 profile.set_reference_timestamp(std::time::SystemTime::now().into());
116 let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
117 let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
118 let start = Instant::now();
119 Self {
120 profile,
121 modules,
122 process,
123 thread,
124 start,
125 }
126 }
127
128 /// Add a sample to the profile. This function collects a backtrace from
129 /// any stack frames for allowed modules on the current stack. It should
130 /// typically be called from a callback registered using
131 /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
132 ///
133 /// The `delta` parameter is the amount of CPU time that was used by this
134 /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
135 /// here if recording CPU usage information is not needed.
136 pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
137 let now = Timestamp::from_nanos_since_reference(
138 self.start.elapsed().as_nanos().try_into().unwrap(),
139 );
140 let backtrace = Backtrace::new(store.as_context().0);
141 let frames = lookup_frames(&self.modules, &backtrace);
142 self.profile
143 .add_sample(self.thread, now, frames, delta.into(), 1);
144 }
145
146 /// Add a marker for transitions between guest and host to the profile.
147 /// This function should typically be called from a callback registered
148 /// using [`Store::call_hook()`](crate::Store::call_hook), and the `kind`
149 /// parameter should be the value of the same type passed into that hook.
150 pub fn call_hook(&mut self, store: impl AsContext, kind: CallHook) {
151 let now = Timestamp::from_nanos_since_reference(
152 self.start.elapsed().as_nanos().try_into().unwrap(),
153 );
154 match kind {
155 CallHook::CallingWasm | CallHook::ReturningFromWasm => {}
156 CallHook::CallingHost => {
157 let backtrace = Backtrace::new(store.as_context().0);
158 let frames = lookup_frames(&self.modules, &backtrace);
159 self.profile.add_marker_with_stack(
160 self.thread,
161 "hostcall",
162 CallMarker,
163 MarkerTiming::IntervalStart(now),
164 frames,
165 );
166 }
167 CallHook::ReturningFromHost => {
168 self.profile.add_marker(
169 self.thread,
170 "hostcall",
171 CallMarker,
172 MarkerTiming::IntervalEnd(now),
173 );
174 }
175 }
176 }
177
178 /// When the guest finishes running, call this function to write the
179 /// profile to the given `output`. The output is a JSON-formatted object in
180 /// the [Firefox "processed profile format"][fmt]. Files in this format may
181 /// be visualized at <https://profiler.firefox.com/>.
182 ///
183 /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
184 pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
185 let now = Timestamp::from_nanos_since_reference(
186 self.start.elapsed().as_nanos().try_into().unwrap(),
187 );
188 self.profile.set_thread_end_time(self.thread, now);
189 self.profile.set_process_end_time(self.process, now);
190
191 serde_json::to_writer(output, &self.profile)?;
192 Ok(())
193 }
194}
195
196fn module_symbols(name: String, compiled: &CompiledModule) -> Option<LibraryInfo> {
197 let symbols = Vec::from_iter(compiled.finished_functions().map(|(defined_idx, _)| {
198 let loc = compiled.func_loc(defined_idx);
199 let func_idx = compiled.module().func_index(defined_idx);
200 let mut name = String::new();
201 demangle_function_name_or_index(
202 &mut name,
203 compiled.func_name(func_idx),
204 defined_idx.as_u32() as usize,
205 )
206 .unwrap();
207 Symbol {
208 address: loc.start,
209 size: Some(loc.length),
210 name,
211 }
212 }));
213 if symbols.is_empty() {
214 return None;
215 }
216
217 Some(LibraryInfo {
218 name,
219 debug_name: String::new(),
220 path: String::new(),
221 debug_path: String::new(),
222 debug_id: DebugId::nil(),
223 code_id: None,
224 arch: None,
225 symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
226 })
227}
228
229fn lookup_frames<'a>(
230 modules: &'a Modules,
231 backtrace: &'a Backtrace,
232) -> impl Iterator<Item = FrameInfo> + 'a {
233 backtrace
234 .frames()
235 // Samply needs to see the oldest frame first, but we list the newest
236 // first, so iterate in reverse.
237 .rev()
238 .filter_map(|frame| {
239 // Find the first module whose start address includes this PC.
240 let module_idx = modules.partition_point(|(range, _)| range.start > frame.pc());
241 if let Some((range, lib)) = modules.get(module_idx) {
242 if range.contains(&frame.pc()) {
243 return Some(FrameInfo {
244 frame: Frame::RelativeAddressFromReturnAddress(
245 *lib,
246 u32::try_from(frame.pc() - range.start).unwrap(),
247 ),
248 category_pair: CategoryHandle::OTHER.into(),
249 flags: FrameFlags::empty(),
250 });
251 }
252 }
253 None
254 })
255}
256
257struct CallMarker;
258
259impl ProfilerMarker for CallMarker {
260 const MARKER_TYPE_NAME: &'static str = "hostcall";
261
262 fn schema() -> MarkerSchema {
263 MarkerSchema {
264 type_name: Self::MARKER_TYPE_NAME,
265 locations: vec![
266 MarkerLocation::MarkerChart,
267 MarkerLocation::MarkerTable,
268 MarkerLocation::TimelineOverview,
269 ],
270 chart_label: None,
271 tooltip_label: None,
272 table_label: None,
273 fields: vec![],
274 }
275 }
276
277 fn json_marker_data(&self) -> serde_json::Value {
278 serde_json::json!({ "type": Self::MARKER_TYPE_NAME })
279 }
280}