wasmtime_environ/address_map.rs
//! Data structures to provide transformation of the source addresses of a
//! WebAssembly module into the native code.
2
3use crate::bytes::{read_sleb, read_uleb};
4use core::fmt;
5use object::{Bytes, LittleEndian, U32};
6use serde_derive::{Deserialize, Serialize};
7
8/// Single source location to generated address mapping.
9#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
10pub struct InstructionAddressMap {
11 /// Where in the source wasm binary this instruction comes from, specified
12 /// in an offset of bytes from the front of the file.
13 pub srcloc: FilePos,
14
15 /// Offset from the start of the function's compiled code to where this
16 /// instruction is located, or the region where it starts.
17 pub code_offset: u32,
18}
19
20/// A position within an original source file,
21///
22/// This structure is used as a newtype wrapper around a 32-bit integer which
23/// represents an offset within a file where a wasm instruction or function is
24/// to be originally found.
25#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
26pub struct FilePos(u32);
27
28impl FilePos {
29 /// Create a new file position with the given offset.
30 pub fn new(pos: u32) -> FilePos {
31 assert!(pos != u32::MAX);
32 FilePos(pos)
33 }
34
35 /// Get the null file position.
36 pub fn none() -> FilePos {
37 FilePos(u32::MAX)
38 }
39
40 /// Is this the null file position?
41 #[inline]
42 pub fn is_none(&self) -> bool {
43 *self == FilePos::none()
44 }
45
46 /// Returns the offset that this offset was created with.
47 ///
48 /// Note that positions created with `FilePos::none` and the `Default`
49 /// implementation will return `None` here, whereas positions created with
50 /// `FilePos::new` will return `Some`.
51 pub fn file_offset(self) -> Option<u32> {
52 if self.0 == u32::MAX {
53 None
54 } else {
55 Some(self.0)
56 }
57 }
58}
59
60impl Default for FilePos {
61 fn default() -> FilePos {
62 FilePos::none()
63 }
64}
65
66/// A Wasm bytecode offset relative to the start of a component (or
67/// top-level module) binary.
68///
69/// When compiling a component, the Wasm parser returns source
70/// positions relative to the entire component binary. This type
71/// captures that convention. Use
72/// [`ComponentPC::to_module_pc`] to convert to a
73/// [`ModulePC`] given the byte offset of the module within the
74/// component.
75#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
76pub struct ComponentPC(u32);
77
78impl ComponentPC {
79 /// Create a new component-relative PC from a raw offset.
80 pub fn new(offset: u32) -> Self {
81 Self(offset)
82 }
83
84 /// Get the raw u32 offset.
85 pub fn raw(self) -> u32 {
86 self.0
87 }
88
89 /// Convert to a module-relative PC by subtracting the byte offset
90 /// of the module within the component binary.
91 pub fn to_module_pc(self, wasm_module_offset: u64) -> ModulePC {
92 let offset = u32::try_from(wasm_module_offset).unwrap();
93 ModulePC(self.0 - offset)
94 }
95}
96
97impl fmt::Debug for ComponentPC {
98 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99 write!(f, "ComponentPC({:#x})", self.0)
100 }
101}
102
103impl fmt::Display for ComponentPC {
104 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 write!(f, "{:#x}", self.0)
106 }
107}
108
/// A Wasm bytecode offset measured from the start of a core Wasm module
/// binary.
///
/// The guest-debug system always works with module-relative PCs: the
/// debugger presents a core-Wasm view of the world in which components are
/// deconstructed into individual core Wasm modules.
///
/// For a standalone (non-component) module, a `ModulePC` and the
/// corresponding [`ComponentPC`] are numerically identical.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ModulePC(u32);

impl ModulePC {
    /// Wraps a raw offset as a module-relative PC.
    pub fn new(offset: u32) -> Self {
        ModulePC(offset)
    }

    /// Returns the underlying raw `u32` offset.
    pub fn raw(self) -> u32 {
        let ModulePC(offset) = self;
        offset
    }
}
132
133impl fmt::Debug for ModulePC {
134 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135 write!(f, "ModulePC({:#x})", self.0)
136 }
137}
138
139impl fmt::Display for ModulePC {
140 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
141 write!(f, "{:#x}", self.0)
142 }
143}
144
/// Number of address-mapping entries packed into one block of the address map
/// section.
///
/// See `AddressMapSection` in `crate::compile` for the full section format.
/// Chosen as a balance between the fixed-width index overhead per block (8
/// bytes, amortized across entries) and the amount of linear decoding required
/// to look up a single pc within a block.
///
/// A block holds at most this many entries; the reader in this module assumes
/// every block before the last one is full when computing how many entries a
/// given block contains.
pub(crate) const ADDRMAP_BLOCK_SIZE: usize = 128;
153
154/// A parsed view of the address map section.
155///
156/// The fields here correspond to the pieces of the section layout described
157/// on `AddressMapSection` in `crate::compile`.
158#[derive(Clone, Copy)]
159struct AddressMap<'a> {
160 /// Total number of address-mapping entries in this section.
161 entries: usize,
162 /// One `(first_offset, block_pos)` pair per block.
163 block_index: &'a [[U32<LittleEndian>; 2]],
164 /// Variable-length block bodies, indexed by `block_pos` in the
165 /// `block_index` table above.
166 block_bodies: &'a [u8],
167}
168
169impl<'a> AddressMap<'a> {
170 /// Returns an iterator of `(text_offset, FilePos)` for all entries in
171 /// `block`, or `None` if the section is malformed.
172 fn block_entries(&self, block_index: usize) -> Option<BlockEntries<'a>> {
173 let [first_offset, block_pos] = self.block_index.get(block_index)?;
174 let first_offset = first_offset.get(LittleEndian);
175 let block_pos = block_pos.get(LittleEndian);
176 let block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?;
177 let remaining = core::cmp::min(
178 ADDRMAP_BLOCK_SIZE,
179 self.entries.checked_sub(block_index * ADDRMAP_BLOCK_SIZE)?,
180 );
181 Some(BlockEntries {
182 block,
183 prev_offset: first_offset,
184 prev_pos: None,
185 remaining,
186 })
187 }
188}
189
190/// Iterator over the entries of a single block, decoding the delta-and-flag
191/// varints described in the "block body" portion of the section format on
192/// `AddressMapSection` in `crate::compile`.
193struct BlockEntries<'a> {
194 block: &'a [u8],
195 prev_offset: u32,
196 prev_pos: Option<u32>,
197 remaining: usize,
198}
199
200impl Iterator for BlockEntries<'_> {
201 type Item = (u32, FilePos);
202
203 fn next(&mut self) -> Option<(u32, FilePos)> {
204 self.remaining = self.remaining.checked_sub(1)?;
205 let token = read_uleb(&mut self.block)?;
206 let delta = u32::try_from(token >> 1).ok()?;
207 let cur_offset = self.prev_offset.checked_add(delta)?;
208 self.prev_offset = cur_offset;
209 if token & 1 != 0 {
210 return Some((cur_offset, FilePos::none()));
211 }
212 let pos = match self.prev_pos {
213 // The first non-none position of a block is encoded absolutely...
214 None => u32::try_from(read_uleb(&mut self.block)?).ok()?,
215 // ... and subsequent positions are sleb deltas from the previous
216 // non-none position.
217 Some(prev) => {
218 let delta = read_sleb(&mut self.block)?;
219 prev.checked_add_signed(i32::try_from(delta).ok()?)?
220 }
221 };
222 self.prev_pos = Some(pos);
223 Some((cur_offset, FilePos(pos)))
224 }
225}
226
227/// Parse an `ELF_WASMTIME_ADDRMAP` section into its header, block index, and
228/// block bodies.
229fn parse(section: &[u8]) -> Option<AddressMap<'_>> {
230 let mut section = Bytes(section);
231 // NB: this matches the encoding written by `AddressMapSection` in the
232 // `compile::address_map` module.
233 let entries = section.read::<U32<LittleEndian>>().ok()?;
234 let entries = usize::try_from(entries.get(LittleEndian)).ok()?;
235 let num_blocks = section.read::<U32<LittleEndian>>().ok()?;
236 let num_blocks = usize::try_from(num_blocks.get(LittleEndian)).ok()?;
237 let (block_index, block_bodies) =
238 object::slice_from_bytes::<[U32<LittleEndian>; 2]>(section.0, num_blocks).ok()?;
239 Some(AddressMap {
240 entries,
241 block_index,
242 block_bodies,
243 })
244}
245
246/// Lookup an `offset` within an encoded address map section, returning the
247/// original `FilePos` that corresponds to the offset, if found.
248///
249/// This function takes a `section` as its first argument which must have been
250/// created with `AddressMapSection` in `crate::compile`, whose documentation
251/// describes the format decoded here. This is intended to be the raw
252/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
253///
254/// The `offset` provided is a relative offset from the start of the text
255/// section of the pc that is being looked up. If `offset` is out of range or
256/// doesn't correspond to anything in this file then `None` is returned.
257pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
258 let section = parse(section)?;
259 let offset = u32::try_from(offset).ok()?;
260
261 // Find the last block whose first pc is `<= offset`. Note that, unlike the
262 // trap section, this is a bucket-style search: each entry covers addresses
263 // from its own `text_offset` until the next entry's, so `offset` need not
264 // match an entry exactly. The covering entry is wholly contained in this
265 // block since the next block only takes over at its own `first_offset`.
266 let block = section
267 .block_index
268 .partition_point(|[first_offset, _]| first_offset.get(LittleEndian) <= offset)
269 .checked_sub(1)?;
270
271 // Find the last entry within this block whose offset is `<= offset`; that
272 // entry's bucket covers `offset`. At least the block's first entry always
273 // qualifies due to the index search above.
274 let mut pos = None;
275 for (entry_offset, entry_pos) in section.block_entries(block)? {
276 if entry_offset > offset {
277 break;
278 }
279 pos = Some(entry_pos);
280 }
281 pos
282}
283
284/// Iterate over the address map contained in the given address map section.
285///
286/// This function takes a `section` as its first argument which must have been
287/// created with `AddressMapSection` in `crate::compile`. This is intended to
288/// be the raw `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
289///
290/// The yielded offsets are relative to the start of the text section for this
291/// map's code object.
292pub fn iterate_address_map<'a>(
293 section: &'a [u8],
294) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
295 let section = parse(section)?;
296
297 Some(
298 (0..section.block_index.len())
299 .flat_map(move |block| section.block_entries(block).into_iter().flatten()),
300 )
301}