Skip to main content

wasmtime_environ/
address_map.rs

//! Data structures for translating between positions in the original wasm
//! binary and offsets in the generated code.
2
3use core::fmt;
4use object::{Bytes, LittleEndian, U32};
5use serde_derive::{Deserialize, Serialize};
6
7/// Single source location to generated address mapping.
8#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
9pub struct InstructionAddressMap {
10    /// Where in the source wasm binary this instruction comes from, specified
11    /// in an offset of bytes from the front of the file.
12    pub srcloc: FilePos,
13
14    /// Offset from the start of the function's compiled code to where this
15    /// instruction is located, or the region where it starts.
16    pub code_offset: u32,
17}
18
19/// A position within an original source file,
20///
21/// This structure is used as a newtype wrapper around a 32-bit integer which
22/// represents an offset within a file where a wasm instruction or function is
23/// to be originally found.
24#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
25pub struct FilePos(u32);
26
27impl FilePos {
28    /// Create a new file position with the given offset.
29    pub fn new(pos: u32) -> FilePos {
30        assert!(pos != u32::MAX);
31        FilePos(pos)
32    }
33
34    /// Get the null file position.
35    pub fn none() -> FilePos {
36        FilePos(u32::MAX)
37    }
38
39    /// Is this the null file position?
40    #[inline]
41    pub fn is_none(&self) -> bool {
42        *self == FilePos::none()
43    }
44
45    /// Returns the offset that this offset was created with.
46    ///
47    /// Note that positions created with `FilePos::none` and the `Default`
48    /// implementation will return `None` here, whereas positions created with
49    /// `FilePos::new` will return `Some`.
50    pub fn file_offset(self) -> Option<u32> {
51        if self.0 == u32::MAX {
52            None
53        } else {
54            Some(self.0)
55        }
56    }
57}
58
59impl Default for FilePos {
60    fn default() -> FilePos {
61        FilePos::none()
62    }
63}
64
/// A Wasm bytecode offset measured from the start of a component (or
/// top-level module) binary.
///
/// While compiling a component, the Wasm parser reports source positions
/// relative to the whole component binary; this type records that
/// convention. To obtain a [`ModulePC`], call
/// [`ComponentPC::to_module_pc`] with the byte offset of the module within
/// the component.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ComponentPC(u32);
76
77impl ComponentPC {
78    /// Create a new component-relative PC from a raw offset.
79    pub fn new(offset: u32) -> Self {
80        Self(offset)
81    }
82
83    /// Get the raw u32 offset.
84    pub fn raw(self) -> u32 {
85        self.0
86    }
87
88    /// Convert to a module-relative PC by subtracting the byte offset
89    /// of the module within the component binary.
90    pub fn to_module_pc(self, wasm_module_offset: u64) -> ModulePC {
91        let offset = u32::try_from(wasm_module_offset).unwrap();
92        ModulePC(self.0 - offset)
93    }
94}
95
96impl fmt::Debug for ComponentPC {
97    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98        write!(f, "ComponentPC({:#x})", self.0)
99    }
100}
101
102impl fmt::Display for ComponentPC {
103    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
104        write!(f, "{:#x}", self.0)
105    }
106}
107
/// A Wasm bytecode offset measured from the start of a core Wasm module
/// binary.
///
/// The guest-debug system always works with module-relative PCs, because the
/// debugger presents a core-Wasm view of the world in which components are
/// deconstructed into individual core Wasm modules.
///
/// For a standalone (non-component) module, a `ModulePC` and the
/// corresponding [`ComponentPC`] are numerically identical.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ModulePC(u32);
119
120impl ModulePC {
121    /// Create a new module-relative PC from a raw offset.
122    pub fn new(offset: u32) -> Self {
123        Self(offset)
124    }
125
126    /// Get the raw u32 offset.
127    pub fn raw(self) -> u32 {
128        self.0
129    }
130}
131
132impl fmt::Debug for ModulePC {
133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
134        write!(f, "ModulePC({:#x})", self.0)
135    }
136}
137
138impl fmt::Display for ModulePC {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        write!(f, "{:#x}", self.0)
141    }
142}
143
144/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets
145/// and the slice of associated file positions for each offset.
146fn parse_address_map(section: &[u8]) -> Option<(&[U32<LittleEndian>], &[U32<LittleEndian>])> {
147    let mut section = Bytes(section);
148    // NB: this matches the encoding written by `append_to` in the
149    // `compile::address_map` module.
150    let count = section.read::<U32<LittleEndian>>().ok()?;
151    let count = usize::try_from(count.get(LittleEndian)).ok()?;
152    let (offsets, section) =
153        object::slice_from_bytes::<U32<LittleEndian>>(section.0, count).ok()?;
154    let (positions, section) =
155        object::slice_from_bytes::<U32<LittleEndian>>(section, count).ok()?;
156    debug_assert!(section.is_empty());
157    Some((offsets, positions))
158}
159
160/// Lookup an `offset` within an encoded address map section, returning the
161/// original `FilePos` that corresponds to the offset, if found.
162///
163/// This function takes a `section` as its first argument which must have been
164/// created with `AddressMapSection` above. This is intended to be the raw
165/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
166///
167/// The `offset` provided is a relative offset from the start of the text
168/// section of the pc that is being looked up. If `offset` is out of range or
169/// doesn't correspond to anything in this file then `None` is returned.
170pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
171    let (offsets, positions) = parse_address_map(section)?;
172
173    // First perform a binary search on the `offsets` array. This is a sorted
174    // array of offsets within the text section, which is conveniently what our
175    // `offset` also is. Note that we are somewhat unlikely to find a precise
176    // match on the element in the array, so we're largely interested in which
177    // "bucket" the `offset` falls into.
178    let offset = u32::try_from(offset).ok()?;
179    let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) {
180        // Exact hit!
181        Ok(i) => i,
182
183        // This *would* be at the first slot in the array, so no
184        // instructions cover `pc`.
185        Err(0) => return None,
186
187        // This would be at the `nth` slot, so we're at the `n-1`th slot.
188        Err(n) => n - 1,
189    };
190
191    // Using the `index` we found of which bucket `offset` corresponds to we can
192    // lookup the actual `FilePos` value in the `positions` array.
193    let pos = positions.get(index)?;
194    Some(FilePos(pos.get(LittleEndian)))
195}
196
197/// Iterate over the address map contained in the given address map section.
198///
199/// This function takes a `section` as its first argument which must have been
200/// created with `AddressMapSection` above. This is intended to be the raw
201/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
202///
203/// The yielded offsets are relative to the start of the text section for this
204/// map's code object.
205pub fn iterate_address_map<'a>(
206    section: &'a [u8],
207) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
208    let (offsets, positions) = parse_address_map(section)?;
209
210    Some(
211        offsets
212            .iter()
213            .map(|o| o.get(LittleEndian))
214            .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))),
215    )
216}