wasmtime_environ/compile/stack_maps.rs
use crate::obj::ELF_WASMTIME_STACK_MAP;
use crate::prelude::*;
use cranelift_bitset::CompoundBitSet;
use object::write::{Object, StandardSegment};
use object::{LittleEndian, SectionKind, U32Bytes};

/// Builder for the `ELF_WASMTIME_STACK_MAP` section in compiled executables.
///
/// This format is parsed by `crate::stack_map`.
///
/// The current layout of the format is:
///
/// ```text
/// ┌─────────────────────┬───── 0x00 (relative, not necessarily aligned)
/// │ count: 4-byte LE    │
/// ├─────────────────────┼───── 0x04
/// │ pc1: 4-byte LE      │
/// │ pc2: 4-byte LE      │
/// │ ...                 │
/// │ pcN: 4-byte LE      │
/// ├─────────────────────┼───── 0x04 + 4 * count
/// │ offset1: 4-byte LE  │
/// │ offset2: 4-byte LE  │
/// │ ...                 │
/// │ offsetN: 4-byte LE  │
/// ├─────────────────────┼───── 0x04 + 8 * count
/// │ data[0]: 4-byte LE  │
/// │ data[1]: 4-byte LE  │
/// │ ...                 │
/// │ data[M]: 4-byte LE  │
/// └─────────────────────┴───── 0x04 + 8 * count + 4 * M
/// ```
///
/// Here `count` is the size of the `pcN` and `offsetN` arrays. The two arrays
/// are the same size and have corresponding entries in one another. When
/// looking up a stack map for a particular program counter:
///
/// * A binary search is performed on the `pcN` array.
/// * Once the `pcM` entry matching the lookup pc is found, the corresponding
///   `offsetM` value is read.
/// * The `offsetM` value is an index into `data`, an array of 4-byte entries
///   located after the `offset*` array. The stack map itself starts at
///   `data[offsetM]` and is encoded as described below.
///
/// This encoding scheme is chosen so that parsing this data structure is
/// effectively not required: it's usable at rest, directly from a section of a
/// compiled executable. Notably, having offsets into the data array means that
/// a stack map is just a slice of the data array, and the entire structure can
/// be "parsed" by reading `count` and otherwise just checking that the various
/// offsets are in-bounds.
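///
/// For illustration only, the lookup roughly corresponds to the following
/// sketch, written against a hypothetical `words: &[u32]` view of the
/// section's little-endian 4-byte entries (the real parser lives in
/// `crate::stack_map`):
///
/// ```ignore
/// let count = words[0] as usize;
/// let pcs = &words[1..][..count];
/// let offsets = &words[1 + count..][..count];
/// let data = &words[1 + 2 * count..];
///
/// // binary search `pcs` for the pc in question; its stack map is then a
/// // slice of `data` starting at the corresponding offset
/// let m = pcs.binary_search(&pc).ok()?;
/// let map = &data[offsets[m] as usize..];
/// ```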
///
/// A stack map located at `data[offsetM]` is encoded as:
///
/// ```text
/// ┌───────────────────────────────────────────────────────┐
/// │ data[offsetM + 0]: frame_size: 4-byte LE              │
/// ├───────────────────────────────────────────────────────┤
/// │ data[offsetM + 1]: count: 4-byte LE                   │
/// ├───────────────────────────────────────────────────────┤
/// │ data[offsetM + 2 + 0]: bitmap: 4-byte LE              │
/// │ data[offsetM + 2 + 1]: bitmap: 4-byte LE              │
/// │ ...                                                   │
/// │ data[offsetM + 2 + count - 1]: bitmap: 4-byte LE      │
/// └───────────────────────────────────────────────────────┘
/// ```
///
/// Here `frame_size` and `count` are always greater than 0. Each bit in the
/// bitmap represents `stack_slot / 4`, so it must be multiplied by 4 to
/// recover the actual stack offset. This works because all stack slots are
/// aligned to 4 bytes, so dividing by 4 compresses the bitmap by the same
/// factor.
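///
/// For illustration only (hypothetical values), a section describing two stack
/// maps, one at pc `0x10` with a 16-byte frame and GC references at frame
/// offsets 0 and 8, and one at pc `0x40` with a 32-byte frame and a GC
/// reference at frame offset 4, would decode to:
///
/// ```text
/// count   = 2
/// pcs     = [0x10, 0x40]
/// offsets = [0, 3]
/// data    = [16, 1, 0b101,    (map for pc 0x10)
///            32, 1, 0b010]    (map for pc 0x40)
/// ```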
#[derive(Default)]
pub struct StackMapSection {
    pcs: Vec<U32Bytes<LittleEndian>>,
    pointers_to_stack_map: Vec<U32Bytes<LittleEndian>>,
    stack_map_data: Vec<U32Bytes<LittleEndian>>,
    last_offset: u32,
}

impl StackMapSection {
    /// Appends stack map information for the given `code_offset`, whose frame
    /// has the specified `frame_size` and whose active GC references live at
    /// the frame offsets yielded by `frame_offsets`.
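    ///
    /// For illustration, a hypothetical safepoint at text offset `0x20` in a
    /// `0x40`-byte frame with GC references at frame offsets 0 and 8 would be
    /// recorded as:
    ///
    /// ```ignore
    /// section.push(0x20, 0x40, [0u32, 8].into_iter());
    /// ```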
    pub fn push(
        &mut self,
        code_offset: u64,
        frame_size: u32,
        frame_offsets: impl ExactSizeIterator<Item = u32>,
    ) {
        // NB: for now this only supports <=4GB text sections in object files.
        // Alternative schemes will need to be created for >32-bit offsets to
        // avoid making this section overly large.
        let code_offset = u32::try_from(code_offset).unwrap();

        // Sanity-check that functions are pushed in-order, otherwise the `pcs`
        // array won't be sorted, which the lookup's binary search relies on.
        assert!(code_offset >= self.last_offset);
        self.last_offset = code_offset;

        // Skip encoding information for this code offset if there's not
        // actually anything in the stack map.
        if frame_offsets.len() == 0 {
            return;
        }

        // Record parallel entries in `pcs`/`pointers_to_stack_map`.
        self.pcs.push(U32Bytes::new(LittleEndian, code_offset));
        self.pointers_to_stack_map.push(U32Bytes::new(
            LittleEndian,
            u32::try_from(self.stack_map_data.len()).unwrap(),
        ));

        // The frame data starts with the frame size and is then followed by
        // `offsets` represented as a bit set.
        self.stack_map_data
            .push(U32Bytes::new(LittleEndian, frame_size));

        let mut bits = CompoundBitSet::<u32>::default();
        for offset in frame_offsets {
            assert!(offset % 4 == 0);
            bits.insert((offset / 4) as usize);
        }
        let count = bits.iter_scalars().count();
        self.stack_map_data
            .push(U32Bytes::new(LittleEndian, count as u32));
        for scalar in bits.iter_scalars() {
            self.stack_map_data
                .push(U32Bytes::new(LittleEndian, scalar.0));
        }
    }

    /// Finishes encoding this section into the `Object` provided.
    pub fn append_to(self, obj: &mut Object) {
        // Don't append anything for this section if there weren't any actual
        // stack maps present, no need to waste space!
        if self.pcs.is_empty() {
            return;
        }
        let section = obj.add_section(
            obj.segment_name(StandardSegment::Data).to_vec(),
            ELF_WASMTIME_STACK_MAP.as_bytes().to_vec(),
            SectionKind::ReadOnlyData,
        );

        // NB: this matches the encoding expected by `lookup` in the
        // `crate::stack_map` module.
        let amt = u32::try_from(self.pcs.len()).unwrap();
        obj.append_section_data(section, &amt.to_le_bytes(), 1);
        obj.append_section_data(section, object::bytes_of_slice(&self.pcs), 1);
        obj.append_section_data(
            section,
            object::bytes_of_slice(&self.pointers_to_stack_map),
            1,
        );
        obj.append_section_data(section, object::bytes_of_slice(&self.stack_map_data), 1);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::stack_map::StackMap;
    use object::{Object, ObjectSection};

    fn roundtrip(maps: &[(u64, u32, &[u32])]) {
        let mut section = StackMapSection::default();
        for (pc, frame, offsets) in maps {
            println!("append {pc}");
            section.push(*pc, *frame, offsets.iter().copied());
        }
        let mut object = object::write::Object::new(
            object::BinaryFormat::Elf,
            object::Architecture::X86_64,
            object::Endianness::Little,
        );
        section.append_to(&mut object);
        let elf = object.write().unwrap();

        let image = object::File::parse(&elf[..]).unwrap();
        let data = image
            .sections()
            .find(|s| s.name().ok() == Some(ELF_WASMTIME_STACK_MAP))
            .unwrap()
            .data()
            .unwrap();

        for (pc, frame, offsets) in maps {
            println!("lookup {pc}");
            let map = match StackMap::lookup(*pc as u32, data) {
                Some(map) => map,
                None => {
                    assert!(offsets.is_empty());
                    continue;
                }
            };
            assert_eq!(map.frame_size(), *frame);

            let map_offsets = map.offsets().collect::<Vec<_>>();
            assert_eq!(map_offsets, *offsets);
        }

        let mut expected = maps.iter();
        'outer: for (pc, map) in StackMap::iter(data).unwrap() {
            while let Some((expected_pc, expected_frame, expected_offsets)) = expected.next() {
                if expected_offsets.is_empty() {
                    continue;
                }
                assert_eq!(*expected_pc, u64::from(pc));
                assert_eq!(*expected_frame, map.frame_size());
                let offsets = map.offsets().collect::<Vec<_>>();
                assert_eq!(offsets, *expected_offsets);
                continue 'outer;
            }
            panic!("didn't find {pc:#x} in expected list");
        }
        assert!(expected.next().is_none());
    }

    #[test]
    fn roundtrip_many() {
        roundtrip(&[(0, 4, &[0])]);
        roundtrip(&[
            (0, 4, &[0]),
            (4, 200, &[0, 4, 20, 180]),
            (200, 20, &[12]),
            (600, 0, &[]),
            (800, 20, &[0, 4, 8, 12, 16]),
            (1200, 2000, &[1800, 1804, 1808, 1900]),
        ]);
    }
}