wasmtime_environ/compile/module_artifacts.rs
1//! Definitions of runtime structures and metadata which are serialized into ELF
2//! with `postcard` as part of a module's compilation process.
3
4use crate::prelude::*;
5use crate::{
6 CompiledModuleInfo, DebugInfoData, FunctionName, MemoryInitialization, Metadata,
7 ModuleTranslation, Tunables, obj,
8};
9use anyhow::{Result, bail};
10use object::SectionKind;
11use object::write::{Object, SectionId, StandardSegment, WritableBuffer};
12use std::ops::Range;
13
/// Helper structure to create an ELF file as a compilation artifact.
///
/// This structure exposes the process by which Wasmtime encodes a core wasm
/// module into an ELF file, notably managing the data sections (wasm data
/// segments, function names, and wasm DWARF) that go into the final file.
pub struct ObjectBuilder<'a> {
    /// The `object`-crate-defined ELF file writer we're using.
    obj: Object<'a>,

    /// General compilation configuration.
    tunables: &'a Tunables,

    /// The section identifier for "rodata" which is where wasm data segments
    /// will go.
    ///
    /// This section is always created eagerly in `ObjectBuilder::new`.
    data: SectionId,

    /// The section identifier for function name information, or otherwise where
    /// the `name` custom section of wasm is copied into.
    ///
    /// This is optional and lazily created on demand.
    names: Option<SectionId>,

    /// The section identifier for dwarf information copied from the original
    /// wasm files.
    ///
    /// This is optional and lazily created on demand.
    dwarf: Option<SectionId>,
}
42
43impl<'a> ObjectBuilder<'a> {
44 /// Creates a new builder for the `obj` specified.
45 pub fn new(mut obj: Object<'a>, tunables: &'a Tunables) -> ObjectBuilder<'a> {
46 let data = obj.add_section(
47 obj.segment_name(StandardSegment::Data).to_vec(),
48 obj::ELF_WASM_DATA.as_bytes().to_vec(),
49 SectionKind::ReadOnlyData,
50 );
51 ObjectBuilder {
52 obj,
53 tunables,
54 data,
55 names: None,
56 dwarf: None,
57 }
58 }
59
60 /// Insert the wasm raw wasm-based debuginfo into the output.
61 /// Note that this is distinct from the native debuginfo
62 /// possibly generated by the native compiler, hence these sections
63 /// getting wasm-specific names.
64 pub fn push_debuginfo(
65 &mut self,
66 dwarf: &mut Vec<(u8, Range<u64>)>,
67 debuginfo: &DebugInfoData<'_>,
68 ) {
69 self.push_debug(dwarf, &debuginfo.dwarf.debug_abbrev);
70 self.push_debug(dwarf, &debuginfo.dwarf.debug_addr);
71 self.push_debug(dwarf, &debuginfo.dwarf.debug_aranges);
72 self.push_debug(dwarf, &debuginfo.dwarf.debug_info);
73 self.push_debug(dwarf, &debuginfo.dwarf.debug_line);
74 self.push_debug(dwarf, &debuginfo.dwarf.debug_line_str);
75 self.push_debug(dwarf, &debuginfo.dwarf.debug_str);
76 self.push_debug(dwarf, &debuginfo.dwarf.debug_str_offsets);
77 self.push_debug(dwarf, &debuginfo.debug_ranges);
78 self.push_debug(dwarf, &debuginfo.debug_rnglists);
79 self.push_debug(dwarf, &debuginfo.debug_cu_index);
80
81 // Sort this for binary-search-lookup later in `symbolize_context`.
82 dwarf.sort_by_key(|(id, _)| *id);
83 }
84
85 /// Completes compilation of the `translation` specified, inserting
86 /// everything necessary into the `Object` being built.
87 ///
88 /// This function will consume the final results of compiling a wasm module
89 /// and finish the ELF image in-progress as part of `self.obj` by appending
90 /// any compiler-agnostic sections.
91 ///
92 /// The auxiliary `CompiledModuleInfo` structure returned here has also been
93 /// serialized into the object returned, but if the caller will quickly
94 /// turn-around and invoke `CompiledModule::from_artifacts` after this then
95 /// the information can be passed to that method to avoid extra
96 /// deserialization. This is done to avoid a serialize-then-deserialize for
97 /// API calls like `Module::new` where the compiled module is immediately
98 /// going to be used.
99 ///
100 /// The various arguments here are:
101 ///
102 /// * `translation` - the core wasm translation that's being completed.
103 ///
104 /// * `funcs` - compilation metadata about functions within the translation
105 /// as well as where the functions are located in the text section and any
106 /// associated trampolines.
107 ///
108 /// * `wasm_to_array_trampolines` - list of all trampolines necessary for
109 /// Wasm callers calling array callees (e.g. `Func::wrap`). One for each
110 /// function signature in the module. Must be sorted by `SignatureIndex`.
111 ///
112 /// Returns the `CompiledModuleInfo` corresponding to this core Wasm module
113 /// as a result of this append operation. This is then serialized into the
114 /// final artifact by the caller.
115 pub fn append(&mut self, translation: ModuleTranslation<'_>) -> Result<CompiledModuleInfo> {
116 let ModuleTranslation {
117 mut module,
118 debuginfo,
119 has_unparsed_debuginfo,
120 data,
121 data_align,
122 passive_data,
123 ..
124 } = translation;
125
126 // Place all data from the wasm module into a section which will the
127 // source of the data later at runtime. This additionally keeps track of
128 // the offset of
129 let mut total_data_len = 0;
130 let data_offset = self
131 .obj
132 .append_section_data(self.data, &[], data_align.unwrap_or(1));
133 for (i, data) in data.iter().enumerate() {
134 // The first data segment has its alignment specified as the alignment
135 // for the entire section, but everything afterwards is adjacent so it
136 // has alignment of 1.
137 let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
138 self.obj.append_section_data(self.data, data, align);
139 total_data_len += data.len();
140 }
141 for data in passive_data.iter() {
142 self.obj.append_section_data(self.data, data, 1);
143 }
144
145 // If any names are present in the module then the `ELF_NAME_DATA` section
146 // is create and appended.
147 let mut func_names = Vec::new();
148 if debuginfo.name_section.func_names.len() > 0 {
149 let name_id = *self.names.get_or_insert_with(|| {
150 self.obj.add_section(
151 self.obj.segment_name(StandardSegment::Data).to_vec(),
152 obj::ELF_NAME_DATA.as_bytes().to_vec(),
153 SectionKind::ReadOnlyData,
154 )
155 });
156 let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
157 sorted_names.sort_by_key(|(idx, _name)| *idx);
158 for (idx, name) in sorted_names {
159 let offset = self.obj.append_section_data(name_id, name.as_bytes(), 1);
160 let offset = match u32::try_from(offset) {
161 Ok(offset) => offset,
162 Err(_) => bail!("name section too large (> 4gb)"),
163 };
164 let len = u32::try_from(name.len()).unwrap();
165 func_names.push(FunctionName {
166 idx: *idx,
167 offset,
168 len,
169 });
170 }
171 }
172
173 // Data offsets in `MemoryInitialization` are offsets within the
174 // `translation.data` list concatenated which is now present in the data
175 // segment that's appended to the object. Increase the offsets by
176 // `self.data_size` to account for any previously added module.
177 let data_offset = u32::try_from(data_offset).unwrap();
178 match &mut module.memory_initialization {
179 MemoryInitialization::Segmented(list) => {
180 for segment in list {
181 segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
182 segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
183 }
184 }
185 MemoryInitialization::Static { map } => {
186 for (_, segment) in map {
187 if let Some(segment) = segment {
188 segment.data.start = segment.data.start.checked_add(data_offset).unwrap();
189 segment.data.end = segment.data.end.checked_add(data_offset).unwrap();
190 }
191 }
192 }
193 }
194
195 // Data offsets for passive data are relative to the start of
196 // `translation.passive_data` which was appended to the data segment
197 // of this object, after active data in `translation.data`. Update the
198 // offsets to account prior modules added in addition to active data.
199 let data_offset = data_offset + u32::try_from(total_data_len).unwrap();
200 for (_, range) in module.passive_data_map.iter_mut() {
201 range.start = range.start.checked_add(data_offset).unwrap();
202 range.end = range.end.checked_add(data_offset).unwrap();
203 }
204
205 // Insert the wasm raw wasm-based debuginfo into the output, if
206 // requested. Note that this is distinct from the native debuginfo
207 // possibly generated by the native compiler, hence these sections
208 // getting wasm-specific names.
209 let mut dwarf = Vec::new();
210 if self.tunables.parse_wasm_debuginfo {
211 self.push_debuginfo(&mut dwarf, &debuginfo);
212 }
213
214 Ok(CompiledModuleInfo {
215 module,
216 func_names,
217 meta: Metadata {
218 has_unparsed_debuginfo,
219 code_section_offset: debuginfo.wasm_file.code_section_offset,
220 has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
221 dwarf,
222 },
223 })
224 }
225
226 fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
227 where
228 T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
229 {
230 let data = section.reader().slice();
231 if data.is_empty() {
232 return;
233 }
234 let section_id = *self.dwarf.get_or_insert_with(|| {
235 self.obj.add_section(
236 self.obj.segment_name(StandardSegment::Debug).to_vec(),
237 obj::ELF_WASMTIME_DWARF.as_bytes().to_vec(),
238 SectionKind::Debug,
239 )
240 });
241 let offset = self.obj.append_section_data(section_id, data, 1);
242 dwarf.push((T::id() as u8, offset..offset + data.len() as u64));
243 }
244
245 /// Creates the `ELF_WASMTIME_INFO` section from the given serializable data
246 /// structure.
247 pub fn serialize_info<T>(&mut self, info: &T)
248 where
249 T: serde::Serialize,
250 {
251 let section = self.obj.add_section(
252 self.obj.segment_name(StandardSegment::Data).to_vec(),
253 obj::ELF_WASMTIME_INFO.as_bytes().to_vec(),
254 SectionKind::ReadOnlyData,
255 );
256 let data = postcard::to_allocvec(info).unwrap();
257 self.obj.set_section_data(section, data, 1);
258 }
259
260 /// Serializes `self` into a buffer. This can be used for execution as well
261 /// as serialization.
262 pub fn finish<T: WritableBuffer>(self, t: &mut T) -> Result<()> {
263 self.obj.emit(t).map_err(|e| e.into())
264 }
265}
266
/// A type which can be the result of serializing an object.
///
/// Implementors describe how a finished [`ObjectBuilder`] is turned into a
/// value of their own type.
pub trait FinishedObject: Sized {
    /// State required for `finish_object`, if any.
    ///
    /// Implementations that need no extra state can use `()`.
    type State;

    /// Emit the object as `Self`.
    ///
    /// Consumes the builder `obj`; `state` is whatever additional context the
    /// implementation declared through [`FinishedObject::State`].
    fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result<Self>;
}
275
276impl FinishedObject for Vec<u8> {
277 type State = ();
278 fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result<Self> {
279 let mut result = ObjectVec::default();
280 obj.finish(&mut result)?;
281 return Ok(result.0);
282
283 #[derive(Default)]
284 struct ObjectVec(Vec<u8>);
285
286 impl WritableBuffer for ObjectVec {
287 fn len(&self) -> usize {
288 self.0.len()
289 }
290
291 fn reserve(&mut self, additional: usize) -> Result<(), ()> {
292 assert_eq!(self.0.len(), 0, "cannot reserve twice");
293 self.0 = Vec::with_capacity(additional);
294 Ok(())
295 }
296
297 fn resize(&mut self, new_len: usize) {
298 if new_len <= self.0.len() {
299 self.0.truncate(new_len)
300 } else {
301 self.0.extend(vec![0; new_len - self.0.len()])
302 }
303 }
304
305 fn write_bytes(&mut self, val: &[u8]) {
306 self.0.extend(val);
307 }
308 }
309 }
310}