Skip to main content

wasmtime_environ/compile/
module_artifacts.rs

1//! Definitions of runtime structures and metadata which are serialized into ELF
2//! with `postcard` as part of a module's compilation process.
3
4use crate::WasmChecksum;
5use crate::error::{Result, bail};
6use crate::prelude::*;
7use crate::{
8    CompiledModuleInfo, DebugInfoData, FunctionName, Metadata, ModuleTranslation, Tunables, obj,
9};
10use object::SectionKind;
11use object::write::{Object, SectionId, StandardSegment, WritableBuffer};
12use std::ops::Range;
13
/// Helper structure to create an ELF file as a compilation artifact.
///
/// This structure exposes the process by which Wasmtime encodes a core wasm
/// module into an ELF file, notably managing the data section and the
/// optional, lazily created name and DWARF sections that go into the final
/// file.
pub struct ObjectBuilder<'a> {
    /// The `object`-crate-defined ELF file writer we're using.
    obj: Object<'a>,

    /// General compilation configuration.
    tunables: &'a Tunables,

    /// The section identifier for "rodata" which is where wasm data segments
    /// will go.
    data: SectionId,

    /// The section identifier for function name information, or otherwise where
    /// the `name` custom section of wasm is copied into.
    ///
    /// This is optional and lazily created on demand.
    names: Option<SectionId>,

    /// The section identifier for dwarf information copied from the original
    /// wasm files.
    ///
    /// This is optional and lazily created on demand.
    dwarf: Option<SectionId>,
}
42
43impl<'a> ObjectBuilder<'a> {
44    /// Creates a new builder for the `obj` specified.
45    pub fn new(mut obj: Object<'a>, tunables: &'a Tunables) -> ObjectBuilder<'a> {
46        let data = obj.add_section(
47            obj.segment_name(StandardSegment::Data).to_vec(),
48            obj::ELF_WASM_DATA.as_bytes().to_vec(),
49            SectionKind::ReadOnlyData,
50        );
51        ObjectBuilder {
52            obj,
53            tunables,
54            data,
55            names: None,
56            dwarf: None,
57        }
58    }
59
60    /// Insert the wasm raw wasm-based debuginfo into the output.
61    /// Note that this is distinct from the native debuginfo
62    /// possibly generated by the native compiler, hence these sections
63    /// getting wasm-specific names.
64    pub fn push_debuginfo(
65        &mut self,
66        dwarf: &mut Vec<(u8, Range<u64>)>,
67        debuginfo: &DebugInfoData<'_>,
68    ) {
69        self.push_debug(dwarf, &debuginfo.dwarf.debug_abbrev);
70        self.push_debug(dwarf, &debuginfo.dwarf.debug_addr);
71        self.push_debug(dwarf, &debuginfo.dwarf.debug_aranges);
72        self.push_debug(dwarf, &debuginfo.dwarf.debug_info);
73        self.push_debug(dwarf, &debuginfo.dwarf.debug_line);
74        self.push_debug(dwarf, &debuginfo.dwarf.debug_line_str);
75        self.push_debug(dwarf, &debuginfo.dwarf.debug_str);
76        self.push_debug(dwarf, &debuginfo.dwarf.debug_str_offsets);
77        self.push_debug(dwarf, &debuginfo.debug_ranges);
78        self.push_debug(dwarf, &debuginfo.debug_rnglists);
79        self.push_debug(dwarf, &debuginfo.debug_cu_index);
80
81        // Sort this for binary-search-lookup later in `symbolize_context`.
82        dwarf.sort_by_key(|(id, _)| *id);
83    }
84
85    /// Completes compilation of the `translation` specified, inserting
86    /// everything necessary into the `Object` being built.
87    ///
88    /// This function will consume the final results of compiling a wasm module
89    /// and finish the ELF image in-progress as part of `self.obj` by appending
90    /// any compiler-agnostic sections.
91    ///
92    /// The auxiliary `CompiledModuleInfo` structure returned here has also been
93    /// serialized into the object returned, but if the caller will quickly
94    /// turn-around and invoke `CompiledModule::from_artifacts` after this then
95    /// the information can be passed to that method to avoid extra
96    /// deserialization. This is done to avoid a serialize-then-deserialize for
97    /// API calls like `Module::new` where the compiled module is immediately
98    /// going to be used.
99    ///
100    /// The various arguments here are:
101    ///
102    /// * `translation` - the core wasm translation that's being completed.
103    ///
104    /// * `funcs` - compilation metadata about functions within the translation
105    ///   as well as where the functions are located in the text section and any
106    ///   associated trampolines.
107    ///
108    /// * `wasm_to_array_trampolines` - list of all trampolines necessary for
109    ///   Wasm callers calling array callees (e.g. `Func::wrap`). One for each
110    ///   function signature in the module. Must be sorted by `SignatureIndex`.
111    ///
112    /// Returns the `CompiledModuleInfo` corresponding to this core Wasm module
113    /// as a result of this append operation. This is then serialized into the
114    /// final artifact by the caller.
115    pub fn append(&mut self, translation: ModuleTranslation<'_>) -> Result<CompiledModuleInfo> {
116        let ModuleTranslation {
117            mut module,
118            debuginfo,
119            has_unparsed_debuginfo,
120            data_align,
121            runtime_data,
122            wasm,
123            ..
124        } = translation;
125
126        // Place all data from the wasm module into a section which will the
127        // source of the data later at runtime. This additionally keeps track of
128        // the offset of
129        let data_offset = self
130            .obj
131            .append_section_data(self.data, &[], data_align.unwrap_or(1));
132        for (i, (_, data)) in runtime_data.iter().enumerate() {
133            // The first data segment has its alignment specified as the
134            // alignment for the entire section, but everything afterwards is
135            // adjacent so it has alignment of 1.
136            let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
137            self.obj.append_section_data(self.data, data, align);
138        }
139
140        // If any names are present in the module then the `ELF_NAME_DATA` section
141        // is create and appended.
142        let mut func_names = Vec::new();
143        if debuginfo.name_section.func_names.len() > 0 {
144            let name_id = *self.names.get_or_insert_with(|| {
145                self.obj.add_section(
146                    self.obj.segment_name(StandardSegment::Data).to_vec(),
147                    obj::ELF_NAME_DATA.as_bytes().to_vec(),
148                    SectionKind::ReadOnlyData,
149                )
150            });
151            let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
152            sorted_names.sort_by_key(|(idx, _name)| *idx);
153            for (idx, name) in sorted_names {
154                let offset = self.obj.append_section_data(name_id, name.as_bytes(), 1);
155                let offset = match u32::try_from(offset) {
156                    Ok(offset) => offset,
157                    Err(_) => bail!("name section too large (> 4gb)"),
158                };
159                let len = u32::try_from(name.len()).unwrap();
160                func_names.push(FunctionName {
161                    idx: *idx,
162                    offset,
163                    len,
164                });
165            }
166        }
167
168        // Data offsets for passive data are relative to the start of
169        // `translation.runtime_data` which was appended to the data segment
170        // of this object, after active data in `translation.data`. Update the
171        // offsets to account prior modules added in addition to active data.
172        let data_offset = u32::try_from(data_offset).unwrap();
173        for (_, range) in module.runtime_data.iter_mut() {
174            range.start = range.start.checked_add(data_offset).unwrap();
175            range.end = range.end.checked_add(data_offset).unwrap();
176        }
177
178        // Insert the wasm raw wasm-based debuginfo into the output, if
179        // requested. Note that this is distinct from the native debuginfo
180        // possibly generated by the native compiler, hence these sections
181        // getting wasm-specific names.
182        let mut dwarf = Vec::new();
183        if self.tunables.parse_wasm_debuginfo {
184            self.push_debuginfo(&mut dwarf, &debuginfo);
185        }
186
187        Ok(CompiledModuleInfo {
188            module,
189            func_names,
190            meta: Metadata {
191                has_unparsed_debuginfo,
192                code_section_offset: debuginfo.wasm_file.code_section_offset,
193                has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
194                dwarf,
195            },
196            checksum: WasmChecksum::from_binary(wasm, self.tunables.recording),
197        })
198    }
199
200    fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
201    where
202        T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
203    {
204        let data = section.reader().slice();
205        if data.is_empty() {
206            return;
207        }
208        let section_id = *self.dwarf.get_or_insert_with(|| {
209            self.obj.add_section(
210                self.obj.segment_name(StandardSegment::Debug).to_vec(),
211                obj::ELF_WASMTIME_DWARF.as_bytes().to_vec(),
212                SectionKind::Debug,
213            )
214        });
215        let offset = self.obj.append_section_data(section_id, data, 1);
216        dwarf.push((T::id() as u8, offset..offset + data.len() as u64));
217    }
218
219    /// Appends the original Wasm bytecode for one or more core modules as a
220    /// pair of new ELF sections.
221    ///
222    /// `modules` is an iterator of raw Wasm binary slices, one per core
223    /// module, in `StaticModuleIndex` order.
224    pub fn append_wasm_bytecode<'b>(&mut self, modules: impl IntoIterator<Item = &'b [u8]>) {
225        let bytecode_id = self.obj.add_section(
226            self.obj.segment_name(StandardSegment::Data).to_vec(),
227            obj::ELF_WASMTIME_WASM_BYTECODE.as_bytes().to_vec(),
228            SectionKind::ReadOnlyData,
229        );
230        let ends_id = self.obj.add_section(
231            self.obj.segment_name(StandardSegment::Data).to_vec(),
232            obj::ELF_WASMTIME_WASM_BYTECODE_ENDS.as_bytes().to_vec(),
233            SectionKind::ReadOnlyData,
234        );
235        let mut end: u32 = 0;
236        for wasm in modules {
237            self.obj.append_section_data(bytecode_id, wasm, 1);
238            end = end
239                .checked_add(u32::try_from(wasm.len()).expect("module bytecode exceeds 4 GiB"))
240                .expect("total bytecode exceeds 4 GiB");
241            self.obj.append_section_data(ends_id, &end.to_le_bytes(), 4);
242        }
243    }
244
245    /// Creates the `ELF_WASMTIME_INFO` section from the given serializable data
246    /// structure.
247    pub fn serialize_info<T>(&mut self, info: &T)
248    where
249        T: serde::Serialize,
250    {
251        let section = self.obj.add_section(
252            self.obj.segment_name(StandardSegment::Data).to_vec(),
253            obj::ELF_WASMTIME_INFO.as_bytes().to_vec(),
254            SectionKind::ReadOnlyData,
255        );
256        let data = postcard::to_allocvec(info).unwrap();
257        self.obj.set_section_data(section, data, 1);
258    }
259
260    /// Serializes `self` into a buffer. This can be used for execution as well
261    /// as serialization.
262    pub fn finish<T: WritableBuffer>(self, t: &mut T) -> Result<()> {
263        self.obj.emit(t).map_err(|e| e.into())
264    }
265}
266
/// A type which can be the result of serializing an object.
///
/// Implementors consume an `ObjectBuilder` and produce `Self` from it.
pub trait FinishedObject: Sized {
    /// State required for `finish_object`, if any.
    ///
    /// Implementations that need no extra context can use `()`.
    type State;

    /// Emit the object as `Self`.
    ///
    /// Takes ownership of `obj`; `state` is the implementation-defined
    /// context described by `Self::State`. Returns an error if the
    /// implementation fails to produce the finished object.
    fn finish_object(obj: ObjectBuilder<'_>, state: &Self::State) -> Result<Self>;
}
275
276impl FinishedObject for Vec<u8> {
277    type State = ();
278    fn finish_object(obj: ObjectBuilder<'_>, _state: &Self::State) -> Result<Self> {
279        let mut result = ObjectVec::default();
280        obj.finish(&mut result)?;
281        return Ok(result.0);
282
283        #[derive(Default)]
284        struct ObjectVec(Vec<u8>);
285
286        impl WritableBuffer for ObjectVec {
287            fn len(&self) -> usize {
288                self.0.len()
289            }
290
291            fn reserve(&mut self, additional: usize) -> Result<(), ()> {
292                assert_eq!(self.0.len(), 0, "cannot reserve twice");
293                self.0 = Vec::with_capacity(additional);
294                Ok(())
295            }
296
297            fn resize(&mut self, new_len: usize) {
298                if new_len <= self.0.len() {
299                    self.0.truncate(new_len)
300                } else {
301                    self.0.extend(vec![0; new_len - self.0.len()])
302                }
303            }
304
305            fn write_bytes(&mut self, val: &[u8]) {
306                self.0.extend(val);
307            }
308        }
309    }
310}