// wasmtime_environ/compile/module_artifacts.rs
//! Definitions of runtime structures and metadata which are serialized into ELF
//! with `postcard` as part of a module's compilation process.

use crate::prelude::*;
use crate::{
    obj, CompiledFunctionInfo, CompiledModuleInfo, DebugInfoData, DefinedFuncIndex, FunctionLoc,
    FunctionName, MemoryInitialization, Metadata, ModuleInternedTypeIndex, ModuleTranslation,
    PrimaryMap, Tunables,
};
use anyhow::{bail, Result};
use object::write::{Object, SectionId, StandardSegment, WritableBuffer};
use object::SectionKind;
use std::ops::Range;

/// Helper structure to create an ELF file as a compilation artifact.
///
/// This structure drives the process by which Wasmtime encodes a core wasm
/// module into an ELF file, notably managing the data sections (wasm data
/// segments, function names, wasm DWARF, and serialized metadata) that go
/// into the final file.
pub struct ObjectBuilder<'a> {
    /// The `object`-crate-defined ELF file writer we're using.
    obj: Object<'a>,

    /// General compilation configuration.
    tunables: &'a Tunables,

    /// The section identifier for "rodata" which is where wasm data segments
    /// will go.
    ///
    /// Unlike `names` and `dwarf` this section is created eagerly by
    /// `ObjectBuilder::new`.
    data: SectionId,

    /// The target triple for this compilation.
    triple: target_lexicon::Triple,

    /// The section identifier for function name information, or otherwise where
    /// the `name` custom section of wasm is copied into.
    ///
    /// This is optional and lazily created on demand, i.e. the first time a
    /// module with function names is appended.
    names: Option<SectionId>,

    /// The section identifier for dwarf information copied from the original
    /// wasm files.
    ///
    /// This is optional and lazily created on demand, i.e. the first time a
    /// non-empty DWARF section is pushed.
    dwarf: Option<SectionId>,
}

impl<'a> ObjectBuilder<'a> {
    /// Creates a new builder for the `obj` specified.
    pub fn new(
        mut obj: Object<'a>,
        tunables: &'a Tunables,
        triple: target_lexicon::Triple,
    ) -> ObjectBuilder<'a> {
        let data = obj.add_section(
            obj.segment_name(StandardSegment::Data).to_vec(),
            obj::ELF_WASM_DATA.as_bytes().to_vec(),
            SectionKind::ReadOnlyData,
        );
        ObjectBuilder {
            obj,
            tunables,
            data,
            triple,
            names: None,
            dwarf: None,
        }
    }

    /// Copies the raw, wasm-level DWARF sections of `debuginfo` into the
    /// output object.
    ///
    /// This is distinct from any native debuginfo the native compiler may
    /// generate, which is why the data lands in a wasm-specific section.
    ///
    /// For every non-empty section an `(id, byte range)` entry is appended to
    /// `dwarf`, and on return `dwarf` is sorted by section id.
    pub fn push_debuginfo(
        &mut self,
        dwarf: &mut Vec<(u8, Range<u64>)>,
        debuginfo: &DebugInfoData<'_>,
    ) {
        // The order of these calls determines the layout of the section, so it
        // is kept fixed.
        let sections = &debuginfo.dwarf;
        self.push_debug(dwarf, &sections.debug_abbrev);
        self.push_debug(dwarf, &sections.debug_addr);
        self.push_debug(dwarf, &sections.debug_aranges);
        self.push_debug(dwarf, &sections.debug_info);
        self.push_debug(dwarf, &sections.debug_line);
        self.push_debug(dwarf, &sections.debug_line_str);
        self.push_debug(dwarf, &sections.debug_str);
        self.push_debug(dwarf, &sections.debug_str_offsets);

        // These sections are stored directly on `DebugInfoData`.
        self.push_debug(dwarf, &debuginfo.debug_ranges);
        self.push_debug(dwarf, &debuginfo.debug_rnglists);
        self.push_debug(dwarf, &debuginfo.debug_cu_index);

        // Keep the list ordered by section id so that `symbolize_context` can
        // binary-search it later.
        dwarf.sort_by_key(|entry| entry.0);
    }

    /// Completes compilation of the `translation` specified, inserting
    /// everything necessary into the `Object` being built.
    ///
    /// This function consumes the final results of compiling a wasm module
    /// and extends the ELF image in-progress in `self.obj` with the
    /// compiler-agnostic sections: the module's active and passive data, its
    /// function names and, when `Tunables::parse_wasm_debuginfo` is set, its
    /// wasm-level DWARF.
    ///
    /// The returned `CompiledModuleInfo` is not written into the object by
    /// this method; the caller is expected to serialize it into the final
    /// artifact (see `serialize_info`). Handing the structure back also means
    /// that a caller which is immediately going to use the compiled module
    /// (e.g. API calls like `Module::new`) can pass it along directly and
    /// avoid a serialize-then-deserialize round trip.
    ///
    /// The various arguments here are:
    ///
    /// * `translation` - the core wasm translation that's being completed.
    ///
    /// * `funcs` - compilation metadata about functions within the translation
    ///   as well as where the functions are located in the text section and any
    ///   associated trampolines.
    ///
    /// * `wasm_to_array_trampolines` - list of all trampolines necessary for
    ///   Wasm callers calling array callees (e.g. `Func::wrap`). One for each
    ///   function signature in the module. Must be sorted by their
    ///   `ModuleInternedTypeIndex`.
    ///
    /// # Errors
    ///
    /// Returns an error if the name section, a single function name, or the
    /// data section grows beyond what a 32-bit offset can address.
    ///
    /// # Panics
    ///
    /// Panics if the target is Pulley and `wasm_to_array_trampolines` is not
    /// empty.
    pub fn append(
        &mut self,
        translation: ModuleTranslation<'_>,
        funcs: PrimaryMap<DefinedFuncIndex, CompiledFunctionInfo>,
        wasm_to_array_trampolines: Vec<(ModuleInternedTypeIndex, FunctionLoc)>,
    ) -> Result<CompiledModuleInfo> {
        // Moves a 32-bit data-section offset forward by `delta`, reporting an
        // error rather than panicking or wrapping when the result no longer
        // fits in 32 bits.
        fn shift(offset: u32, delta: u32) -> Result<u32> {
            match offset.checked_add(delta) {
                Some(shifted) => Ok(shifted),
                None => bail!("data section too large (> 4gb)"),
            }
        }

        let ModuleTranslation {
            mut module,
            debuginfo,
            has_unparsed_debuginfo,
            data,
            data_align,
            passive_data,
            ..
        } = translation;

        // Place all data from the wasm module into a section which will be the
        // source of the data later at runtime. The empty append below yields
        // the aligned offset at which this module's data starts, and
        // `total_data_len` tracks how many bytes of active data precede the
        // passive data appended afterwards.
        let mut total_data_len = 0;
        let data_offset = self
            .obj
            .append_section_data(self.data, &[], data_align.unwrap_or(1));
        for (i, data) in data.iter().enumerate() {
            // The first data segment has its alignment specified as the alignment
            // for the entire section, but everything afterwards is adjacent so it
            // has alignment of 1.
            let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
            self.obj.append_section_data(self.data, data, align);
            total_data_len += data.len();
        }
        for data in passive_data.iter() {
            self.obj.append_section_data(self.data, data, 1);
        }

        // If any names are present in the module then the `ELF_NAME_DATA`
        // section is created (if it doesn't exist yet) and the names are
        // appended to it, ordered by function index.
        let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
        sorted_names.sort_by_key(|(idx, _name)| *idx);
        let mut func_names = Vec::with_capacity(sorted_names.len());
        if !sorted_names.is_empty() {
            let name_id = *self.names.get_or_insert_with(|| {
                self.obj.add_section(
                    self.obj.segment_name(StandardSegment::Data).to_vec(),
                    obj::ELF_NAME_DATA.as_bytes().to_vec(),
                    SectionKind::ReadOnlyData,
                )
            });
            for (idx, name) in sorted_names {
                let offset = self.obj.append_section_data(name_id, name.as_bytes(), 1);
                let offset = match u32::try_from(offset) {
                    Ok(offset) => offset,
                    Err(_) => bail!("name section too large (> 4gb)"),
                };
                let len = match u32::try_from(name.len()) {
                    Ok(len) => len,
                    Err(_) => bail!("function name too long (> 4gb)"),
                };
                func_names.push(FunctionName {
                    idx: *idx,
                    offset,
                    len,
                });
            }
        }

        // Data offsets in `MemoryInitialization` are offsets within the
        // `translation.data` list concatenated which is now present in the data
        // segment that's appended to the object. Increase the offsets by the
        // position at which this module's data was placed to account for any
        // previously added module.
        let data_offset = match u32::try_from(data_offset) {
            Ok(offset) => offset,
            Err(_) => bail!("data section too large (> 4gb)"),
        };
        match &mut module.memory_initialization {
            MemoryInitialization::Segmented(list) => {
                for segment in list {
                    segment.data.start = shift(segment.data.start, data_offset)?;
                    segment.data.end = shift(segment.data.end, data_offset)?;
                }
            }
            MemoryInitialization::Static { map } => {
                for (_, segment) in map {
                    if let Some(segment) = segment {
                        segment.data.start = shift(segment.data.start, data_offset)?;
                        segment.data.end = shift(segment.data.end, data_offset)?;
                    }
                }
            }
        }

        // Data offsets for passive data are relative to the start of
        // `translation.passive_data` which was appended to the data segment
        // of this object, after active data in `translation.data`. Update the
        // offsets to account for prior modules added in addition to active
        // data. The addition is checked so that it cannot silently wrap in
        // builds without overflow checks.
        let active_len = match u32::try_from(total_data_len) {
            Ok(len) => len,
            Err(_) => bail!("data section too large (> 4gb)"),
        };
        let data_offset = shift(data_offset, active_len)?;
        for (_, range) in module.passive_data_map.iter_mut() {
            range.start = shift(range.start, data_offset)?;
            range.end = shift(range.end, data_offset)?;
        }

        // Insert the raw wasm-based debuginfo into the output, if requested.
        // Note that this is distinct from the native debuginfo possibly
        // generated by the native compiler, hence these sections getting
        // wasm-specific names.
        let mut dwarf = Vec::new();
        if self.tunables.parse_wasm_debuginfo {
            self.push_debuginfo(&mut dwarf, &debuginfo);
        }

        // Pulley targets are expected to come without wasm-to-array
        // trampolines; anything else is a bug in the caller.
        let is_pulley = matches!(
            self.triple.architecture,
            target_lexicon::Architecture::Pulley32 | target_lexicon::Architecture::Pulley64
        );
        assert!(!is_pulley || wasm_to_array_trampolines.is_empty());

        Ok(CompiledModuleInfo {
            module,
            funcs,
            wasm_to_array_trampolines,
            func_names,
            meta: Metadata {
                has_unparsed_debuginfo,
                code_section_offset: debuginfo.wasm_file.code_section_offset,
                has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
                is_pulley,
                dwarf,
            },
        })
    }

    /// Appends the bytes of one DWARF `section` to the wasm DWARF section of
    /// the object and records where they ended up.
    ///
    /// Empty sections are skipped entirely. Otherwise the `ELF_WASMTIME_DWARF`
    /// section is created on first use, the bytes are appended to it, and a
    /// `(section id, byte range)` entry is pushed onto `dwarf`.
    fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
    where
        T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
    {
        let bytes = section.reader().slice();
        if !bytes.is_empty() {
            // Lazily create the output section the first time it is needed.
            let dwarf_section = match self.dwarf {
                Some(existing) => existing,
                None => {
                    let segment = self.obj.segment_name(StandardSegment::Debug).to_vec();
                    let section_name = obj::ELF_WASMTIME_DWARF.as_bytes().to_vec();
                    let created = self
                        .obj
                        .add_section(segment, section_name, SectionKind::Debug);
                    self.dwarf = Some(created);
                    created
                }
            };

            let start = self.obj.append_section_data(dwarf_section, bytes, 1);
            let end = start + bytes.len() as u64;
            dwarf.push((T::id() as u8, start..end));
        }
    }

    /// Adds an `ELF_WASMTIME_INFO` section holding the `postcard` encoding of
    /// `info`.
    ///
    /// A new read-only section is added on every call.
    ///
    /// # Panics
    ///
    /// Panics if `info` fails to serialize.
    pub fn serialize_info<T>(&mut self, info: &T)
    where
        T: serde::Serialize,
    {
        let segment = self.obj.segment_name(StandardSegment::Data).to_vec();
        let section_name = obj::ELF_WASMTIME_INFO.as_bytes().to_vec();
        let info_section = self
            .obj
            .add_section(segment, section_name, SectionKind::ReadOnlyData);

        let encoded = postcard::to_allocvec(info).unwrap();
        self.obj.set_section_data(info_section, encoded, 1);
    }

    /// Consumes the builder and emits the finished object into `t`.
    ///
    /// The resulting bytes can be used for execution as well as for
    /// serialization. Any error reported while emitting the object is
    /// returned to the caller.
    pub fn finish<T: WritableBuffer>(self, t: &mut T) -> Result<()> {
        self.obj.emit(t)?;
        Ok(())
    }
}

/// A type which can be the result of serializing an object.
///
/// Implementors decide what kind of buffer the finished object is written
/// into; this file provides an implementation for `Vec<u8>`.
pub trait FinishedObject: Sized {
    /// Emit the object as `Self`.
    ///
    /// Consumes the builder `obj` and returns the emitted object, or an error
    /// if emitting it fails.
    fn finish_object(obj: ObjectBuilder<'_>) -> Result<Self>;
}

impl FinishedObject for Vec<u8> {
    /// Emits the finished object into a freshly allocated, in-memory byte
    /// vector.
    fn finish_object(obj: ObjectBuilder<'_>) -> Result<Self> {
        /// Private wrapper around `Vec<u8>` with its own `WritableBuffer`
        /// implementation.
        #[derive(Default)]
        struct ObjectVec(Vec<u8>);

        impl WritableBuffer for ObjectVec {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn reserve(&mut self, additional: usize) -> Result<(), ()> {
                // Reserving replaces the backing vector, so it is only valid
                // while nothing has been written yet.
                assert_eq!(self.0.len(), 0, "cannot reserve twice");
                self.0 = Vec::with_capacity(additional);
                Ok(())
            }

            fn resize(&mut self, new_len: usize) {
                // `Vec::resize` truncates when shrinking and zero-fills when
                // growing, without building a temporary vector of zeros.
                self.0.resize(new_len, 0);
            }

            fn write_bytes(&mut self, val: &[u8]) {
                self.0.extend_from_slice(val);
            }
        }

        let mut result = ObjectVec::default();
        obj.finish(&mut result)?;
        Ok(result.0)
    }
}