wasmtime/
compile.rs

1//! Wasm compilation orchestration.
2//!
3//! It works roughly like this:
4//!
5//! * We walk over the Wasm module/component and make a list of all the things
6//!   we need to compile. This is a `CompileInputs`.
7//!
8//! * The `CompileInputs::compile` method compiles each of these in parallel,
9//!   producing a `UnlinkedCompileOutputs`. This is an unlinked set of compiled
10//!   functions, bucketed by type of function.
11//!
12//! * The `UnlinkedCompileOutputs::pre_link` method re-arranges the compiled
13//!   functions into a flat list. This is the order we will place them within
14//!   the ELF file, so we must also keep track of all the functions' indices
15//!   within this list, because we will need them for resolving
16//!   relocations. These indices are kept track of in the resulting
17//!   `FunctionIndices`.
18//!
19//! * The `FunctionIndices::link_and_append_code` method appends the functions
20//!   to the given ELF file and resolves relocations. It produces an `Artifacts`
21//!   which contains the data needed at runtime to find and call Wasm
22//!   functions. It is up to the caller to serialize the relevant parts of the
23//!   `Artifacts` into the ELF file.
24
25use crate::Engine;
26use crate::hash_map::HashMap;
27use crate::hash_set::HashSet;
28use crate::prelude::*;
29use std::{any::Any, borrow::Cow, collections::BTreeMap, mem, ops::Range};
30
31use call_graph::CallGraph;
32#[cfg(feature = "component-model")]
33use wasmtime_environ::component::Translator;
34use wasmtime_environ::{
35    BuiltinFunctionIndex, CompiledFunctionBody, CompiledFunctionsTable,
36    CompiledFunctionsTableBuilder, CompiledModuleInfo, Compiler, DefinedFuncIndex, FilePos,
37    FinishedObject, FuncKey, FunctionBodyData, InliningCompiler, IntraModuleInlining,
38    ModuleEnvironment, ModuleTranslation, ModuleTypes, ModuleTypesBuilder, ObjectKind, PrimaryMap,
39    StaticModuleIndex, Tunables,
40};
41
42mod call_graph;
43mod scc;
44mod stratify;
45
46mod code_builder;
47pub use self::code_builder::{CodeBuilder, CodeHint, HashedEngineCompileEnv};
48
49#[cfg(feature = "runtime")]
50mod runtime;
51
52/// Converts an input binary-encoded WebAssembly module to compilation
53/// artifacts and type information.
54///
55/// This is where compilation actually happens of WebAssembly modules and
56/// translation/parsing/validation of the binary input occurs. The binary
57/// artifact represented in the `MmapVec` returned here is an in-memory ELF
58/// file in an owned area of virtual linear memory where permissions (such
59/// as the executable bit) can be applied.
60///
61/// Additionally compilation returns an `Option` here which is always
62/// `Some`, notably compiled metadata about the module in addition to the
63/// type information found within.
64pub(crate) fn build_artifacts<T: FinishedObject>(
65    engine: &Engine,
66    wasm: &[u8],
67    dwarf_package: Option<&[u8]>,
68    obj_state: &T::State,
69) -> Result<(
70    T,
71    Option<(CompiledModuleInfo, CompiledFunctionsTable, ModuleTypes)>,
72)> {
73    let tunables = engine.tunables();
74
75    // First a `ModuleEnvironment` is created which records type information
76    // about the wasm module. This is where the WebAssembly is parsed and
77    // validated. Afterwards `types` will have all the type information for
78    // this module.
79    let mut parser = wasmparser::Parser::new(0);
80    let mut validator = wasmparser::Validator::new_with_features(engine.features());
81    parser.set_features(*validator.features());
82    let mut types = ModuleTypesBuilder::new(&validator);
83    let mut translation = ModuleEnvironment::new(
84        tunables,
85        &mut validator,
86        &mut types,
87        StaticModuleIndex::from_u32(0),
88    )
89    .translate(parser, wasm)
90    .context("failed to parse WebAssembly module")?;
91    let functions = mem::take(&mut translation.function_body_inputs);
92
93    let compile_inputs = CompileInputs::for_module(&types, &translation, functions);
94    let unlinked_compile_outputs = compile_inputs.compile(engine)?;
95    let PreLinkOutput {
96        needs_gc_heap,
97        compiled_funcs,
98        indices,
99    } = unlinked_compile_outputs.pre_link();
100    translation.module.needs_gc_heap |= needs_gc_heap;
101
102    // Emplace all compiled functions into the object file with any other
103    // sections associated with code as well.
104    let mut object = engine.compiler().object(ObjectKind::Module)?;
105    // Insert `Engine` and type-level information into the compiled
106    // artifact so if this module is deserialized later it contains all
107    // information necessary.
108    //
109    // Note that `append_compiler_info` and `append_types` here in theory
110    // can both be skipped if this module will never get serialized.
111    // They're only used during deserialization and not during runtime for
112    // the module itself. Currently there's no need for that, however, so
113    // it's left as an exercise for later.
114    engine.append_compiler_info(&mut object);
115    engine.append_bti(&mut object);
116
117    let (mut object, compilation_artifacts) = indices.link_and_append_code(
118        object,
119        engine,
120        compiled_funcs,
121        std::iter::once(translation).collect(),
122        dwarf_package,
123    )?;
124
125    let (info, index) = compilation_artifacts.unwrap_as_module_info();
126    let types = types.finish();
127    object.serialize_info(&(&info, &index, &types));
128    let result = T::finish_object(object, obj_state)?;
129
130    Ok((result, Some((info, index, types))))
131}
132
/// Compiles a binary-encoded component into a finished object together with
/// its `ComponentArtifacts`.
///
/// Every core module nested in the component is compiled, as are the extra
/// functions that `CompileInputs::for_component` schedules for the component
/// itself. The resulting artifacts are serialized into the object before it
/// is passed, along with `obj_state`, to `T::finish_object`.
///
/// The DWARF package argument is currently unused: `None` is always passed to
/// the linking step. The second element of the returned tuple is always
/// `Some`.
///
/// # Errors
///
/// Returns an error if translation, compilation, object creation, linking, or
/// finishing the object fails.
#[cfg(feature = "component-model")]
pub(crate) fn build_component_artifacts<T: FinishedObject>(
    engine: &Engine,
    binary: &[u8],
    _dwarf_package: Option<&[u8]>,
    obj_state: &T::State,
) -> Result<(T, Option<wasmtime_environ::component::ComponentArtifacts>)> {
    use wasmtime_environ::ScopeVec;
    use wasmtime_environ::component::{
        CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder,
    };

    // Translate the component and all of the core modules nested within it.
    let scope = ScopeVec::new();
    let mut validator = wasmparser::Validator::new_with_features(engine.features());
    let mut types = ComponentTypesBuilder::new(&validator);
    let (component, mut module_translations) =
        Translator::new(engine.tunables(), &mut validator, &mut types, &scope)
            .translate(binary)
            .context("failed to parse WebAssembly module")?;

    // Each module's function bodies are moved out of its translation and
    // handed to the compile inputs by value.
    let compile_inputs = CompileInputs::for_component(
        engine,
        &types,
        &component,
        module_translations.iter_mut().map(|(index, module)| {
            let bodies = mem::take(&mut module.function_body_inputs);
            (index, &*module, bodies)
        }),
    );

    let PreLinkOutput {
        needs_gc_heap,
        compiled_funcs,
        indices,
    } = compile_inputs.compile(engine)?.pre_link();

    // The GC-heap requirement found while compiling is applied to every
    // module of the component.
    for (_, module) in module_translations.iter_mut() {
        module.module.needs_gc_heap |= needs_gc_heap;
    }

    let mut unlinked_object = engine.compiler().object(ObjectKind::Component)?;
    engine.append_compiler_info(&mut unlinked_object);
    engine.append_bti(&mut unlinked_object);

    let (mut linked_object, compilation_artifacts) = indices.link_and_append_code(
        unlinked_object,
        engine,
        compiled_funcs,
        module_translations,
        None, // TODO: Support dwarf packages for components.
    )?;

    // `finish` only borrows the component, so it must run before the
    // component is moved into the info below.
    let (types, ty) = types.finish(&component.component);
    let artifacts = ComponentArtifacts {
        info: CompiledComponentInfo {
            component: component.component,
        },
        table: compilation_artifacts.table,
        ty,
        types,
        static_modules: compilation_artifacts.modules,
    };
    linked_object.serialize_info(&artifacts);

    let finished = T::finish_object(linked_object, obj_state)?;
    Ok((finished, Some(artifacts)))
}
211
/// A single deferred unit of compilation work.
///
/// It is a boxed, one-shot closure that is handed a `Compiler` and returns
/// the resulting `CompileOutput`, or an error if compilation failed. The
/// `Send` bound allows an input to be moved to another thread before it runs.
type CompileInput<'a> = Box<dyn FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a>;
213
/// The result of running one `CompileInput`: a compiled function plus the
/// metadata that identifies it.
struct CompileOutput<'a> {
    // Identifies which function this output is for.
    key: FuncKey,
    // The symbol name that was given to the compiler for this function.
    symbol: String,
    // The compiled function body.
    function: CompiledFunctionBody,
    // Source position of the function's start. The inputs built in this file
    // use the original position of the body's binary reader for defined Wasm
    // functions and `FilePos::default()` for trampolines.
    start_srcloc: FilePos,

    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
    translation: Option<&'a ModuleTranslation<'a>>,

    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
    func_body: Option<wasmparser::FunctionBody<'a>>,
}
226
/// Inputs to our inlining heuristics.
///
/// One value describes a single candidate call edge: the caller that would
/// receive the inlined body and the callee that would be inlined into it.
struct InlineHeuristicParams<'a> {
    // The engine's tunables.
    tunables: &'a Tunables,
    // Size of the caller as reported by the inlining compiler, plus the sizes
    // of any callees already accepted for inlining into it.
    caller_size: u32,
    // Module and defined-function index taken from the caller's `FuncKey`.
    caller_module: StaticModuleIndex,
    caller_def_func: DefinedFuncIndex,
    // The `needs_gc_heap` flag of the caller's module.
    caller_needs_gc_heap: bool,
    // Size of the callee as reported by the inlining compiler.
    callee_size: u32,
    // Module and defined-function index taken from the callee's `FuncKey`.
    callee_module: StaticModuleIndex,
    callee_def_func: DefinedFuncIndex,
    // The `needs_gc_heap` flag of the callee's module.
    callee_needs_gc_heap: bool,
}
239
/// The collection of things we need to compile for a Wasm module or component.
///
/// Each entry is a deferred closure (see `CompileInput`); nothing is compiled
/// until `CompileInputs::compile` runs them.
#[derive(Default)]
struct CompileInputs<'a> {
    // The pending compilation jobs, in the order they were pushed.
    inputs: Vec<CompileInput<'a>>,
}
245
246impl<'a> CompileInputs<'a> {
247    fn push_input(
248        &mut self,
249        f: impl FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a,
250    ) {
251        self.inputs.push(Box::new(f));
252    }
253
254    /// Create the `CompileInputs` for a core Wasm module.
255    fn for_module(
256        types: &'a ModuleTypesBuilder,
257        translation: &'a ModuleTranslation<'a>,
258        functions: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
259    ) -> Self {
260        let mut ret = CompileInputs { inputs: vec![] };
261
262        let module_index = StaticModuleIndex::from_u32(0);
263        ret.collect_inputs_in_translations(types, [(module_index, translation, functions)]);
264
265        ret
266    }
267
268    /// Create a `CompileInputs` for a component.
269    #[cfg(feature = "component-model")]
270    fn for_component(
271        engine: &'a Engine,
272        types: &'a wasmtime_environ::component::ComponentTypesBuilder,
273        component: &'a wasmtime_environ::component::ComponentTranslation,
274        module_translations: impl IntoIterator<
275            Item = (
276                StaticModuleIndex,
277                &'a ModuleTranslation<'a>,
278                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
279            ),
280        >,
281    ) -> Self {
282        use wasmtime_environ::Abi;
283
284        let mut ret = CompileInputs { inputs: vec![] };
285
286        ret.collect_inputs_in_translations(types.module_types_builder(), module_translations);
287        let tunables = engine.tunables();
288
289        for (idx, trampoline) in component.trampolines.iter() {
290            for abi in [Abi::Wasm, Abi::Array] {
291                ret.push_input(move |compiler| {
292                    let key = FuncKey::ComponentTrampoline(abi, idx);
293                    let mut symbol = trampoline.symbol_name();
294                    symbol.push_str(match abi {
295                        Abi::Wasm => "_wasm_call",
296                        Abi::Array => "_array_call",
297                    });
298                    let function = compiler
299                        .component_compiler()
300                        .compile_trampoline(component, types, key, abi, tunables, &symbol)
301                        .with_context(|| format!("failed to compile {symbol}"))?;
302                    Ok(CompileOutput {
303                        key,
304                        function,
305                        symbol,
306                        start_srcloc: FilePos::default(),
307                        translation: None,
308                        func_body: None,
309                    })
310                });
311            }
312        }
313
314        // If there are any resources defined within this component, the
315        // signature for `resource.drop` is mentioned somewhere, and the
316        // wasm-to-native trampoline for `resource.drop` hasn't been created yet
317        // then insert that here. This is possibly required by destruction of
318        // resources from the embedder and otherwise won't be explicitly
319        // requested through initializers above or such.
320        if component.component.num_resources > 0 {
321            if let Some(sig) = types.find_resource_drop_signature() {
322                ret.push_input(move |compiler| {
323                    let key = FuncKey::ResourceDropTrampoline;
324                    let symbol = "resource_drop_trampoline".to_string();
325                    let function = compiler
326                        .compile_wasm_to_array_trampoline(types[sig].unwrap_func(), key, &symbol)
327                        .with_context(|| format!("failed to compile `{symbol}`"))?;
328                    Ok(CompileOutput {
329                        key,
330                        function,
331                        symbol,
332                        start_srcloc: FilePos::default(),
333                        translation: None,
334                        func_body: None,
335                    })
336                });
337            }
338        }
339
340        ret
341    }
342
343    fn clean_symbol(name: &str) -> Cow<'_, str> {
344        /// Maximum length of symbols generated in objects.
345        const MAX_SYMBOL_LEN: usize = 96;
346
347        // Just to be on the safe side, filter out characters that could
348        // pose issues to tools such as "perf" or "objdump".  To avoid
349        // having to update a list of allowed characters for each different
350        // language that compiles to Wasm, allows only graphic ASCII
351        // characters; replace runs of everything else with a "?".
352        let bad_char = |c: char| !c.is_ascii_graphic();
353        if name.chars().any(bad_char) {
354            let mut last_char_seen = '\u{0000}';
355            Cow::Owned(
356                name.chars()
357                    .map(|c| if bad_char(c) { '?' } else { c })
358                    .filter(|c| {
359                        let skip = last_char_seen == '?' && *c == '?';
360                        last_char_seen = *c;
361                        !skip
362                    })
363                    .take(MAX_SYMBOL_LEN)
364                    .collect::<String>(),
365            )
366        } else if name.len() <= MAX_SYMBOL_LEN {
367            Cow::Borrowed(&name[..])
368        } else {
369            Cow::Borrowed(&name[..MAX_SYMBOL_LEN])
370        }
371    }
372
    /// Queues the compile inputs needed by a set of core module translations.
    ///
    /// For every defined function of every module this pushes an input that
    /// compiles the function itself and, if the function is escaping, a
    /// second input that compiles its array-to-wasm trampoline. Afterwards it
    /// pushes one input per distinct trampoline type in `types` to compile
    /// the corresponding wasm-to-array trampoline.
    ///
    /// Nothing is compiled here; the closures run later in `compile`.
    fn collect_inputs_in_translations(
        &mut self,
        types: &'a ModuleTypesBuilder,
        translations: impl IntoIterator<
            Item = (
                StaticModuleIndex,
                &'a ModuleTranslation<'a>,
                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
            ),
        >,
    ) {
        for (module, translation, functions) in translations {
            for (def_func_index, func_body_data) in functions {
                // Input for the Wasm function body itself.
                self.push_input(move |compiler| {
                    let key = FuncKey::DefinedWasmFunction(module, def_func_index);
                    let func_index = translation.module.func_index(def_func_index);
                    // Append the (sanitized) name from the name section to the
                    // symbol when one is available.
                    let symbol = match translation
                        .debuginfo
                        .name_section
                        .func_names
                        .get(&func_index)
                    {
                        Some(name) => format!(
                            "wasm[{}]::function[{}]::{}",
                            module.as_u32(),
                            func_index.as_u32(),
                            Self::clean_symbol(&name)
                        ),
                        None => format!(
                            "wasm[{}]::function[{}]",
                            module.as_u32(),
                            func_index.as_u32()
                        ),
                    };
                    // Keep a copy of the body: `func_body_data` is moved into
                    // `compile_function` below, but the body is also stored in
                    // the output.
                    let func_body = func_body_data.body.clone();
                    let data = func_body.get_binary_reader();
                    let offset = data.original_position();
                    // Panics if the body's offset does not fit in a `u32`.
                    let start_srcloc = FilePos::new(u32::try_from(offset).unwrap());
                    let function = compiler
                        .compile_function(translation, key, func_body_data, types, &symbol)
                        .with_context(|| format!("failed to compile: {symbol}"))?;

                    Ok(CompileOutput {
                        key,
                        symbol,
                        function,
                        start_srcloc,
                        translation: Some(translation),
                        func_body: Some(func_body),
                    })
                });

                // Escaping functions additionally get an array-to-wasm
                // trampoline.
                let func_index = translation.module.func_index(def_func_index);
                if translation.module.functions[func_index].is_escaping() {
                    self.push_input(move |compiler| {
                        let key = FuncKey::ArrayToWasmTrampoline(module, def_func_index);
                        let func_index = translation.module.func_index(def_func_index);
                        let symbol = format!(
                            "wasm[{}]::array_to_wasm_trampoline[{}]",
                            module.as_u32(),
                            func_index.as_u32()
                        );
                        let function = compiler
                            .compile_array_to_wasm_trampoline(translation, types, key, &symbol)
                            .with_context(|| format!("failed to compile: {symbol}"))?;
                        Ok(CompileOutput {
                            key,
                            symbol,
                            function,
                            start_srcloc: FilePos::default(),
                            translation: None,
                            func_body: None,
                        })
                    });
                }
            }
        }

        // Several function types may share one trampoline type; queue each
        // distinct trampoline type only once.
        let mut trampoline_types_seen = HashSet::new();
        for (_func_type_index, trampoline_type_index) in types.trampoline_types() {
            let is_new = trampoline_types_seen.insert(trampoline_type_index);
            if !is_new {
                continue;
            }
            let trampoline_func_ty = types[trampoline_type_index].unwrap_func();
            self.push_input(move |compiler| {
                let key = FuncKey::WasmToArrayTrampoline(trampoline_type_index);
                let symbol = format!(
                    "signatures[{}]::wasm_to_array_trampoline",
                    trampoline_type_index.as_u32()
                );
                let function = compiler
                    .compile_wasm_to_array_trampoline(trampoline_func_ty, key, &symbol)
                    .with_context(|| format!("failed to compile: {symbol}"))?;
                Ok(CompileOutput {
                    key,
                    function,
                    symbol,
                    start_srcloc: FilePos::default(),
                    translation: None,
                    func_body: None,
                })
            });
        }
    }
478
479    /// Compile these `CompileInput`s (maybe in parallel) and return the
480    /// resulting `UnlinkedCompileOutput`s.
481    fn compile(self, engine: &Engine) -> Result<UnlinkedCompileOutputs<'a>> {
482        let compiler = engine.compiler();
483
484        if self.inputs.len() > 0 && cfg!(miri) {
485            bail!(
486                "\
487You are attempting to compile a WebAssembly module or component that contains
488functions in Miri. Running Cranelift through Miri is known to take quite a long
489time and isn't what we want in CI at least. If this is a mistake then you should
490ignore this test in Miri with:
491
492    #[cfg_attr(miri, ignore)]
493
494If this is not a mistake then try to edit the `pulley_provenance_test` test
495which runs Cranelift outside of Miri. If you still feel this is a mistake then
496please open an issue or a topic on Zulip to talk about how best to accommodate
497the use case.
498"
499            );
500        }
501
502        let mut raw_outputs = if let Some(inlining_compiler) = compiler.inlining_compiler() {
503            if engine.tunables().inlining {
504                self.compile_with_inlining(engine, compiler, inlining_compiler)?
505            } else {
506                // Inlining compiler but inlining is disabled: compile each
507                // input and immediately finish its output in parallel, skipping
508                // call graph computation and all that.
509                engine.run_maybe_parallel::<_, _, Error, _>(self.inputs, |f| {
510                    let mut compiled = f(compiler)?;
511                    inlining_compiler.finish_compiling(
512                        &mut compiled.function,
513                        compiled.func_body.take(),
514                        &compiled.symbol,
515                    )?;
516                    Ok(compiled)
517                })?
518            }
519        } else {
520            // No inlining: just compile each individual input in parallel.
521            engine.run_maybe_parallel(self.inputs, |f| f(compiler))?
522        };
523
524        // Now that all functions have been compiled see if any
525        // wasmtime-builtin functions are necessary. If so those need to be
526        // collected and then those trampolines additionally need to be
527        // compiled.
528        compile_required_builtins(engine, &mut raw_outputs)?;
529
530        // Bucket the outputs by kind.
531        let mut outputs: BTreeMap<FuncKey, CompileOutput> = BTreeMap::new();
532        for output in raw_outputs {
533            outputs.insert(output.key, output);
534        }
535
536        Ok(UnlinkedCompileOutputs { outputs })
537    }
538
    /// Compiles every input and then inlines Wasm functions into their
    /// callers before finishing compilation.
    ///
    /// The steps are:
    ///
    /// 1. Run all inputs (possibly in parallel) to get the unlinked outputs.
    /// 2. Build a call graph over the defined Wasm functions only.
    /// 3. Stratify that graph into layers and process the layers in order,
    ///    inlining callees into the members of each layer in parallel
    ///    wherever `Self::should_inline` approves.
    /// 4. Finish compiling every output (possibly in parallel).
    ///
    /// # Errors
    ///
    /// Returns an error if compiling an input, computing the call graph,
    /// inlining, or finishing a function fails.
    fn compile_with_inlining(
        self,
        engine: &Engine,
        compiler: &dyn Compiler,
        inlining_compiler: &dyn InliningCompiler,
    ) -> Result<Vec<CompileOutput<'a>>, Error> {
        /// The index of a function (of any kind: Wasm function, trampoline, or
        /// etc...) in our list of unlinked outputs.
        #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
        struct OutputIndex(u32);
        wasmtime_environ::entity_impl!(OutputIndex);

        // Our list of unlinked outputs.
        //
        // Each slot is an `Option` so that a layer's outputs can be
        // temporarily taken out while they are being mutated below.
        let mut outputs = PrimaryMap::<OutputIndex, Option<CompileOutput<'_>>>::from(
            engine.run_maybe_parallel(self.inputs, |f| f(compiler).map(Some))?,
        );

        /// Get just the output indices of the Wasm functions from our unlinked
        /// outputs.
        fn wasm_functions<'a>(
            outputs: &'a PrimaryMap<OutputIndex, Option<CompileOutput<'_>>>,
        ) -> impl Iterator<Item = OutputIndex> + 'a {
            outputs.iter().filter_map(|(i, o)| match o.as_ref()?.key {
                FuncKey::DefinedWasmFunction(..) => Some(i),
                _ => None,
            })
        }

        // A map from a `FuncKey` to its index in our unlinked outputs.
        //
        // We will generally just be working with `OutputIndex`es, but
        // occasionally we must translate from keys back to our index space, for
        // example when we know that one module's function import is always
        // satisfied with a particular `FuncKey::DefinedWasmFunction`. This map
        // enables that translation.
        //
        // Only `FuncKey::DefinedWasmFunction` outputs are recorded here.
        let key_to_output: HashMap<FuncKey, OutputIndex> = outputs
            .iter()
            .filter(|(_, output)| {
                matches!(
                    output.as_ref().unwrap().key,
                    FuncKey::DefinedWasmFunction(..)
                )
            })
            .map(|(output_index, output)| {
                let output = output.as_ref().unwrap();
                (output.key, output_index)
            })
            .collect();

        // Construct the call graph for inlining.
        //
        // We only inline Wasm functions, not trampolines, because we rely on
        // trampolines being in their own stack frame when we save the entry and
        // exit SP, FP, and PC for backtraces in trampolines.
        let call_graph = CallGraph::<OutputIndex>::new(wasm_functions(&outputs), {
            // Scratch set that is cleared and reused for every function.
            let mut func_keys = IndexSet::default();
            let outputs = &outputs;
            let key_to_output = &key_to_output;
            move |output_index, calls| {
                debug_assert!(calls.is_empty());

                let output = outputs[output_index].as_ref().unwrap();
                debug_assert!(matches!(output.key, FuncKey::DefinedWasmFunction(..)));

                // Get this function's call graph edges as `FuncKey`s.
                func_keys.clear();
                inlining_compiler.calls(&output.function, &mut func_keys)?;

                // Translate each of those keys to output indices, which is
                // what we actually need. Keys that are not in `key_to_output`
                // are dropped.
                calls.extend(
                    func_keys
                        .iter()
                        .copied()
                        .filter_map(|key| key_to_output.get(&key)),
                );
                log::trace!(
                    "call graph edges for {output_index:?} = {:?}: {calls:?}",
                    output.key
                );
                Ok(())
            }
        })?;

        // Stratify the call graph into a sequence of layers. We process each
        // layer in order, but process functions within a layer in parallel
        // (because they either do not call each other or are part of a
        // mutual-recursion cycle; either way we won't inline members of the
        // same layer into each other).
        let strata = stratify::Strata::<OutputIndex>::new(wasm_functions(&outputs), &call_graph);
        // Scratch buffer reused across layers; it is drained at the end of
        // every iteration.
        let mut layer_outputs = vec![];
        for layer in strata.layers() {
            // Temporarily take this layer's outputs out of our unlinked outputs
            // list so that we can mutate these outputs (by inlining callee
            // functions into them) while also accessing shared borrows of the
            // unlinked outputs list (finding the callee functions we will
            // inline).
            debug_assert!(layer_outputs.is_empty());
            layer_outputs.extend(layer.iter().map(|f| outputs[*f].take().unwrap()));

            // Process this layer's members in parallel.
            engine.run_maybe_parallel_mut(
                &mut layer_outputs,
                |output: &mut CompileOutput<'_>| {
                    log::trace!("processing inlining for {:?}", output.key);
                    debug_assert!(matches!(output.key, FuncKey::DefinedWasmFunction(..)));

                    let caller_translation = output.translation.unwrap();
                    let (caller_module, caller_def_func) =
                        output.key.unwrap_defined_wasm_function();
                    let caller_needs_gc_heap = caller_translation.module.needs_gc_heap;
                    let caller = &mut output.function;

                    // Running size estimate for the caller; it grows each
                    // time a callee is accepted for inlining.
                    let mut caller_size = inlining_compiler.size(caller);

                    inlining_compiler.inline(caller, &mut |callee_key: FuncKey| {
                        // NOTE(review): this indexing panics if `callee_key`
                        // is missing from `key_to_output`; presumably the
                        // inlining compiler only asks about defined Wasm
                        // functions -- confirm against its implementation.
                        let callee_output_index: OutputIndex = key_to_output[&callee_key];

                        // NB: If the callee is not inside `outputs`, then it is
                        // in the same `Strata` layer as the caller (and
                        // therefore is in the same strongly-connected component
                        // as the caller, and they are mutually recursive). In
                        // this case, we do not do any inlining; communicate
                        // this command via `?`-propagation.
                        let callee_output = outputs[callee_output_index].as_ref()?;

                        debug_assert_eq!(callee_output.key, callee_key);

                        let callee = &callee_output.function;
                        let callee_size = inlining_compiler.size(callee);

                        let callee_needs_gc_heap =
                            callee_output.translation.unwrap().module.needs_gc_heap;

                        let (callee_module, callee_def_func) =
                            callee_key.unwrap_defined_wasm_function();
                        if Self::should_inline(InlineHeuristicParams {
                            tunables: engine.tunables(),
                            caller_size,
                            caller_module,
                            caller_def_func,
                            caller_needs_gc_heap,
                            callee_size,
                            callee_module,
                            callee_def_func,
                            callee_needs_gc_heap,
                        }) {
                            caller_size = caller_size.saturating_add(callee_size);
                            Some(callee)
                        } else {
                            None
                        }
                    })
                },
            )?;

            // Put this layer's (possibly modified) outputs back into their
            // original slots.
            for (f, func) in layer.iter().zip(layer_outputs.drain(..)) {
                debug_assert!(outputs[*f].is_none());
                outputs[*f] = Some(func);
            }
        }

        // Fan out in parallel again and finish compiling each function.
        engine.run_maybe_parallel(outputs.into(), |output| {
            let mut output = output.unwrap();
            inlining_compiler.finish_compiling(
                &mut output.function,
                output.func_body.take(),
                &output.symbol,
            )?;
            Ok(output)
        })
    }
712
713    /// Implementation of our inlining heuristics.
714    ///
715    /// TODO: We should improve our heuristics:
716    ///
717    /// * One potentially promising hint that we don't currently make use of is
718    ///   how many times a function appears as the callee in call sites. For
719    ///   example, a function that appears in only a single call site, and does
720    ///   not otherwise escape, is often beneficial to inline regardless of its
721    ///   size (assuming we can then GC away the non-inlined version of the
722    ///   function, which we do not currently attempt to do).
723    ///
724    /// * Another potentially promising hint would be whether any of the call
725    ///   site's actual arguments are constants.
726    ///
727    /// * A general improvement would be removing the decision-tree style of
728    ///   control flow below and replacing it with (1) a pure estimated-benefit
729    ///   formula and (2) a benefit threshold. Whenever the estimated benefit
730    ///   reaches the threshold, we would inline the call. Both the formula and
731    ///   the threshold would be parameterized by tunables. This would
732    ///   effectively allow reprioritizing the relative importance of different
733    ///   hint sources, rather than being stuck with the sequence hard-coded in
734    ///   the decision tree below.
735    fn should_inline(
736        InlineHeuristicParams {
737            tunables,
738            caller_size,
739            caller_module,
740            caller_def_func,
741            caller_needs_gc_heap,
742            callee_size,
743            callee_module,
744            callee_def_func,
745            callee_needs_gc_heap,
746        }: InlineHeuristicParams,
747    ) -> bool {
748        log::trace!(
749            "considering inlining:\n\
750             \tcaller = ({caller_module:?}, {caller_def_func:?})\n\
751             \t\tsize = {caller_size}\n\
752             \t\tneeds_gc_heap = {caller_needs_gc_heap}\n\
753             \tcallee = ({callee_module:?}, {callee_def_func:?})\n\
754             \t\tsize = {callee_size}\n\
755             \t\tneeds_gc_heap = {callee_needs_gc_heap}"
756        );
757
758        debug_assert!(
759            tunables.inlining,
760            "shouldn't even call this method if we aren't configured for inlining"
761        );
762        debug_assert!(
763            caller_module != callee_module || caller_def_func != callee_def_func,
764            "we never inline recursion"
765        );
766
767        // Put a limit on how large we can make a function via inlining to cap
768        // code bloat.
769        let sum_size = caller_size.saturating_add(callee_size);
770        if sum_size > tunables.inlining_sum_size_threshold {
771            log::trace!(
772                "  --> not inlining: the sum of the caller's and callee's sizes is greater than \
773                 the inlining-sum-size threshold: {callee_size} + {caller_size} > {}",
774                tunables.inlining_sum_size_threshold
775            );
776            return false;
777        }
778
779        // Consider whether this is an intra-module call.
780        //
781        // Inlining within a single core module has most often already been done
782        // by the toolchain that produced the module, e.g. LLVM, and any extant
783        // function calls to small callees were presumably annotated with the
784        // equivalent of `#[inline(never)]` or `#[cold]` but we don't have that
785        // information anymore.
786        if caller_module == callee_module {
787            match tunables.inlining_intra_module {
788                IntraModuleInlining::Yes => {}
789
790                IntraModuleInlining::WhenUsingGc
791                    if caller_needs_gc_heap || callee_needs_gc_heap => {}
792
793                IntraModuleInlining::WhenUsingGc => {
794                    log::trace!("  --> not inlining: intra-module call that does not use GC");
795                    return false;
796                }
797
798                IntraModuleInlining::No => {
799                    log::trace!("  --> not inlining: intra-module call");
800                    return false;
801                }
802            }
803        }
804
805        // Small callees are often worth inlining regardless of the size of the
806        // caller.
807        if callee_size <= tunables.inlining_small_callee_size {
808            log::trace!(
809                "  --> inlining: callee's size is less than the small-callee size: \
810                 {callee_size} <= {}",
811                tunables.inlining_small_callee_size
812            );
813            return true;
814        }
815
816        log::trace!("  --> inlining: did not find a reason we should not");
817        true
818    }
819}
820
821fn compile_required_builtins(engine: &Engine, raw_outputs: &mut Vec<CompileOutput>) -> Result<()> {
822    let compiler = engine.compiler();
823    let mut builtins = HashSet::new();
824    let mut new_inputs: Vec<CompileInput<'_>> = Vec::new();
825
826    let compile_builtin = |builtin: BuiltinFunctionIndex| {
827        Box::new(move |compiler: &dyn Compiler| {
828            let key = FuncKey::WasmToBuiltinTrampoline(builtin);
829            let symbol = format!("wasmtime_builtin_{}", builtin.name());
830            let mut function = compiler
831                .compile_wasm_to_builtin(key, &symbol)
832                .with_context(|| format!("failed to compile `{symbol}`"))?;
833            if let Some(compiler) = compiler.inlining_compiler() {
834                compiler.finish_compiling(&mut function, None, &symbol)?;
835            }
836            Ok(CompileOutput {
837                key,
838                function,
839                symbol,
840                start_srcloc: FilePos::default(),
841                translation: None,
842                func_body: None,
843            })
844        })
845    };
846
847    for output in raw_outputs.iter() {
848        for reloc in compiler.compiled_function_relocation_targets(&*output.function.code) {
849            if let FuncKey::WasmToBuiltinTrampoline(builtin) = reloc {
850                if builtins.insert(builtin) {
851                    new_inputs.push(compile_builtin(builtin));
852                }
853            }
854        }
855    }
856    raw_outputs.extend(engine.run_maybe_parallel(new_inputs, |c| c(compiler))?);
857    Ok(())
858}
859
860#[derive(Default)]
861struct UnlinkedCompileOutputs<'a> {
862    // A map from kind to `CompileOutput`.
863    outputs: BTreeMap<FuncKey, CompileOutput<'a>>,
864}
865
866impl UnlinkedCompileOutputs<'_> {
867    /// Flatten all our functions into a single list and remember each of their
868    /// indices within it.
869    fn pre_link(self) -> PreLinkOutput {
870        // We must ensure that `compiled_funcs` contains the function bodies
871        // sorted by their `FuncKey`, as `CompiledFunctionsTable` relies on that
872        // property.
873        //
874        // Furthermore, note that, because the order functions end up in
875        // `compiled_funcs` is the order they will ultimately be laid out inside
876        // the object file, we will group all trampolines together, all defined
877        // Wasm functions from the same module together, and etc... This is a
878        // nice property, because it means that (a) cold functions, like builtin
879        // trampolines, are not interspersed between hot Wasm functions, and (b)
880        // Wasm functions that are likely to call each other (i.e. are in the
881        // same module together) are grouped together.
882        let mut compiled_funcs = vec![];
883
884        let mut indices = FunctionIndices::default();
885        let mut needs_gc_heap = false;
886
887        // NB: Iteration over this `BTreeMap` ensures that we uphold
888        // `compiled_func`'s sorted property.
889        for output in self.outputs.into_values() {
890            needs_gc_heap |= output.function.needs_gc_heap;
891
892            let index = compiled_funcs.len();
893            compiled_funcs.push((output.symbol, output.function.code));
894
895            if output.start_srcloc != FilePos::none() {
896                indices
897                    .start_srclocs
898                    .insert(output.key, output.start_srcloc);
899            }
900
901            indices.indices.insert(output.key, index);
902        }
903
904        PreLinkOutput {
905            needs_gc_heap,
906            compiled_funcs,
907            indices,
908        }
909    }
910}
911
/// Our pre-link functions that have been flattened into a single list, as
/// produced by `UnlinkedCompileOutputs::pre_link`.
struct PreLinkOutput {
    /// Whether or not any of these functions require a GC heap.
    needs_gc_heap: bool,
    /// The flattened list of (symbol name, compiled function) pairs, as they
    /// will be laid out in the object file.
    compiled_funcs: Vec<(String, Box<dyn Any + Send + Sync>)>,
    /// The `FunctionIndices` mapping our function keys to indices in that flat
    /// list.
    indices: FunctionIndices,
}
923
924#[derive(Default)]
925struct FunctionIndices {
926    // A map of wasm functions and where they're located in the original file.
927    start_srclocs: HashMap<FuncKey, FilePos>,
928
929    // The index of each compiled function in `compiled_funcs`.
930    indices: BTreeMap<FuncKey, usize>,
931}
932
933impl FunctionIndices {
934    /// Link the compiled functions together, resolving relocations, and append
935    /// them to the given ELF file.
936    fn link_and_append_code<'a>(
937        self,
938        mut obj: object::write::Object<'static>,
939        engine: &'a Engine,
940        compiled_funcs: Vec<(String, Box<dyn Any + Send + Sync>)>,
941        translations: PrimaryMap<StaticModuleIndex, ModuleTranslation<'_>>,
942        dwarf_package_bytes: Option<&[u8]>,
943    ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> {
944        // Append all the functions to the ELF file.
945        //
946        // The result is a vector parallel to `compiled_funcs` where
947        // `symbol_ids_and_locs[i]` is the symbol ID and function location of
948        // `compiled_funcs[i]`.
949        let compiler = engine.compiler();
950        let tunables = engine.tunables();
951        let symbol_ids_and_locs = compiler.append_code(
952            &mut obj,
953            &compiled_funcs,
954            &|_caller_index: usize, callee: FuncKey| {
955                self.indices.get(&callee).copied().unwrap_or_else(|| {
956                    panic!("cannot resolve relocation! no index for callee {callee:?}")
957                })
958            },
959        )?;
960
961        // If requested, generate and add DWARF information.
962        if tunables.generate_native_debuginfo {
963            compiler.append_dwarf(
964                &mut obj,
965                &translations,
966                &|module, func| {
967                    let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)];
968                    let (symbol, _) = symbol_ids_and_locs[i];
969                    let (_, compiled_func) = &compiled_funcs[i];
970                    (symbol, &**compiled_func)
971                },
972                dwarf_package_bytes,
973                tunables,
974            )?;
975        }
976
977        let mut table_builder = CompiledFunctionsTableBuilder::new();
978        for (key, compiled_func_index) in &self.indices {
979            let (_, func_loc) = symbol_ids_and_locs[*compiled_func_index];
980            let src_loc = self
981                .start_srclocs
982                .get(key)
983                .copied()
984                .unwrap_or_else(FilePos::none);
985            table_builder.push_func(*key, func_loc, src_loc);
986        }
987
988        let mut obj = wasmtime_environ::ObjectBuilder::new(obj, tunables);
989        let modules = translations
990            .into_iter()
991            .map(|(_, mut translation)| {
992                // If configured attempt to use static memory initialization
993                // which can either at runtime be implemented as a single memcpy
994                // to initialize memory or otherwise enabling
995                // virtual-memory-tricks such as mmap'ing from a file to get
996                // copy-on-write.
997                if engine.tunables().memory_init_cow {
998                    let align = compiler.page_size_align();
999                    let max_always_allowed = engine.config().memory_guaranteed_dense_image_size;
1000                    translation.try_static_init(align, max_always_allowed);
1001                }
1002
1003                // Attempt to convert table initializer segments to FuncTable
1004                // representation where possible, to enable table lazy init.
1005                if engine.tunables().table_lazy_init {
1006                    translation.try_func_table_init();
1007                }
1008
1009                obj.append(translation)
1010            })
1011            .collect::<Result<PrimaryMap<_, _>>>()?;
1012
1013        let artifacts = Artifacts {
1014            modules,
1015            table: table_builder.finish(),
1016        };
1017
1018        Ok((obj, artifacts))
1019    }
1020}
1021
1022/// The artifacts necessary for finding and calling Wasm functions at runtime,
1023/// to be serialized into an ELF file.
1024struct Artifacts {
1025    modules: PrimaryMap<StaticModuleIndex, CompiledModuleInfo>,
1026    table: CompiledFunctionsTable,
1027}
1028
1029impl Artifacts {
1030    /// Assuming this compilation was for a single core Wasm module, get the
1031    /// resulting `CompiledModuleInfo`.
1032    fn unwrap_as_module_info(self) -> (CompiledModuleInfo, CompiledFunctionsTable) {
1033        assert_eq!(self.modules.len(), 1);
1034        let info = self.modules.into_iter().next().unwrap().1;
1035        let table = self.table;
1036        (info, table)
1037    }
1038}
1039
/// Append `items` to the end of `dest` and report which indices of `dest` they
/// now occupy, as a half-open range.
///
/// # Panics
///
/// Panics if the length of `dest`, before or after the append, does not fit
/// in a `u32`.
fn extend_with_range<T>(dest: &mut Vec<T>, items: impl IntoIterator<Item = T>) -> Range<u32> {
    let as_index = |len: usize| u32::try_from(len).unwrap();

    let start = as_index(dest.len());
    dest.extend(items);
    let end = as_index(dest.len());

    Range { start, end }
}