wasmtime/
compile.rs

1//! Wasm compilation orchestration.
2//!
3//! It works roughly like this:
4//!
5//! * We walk over the Wasm module/component and make a list of all the things
6//!   we need to compile. This is a `CompileInputs`.
7//!
8//! * The `CompileInputs::compile` method compiles each of these in parallel,
9//!   producing a `UnlinkedCompileOutputs`. This is an unlinked set of compiled
10//!   functions, bucketed by type of function.
11//!
12//! * The `UnlinkedCompileOutputs::pre_link` method re-arranges the compiled
13//!   functions into a flat list. This is the order we will place them within
14//!   the ELF file, so we must also keep track of all the functions' indices
15//!   within this list, because we will need them for resolving
16//!   relocations. These indices are kept track of in the resulting
17//!   `FunctionIndices`.
18//!
19//! * The `FunctionIndices::link_and_append_code` method appends the functions
20//!   to the given ELF file and resolves relocations. It produces an `Artifacts`
21//!   which contains the data needed at runtime to find and call Wasm
22//!   functions. It is up to the caller to serialize the relevant parts of the
23//!   `Artifacts` into the ELF file.
24
25use crate::Engine;
26use crate::hash_map::HashMap;
27use crate::hash_set::HashSet;
28use crate::prelude::*;
29use std::{any::Any, borrow::Cow, collections::BTreeMap, mem, ops::Range};
30
31use call_graph::CallGraph;
32#[cfg(feature = "component-model")]
33use wasmtime_environ::component::Translator;
34use wasmtime_environ::{
35    Abi, CompiledFunctionBody, CompiledFunctionsTable, CompiledFunctionsTableBuilder,
36    CompiledModuleInfo, Compiler, DefinedFuncIndex, FilePos, FinishedObject, FuncKey,
37    FunctionBodyData, InliningCompiler, IntraModuleInlining, ModuleEnvironment, ModuleTranslation,
38    ModuleTypes, ModuleTypesBuilder, ObjectKind, PrimaryMap, StaticModuleIndex, Tunables,
39};
40
41mod call_graph;
42mod scc;
43mod stratify;
44
45mod code_builder;
46pub use self::code_builder::{CodeBuilder, CodeHint, HashedEngineCompileEnv};
47
48#[cfg(feature = "runtime")]
49mod runtime;
50
51/// Converts an input binary-encoded WebAssembly module to compilation
52/// artifacts and type information.
53///
54/// This is where compilation actually happens of WebAssembly modules and
55/// translation/parsing/validation of the binary input occurs. The binary
56/// artifact represented in the `MmapVec` returned here is an in-memory ELF
57/// file in an owned area of virtual linear memory where permissions (such
58/// as the executable bit) can be applied.
59///
60/// Additionally compilation returns an `Option` here which is always
61/// `Some`, notably compiled metadata about the module in addition to the
62/// type information found within.
63pub(crate) fn build_module_artifacts<T: FinishedObject>(
64    engine: &Engine,
65    wasm: &[u8],
66    dwarf_package: Option<&[u8]>,
67    obj_state: &T::State,
68) -> Result<(
69    T,
70    Option<(CompiledModuleInfo, CompiledFunctionsTable, ModuleTypes)>,
71)> {
72    let compiler = engine.try_compiler()?;
73    let tunables = engine.tunables();
74
75    // First a `ModuleEnvironment` is created which records type information
76    // about the wasm module. This is where the WebAssembly is parsed and
77    // validated. Afterwards `types` will have all the type information for
78    // this module.
79    let mut parser = wasmparser::Parser::new(0);
80    let mut validator = wasmparser::Validator::new_with_features(engine.features());
81    parser.set_features(*validator.features());
82    let mut types = ModuleTypesBuilder::new(&validator);
83    let mut translation = ModuleEnvironment::new(
84        tunables,
85        &mut validator,
86        &mut types,
87        StaticModuleIndex::from_u32(0),
88    )
89    .translate(parser, wasm)
90    .context("failed to parse WebAssembly module")?;
91    let functions = mem::take(&mut translation.function_body_inputs);
92
93    let compile_inputs = CompileInputs::for_module(&types, &translation, functions);
94    let unlinked_compile_outputs = compile_inputs.compile(engine)?;
95    let PreLinkOutput {
96        needs_gc_heap,
97        compiled_funcs,
98        indices,
99    } = unlinked_compile_outputs.pre_link();
100    translation.module.needs_gc_heap |= needs_gc_heap;
101
102    // Emplace all compiled functions into the object file with any other
103    // sections associated with code as well.
104    let mut object = compiler.object(ObjectKind::Module)?;
105    // Insert `Engine` and type-level information into the compiled
106    // artifact so if this module is deserialized later it contains all
107    // information necessary.
108    //
109    // Note that `append_compiler_info` and `append_types` here in theory
110    // can both be skipped if this module will never get serialized.
111    // They're only used during deserialization and not during runtime for
112    // the module itself. Currently there's no need for that, however, so
113    // it's left as an exercise for later.
114    engine.append_compiler_info(&mut object)?;
115    engine.append_bti(&mut object);
116
117    let (mut object, compilation_artifacts) = indices.link_and_append_code(
118        object,
119        engine,
120        compiled_funcs,
121        std::iter::once(translation).collect(),
122        dwarf_package,
123    )?;
124
125    let (info, index) = compilation_artifacts.unwrap_as_module_info();
126    let types = types.finish();
127    object.serialize_info(&(&info, &index, &types));
128    let result = T::finish_object(object, obj_state)?;
129
130    Ok((result, Some((info, index, types))))
131}
132
133/// Performs the compilation phase for a component, translating and
134/// validating the provided wasm binary to machine code.
135///
136/// This method will compile all nested core wasm binaries in addition to
137/// any necessary extra functions required for operation with components.
138/// The output artifact here is the serialized object file contained within
139/// an owned mmap along with metadata about the compilation itself.
140#[cfg(feature = "component-model")]
141pub(crate) fn build_component_artifacts<T: FinishedObject>(
142    engine: &Engine,
143    binary: &[u8],
144    _dwarf_package: Option<&[u8]>,
145    unsafe_intrinsics_import: Option<&str>,
146    obj_state: &T::State,
147) -> Result<(T, Option<wasmtime_environ::component::ComponentArtifacts>)> {
148    use wasmtime_environ::ScopeVec;
149    use wasmtime_environ::component::{
150        CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder,
151    };
152
153    let compiler = engine.try_compiler()?;
154    let tunables = engine.tunables();
155
156    let scope = ScopeVec::new();
157    let mut validator = wasmparser::Validator::new_with_features(engine.features());
158    let mut types = ComponentTypesBuilder::new(&validator);
159    let mut translator = Translator::new(tunables, &mut validator, &mut types, &scope);
160    if let Some(name) = unsafe_intrinsics_import {
161        translator.expose_unsafe_intrinsics(name);
162    }
163    let (component, mut module_translations) = translator
164        .translate(binary)
165        .context("failed to parse WebAssembly module")?;
166
167    let compile_inputs = CompileInputs::for_component(
168        engine,
169        &types,
170        &component,
171        module_translations.iter_mut().map(|(i, translation)| {
172            let functions = mem::take(&mut translation.function_body_inputs);
173            (i, &*translation, functions)
174        }),
175    );
176    let unlinked_compile_outputs = compile_inputs.compile(&engine)?;
177
178    let PreLinkOutput {
179        needs_gc_heap,
180        compiled_funcs,
181        indices,
182    } = unlinked_compile_outputs.pre_link();
183    for (_, t) in &mut module_translations {
184        t.module.needs_gc_heap |= needs_gc_heap
185    }
186
187    let mut object = compiler.object(ObjectKind::Component)?;
188    engine.append_compiler_info(&mut object)?;
189    engine.append_bti(&mut object);
190
191    let (mut object, compilation_artifacts) = indices.link_and_append_code(
192        object,
193        engine,
194        compiled_funcs,
195        module_translations,
196        None, // TODO: Support dwarf packages for components.
197    )?;
198    let (types, ty) = types.finish(&component.component);
199
200    let info = CompiledComponentInfo {
201        component: component.component,
202    };
203    let artifacts = ComponentArtifacts {
204        info,
205        table: compilation_artifacts.table,
206        ty,
207        types,
208        static_modules: compilation_artifacts.modules,
209    };
210    object.serialize_info(&artifacts);
211
212    let result = T::finish_object(object, obj_state)?;
213    Ok((result, Some(artifacts)))
214}
215
216type CompileInput<'a> = Box<dyn FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a>;
217
218struct CompileOutput<'a> {
219    key: FuncKey,
220    symbol: String,
221    function: CompiledFunctionBody,
222    start_srcloc: FilePos,
223
224    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
225    translation: Option<&'a ModuleTranslation<'a>>,
226
227    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
228    func_body: Option<wasmparser::FunctionBody<'a>>,
229}
230
231/// Inputs to our inlining heuristics.
232struct InlineHeuristicParams<'a> {
233    tunables: &'a Tunables,
234    caller_size: u32,
235    caller_key: FuncKey,
236    caller_needs_gc_heap: bool,
237    callee_size: u32,
238    callee_key: FuncKey,
239    callee_needs_gc_heap: bool,
240}
241
242/// The collection of things we need to compile for a Wasm module or component.
243#[derive(Default)]
244struct CompileInputs<'a> {
245    inputs: Vec<CompileInput<'a>>,
246}
247
248impl<'a> CompileInputs<'a> {
249    fn push_input(
250        &mut self,
251        f: impl FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a,
252    ) {
253        self.inputs.push(Box::new(f));
254    }
255
256    /// Create the `CompileInputs` for a core Wasm module.
257    fn for_module(
258        types: &'a ModuleTypesBuilder,
259        translation: &'a ModuleTranslation<'a>,
260        functions: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
261    ) -> Self {
262        let mut ret = CompileInputs { inputs: vec![] };
263
264        let module_index = StaticModuleIndex::from_u32(0);
265        ret.collect_inputs_in_translations(types, [(module_index, translation, functions)]);
266
267        ret
268    }
269
270    /// Create a `CompileInputs` for a component.
271    #[cfg(feature = "component-model")]
272    fn for_component(
273        engine: &'a Engine,
274        types: &'a wasmtime_environ::component::ComponentTypesBuilder,
275        component: &'a wasmtime_environ::component::ComponentTranslation,
276        module_translations: impl IntoIterator<
277            Item = (
278                StaticModuleIndex,
279                &'a ModuleTranslation<'a>,
280                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
281            ),
282        >,
283    ) -> Self {
284        use wasmtime_environ::Abi;
285        use wasmtime_environ::component::UnsafeIntrinsic;
286
287        let mut ret = CompileInputs { inputs: vec![] };
288
289        ret.collect_inputs_in_translations(types.module_types_builder(), module_translations);
290        let tunables = engine.tunables();
291
292        for i in component
293            .component
294            .unsafe_intrinsics
295            .iter()
296            .enumerate()
297            .filter_map(|(i, ty)| if ty.is_some() { Some(i) } else { None })
298        {
299            let i = u32::try_from(i).unwrap();
300            let intrinsic = UnsafeIntrinsic::from_u32(i);
301            for abi in [Abi::Wasm, Abi::Array] {
302                ret.push_input(move |compiler| {
303                    let symbol = format!(
304                        "unsafe-intrinsics-{}-{}",
305                        match abi {
306                            Abi::Wasm => "wasm-call",
307                            Abi::Array => "array-call",
308                            Abi::Patchable => "patchable-call",
309                        },
310                        intrinsic.name(),
311                    );
312                    Ok(CompileOutput {
313                        key: FuncKey::UnsafeIntrinsic(abi, intrinsic),
314                        function: compiler
315                            .component_compiler()
316                            .compile_intrinsic(tunables, component, types, intrinsic, abi, &symbol)
317                            .with_context(|| format!("failed to compile `{symbol}`"))?,
318                        symbol,
319                        start_srcloc: FilePos::default(),
320                        translation: None,
321                        func_body: None,
322                    })
323                });
324            }
325        }
326
327        for (idx, trampoline) in component.trampolines.iter() {
328            for abi in [Abi::Wasm, Abi::Array] {
329                ret.push_input(move |compiler| {
330                    let key = FuncKey::ComponentTrampoline(abi, idx);
331                    let symbol = format!(
332                        "component-trampolines[{}]-{}-{}",
333                        idx.as_u32(),
334                        match abi {
335                            Abi::Wasm => "wasm-call",
336                            Abi::Array => "array-call",
337                            Abi::Patchable => "patchable-call",
338                        },
339                        trampoline.symbol_name(),
340                    );
341                    let function = compiler
342                        .component_compiler()
343                        .compile_trampoline(component, types, key, abi, tunables, &symbol)
344                        .with_context(|| format!("failed to compile {symbol}"))?;
345                    Ok(CompileOutput {
346                        key,
347                        function,
348                        symbol,
349                        start_srcloc: FilePos::default(),
350                        translation: None,
351                        func_body: None,
352                    })
353                });
354            }
355        }
356
357        // If there are any resources defined within this component, the
358        // signature for `resource.drop` is mentioned somewhere, and the
359        // wasm-to-native trampoline for `resource.drop` hasn't been created yet
360        // then insert that here. This is possibly required by destruction of
361        // resources from the embedder and otherwise won't be explicitly
362        // requested through initializers above or such.
363        if component.component.num_resources > 0 {
364            if let Some(sig) = types.find_resource_drop_signature() {
365                ret.push_input(move |compiler| {
366                    let key = FuncKey::ResourceDropTrampoline;
367                    let symbol = "resource_drop_trampoline".to_string();
368                    let function = compiler
369                        .compile_wasm_to_array_trampoline(types[sig].unwrap_func(), key, &symbol)
370                        .with_context(|| format!("failed to compile `{symbol}`"))?;
371                    Ok(CompileOutput {
372                        key,
373                        function,
374                        symbol,
375                        start_srcloc: FilePos::default(),
376                        translation: None,
377                        func_body: None,
378                    })
379                });
380            }
381        }
382
383        ret
384    }
385
386    fn clean_symbol(name: &str) -> Cow<'_, str> {
387        /// Maximum length of symbols generated in objects.
388        const MAX_SYMBOL_LEN: usize = 96;
389
390        // Just to be on the safe side, filter out characters that could
391        // pose issues to tools such as "perf" or "objdump".  To avoid
392        // having to update a list of allowed characters for each different
393        // language that compiles to Wasm, allows only graphic ASCII
394        // characters; replace runs of everything else with a "?".
395        let bad_char = |c: char| !c.is_ascii_graphic();
396        if name.chars().any(bad_char) {
397            let mut last_char_seen = '\u{0000}';
398            Cow::Owned(
399                name.chars()
400                    .map(|c| if bad_char(c) { '?' } else { c })
401                    .filter(|c| {
402                        let skip = last_char_seen == '?' && *c == '?';
403                        last_char_seen = *c;
404                        !skip
405                    })
406                    .take(MAX_SYMBOL_LEN)
407                    .collect::<String>(),
408            )
409        } else if name.len() <= MAX_SYMBOL_LEN {
410            Cow::Borrowed(&name[..])
411        } else {
412            Cow::Borrowed(&name[..MAX_SYMBOL_LEN])
413        }
414    }
415
416    fn collect_inputs_in_translations(
417        &mut self,
418        types: &'a ModuleTypesBuilder,
419        translations: impl IntoIterator<
420            Item = (
421                StaticModuleIndex,
422                &'a ModuleTranslation<'a>,
423                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
424            ),
425        >,
426    ) {
427        for (module, translation, functions) in translations {
428            for (def_func_index, func_body_data) in functions {
429                self.push_input(move |compiler| {
430                    let key = FuncKey::DefinedWasmFunction(module, def_func_index);
431                    let func_index = translation.module.func_index(def_func_index);
432                    let symbol = match translation
433                        .debuginfo
434                        .name_section
435                        .func_names
436                        .get(&func_index)
437                    {
438                        Some(name) => format!(
439                            "wasm[{}]::function[{}]::{}",
440                            module.as_u32(),
441                            func_index.as_u32(),
442                            Self::clean_symbol(&name)
443                        ),
444                        None => format!(
445                            "wasm[{}]::function[{}]",
446                            module.as_u32(),
447                            func_index.as_u32()
448                        ),
449                    };
450                    let func_body = func_body_data.body.clone();
451                    let data = func_body.get_binary_reader();
452                    let offset = data.original_position();
453                    let start_srcloc = FilePos::new(u32::try_from(offset).unwrap());
454                    let function = compiler
455                        .compile_function(translation, key, func_body_data, types, &symbol)
456                        .with_context(|| format!("failed to compile: {symbol}"))?;
457
458                    Ok(CompileOutput {
459                        key,
460                        symbol,
461                        function,
462                        start_srcloc,
463                        translation: Some(translation),
464                        func_body: Some(func_body),
465                    })
466                });
467
468                let func_index = translation.module.func_index(def_func_index);
469                if translation.module.functions[func_index].is_escaping() {
470                    self.push_input(move |compiler| {
471                        let key = FuncKey::ArrayToWasmTrampoline(module, def_func_index);
472                        let func_index = translation.module.func_index(def_func_index);
473                        let symbol = format!(
474                            "wasm[{}]::array_to_wasm_trampoline[{}]",
475                            module.as_u32(),
476                            func_index.as_u32()
477                        );
478                        let function = compiler
479                            .compile_array_to_wasm_trampoline(translation, types, key, &symbol)
480                            .with_context(|| format!("failed to compile: {symbol}"))?;
481                        Ok(CompileOutput {
482                            key,
483                            symbol,
484                            function,
485                            start_srcloc: FilePos::default(),
486                            translation: None,
487                            func_body: None,
488                        })
489                    });
490                }
491            }
492        }
493
494        let mut trampoline_types_seen = HashSet::new();
495        for (_func_type_index, trampoline_type_index) in types.trampoline_types() {
496            let is_new = trampoline_types_seen.insert(trampoline_type_index);
497            if !is_new {
498                continue;
499            }
500            let trampoline_func_ty = types[trampoline_type_index].unwrap_func();
501            self.push_input(move |compiler| {
502                let key = FuncKey::WasmToArrayTrampoline(trampoline_type_index);
503                let symbol = format!(
504                    "signatures[{}]::wasm_to_array_trampoline",
505                    trampoline_type_index.as_u32()
506                );
507                let function = compiler
508                    .compile_wasm_to_array_trampoline(trampoline_func_ty, key, &symbol)
509                    .with_context(|| format!("failed to compile: {symbol}"))?;
510                Ok(CompileOutput {
511                    key,
512                    function,
513                    symbol,
514                    start_srcloc: FilePos::default(),
515                    translation: None,
516                    func_body: None,
517                })
518            });
519        }
520    }
521
522    /// Compile these `CompileInput`s (maybe in parallel) and return the
523    /// resulting `UnlinkedCompileOutput`s.
524    fn compile(self, engine: &Engine) -> Result<UnlinkedCompileOutputs<'a>> {
525        let compiler = engine.try_compiler()?;
526
527        if self.inputs.len() > 0 && cfg!(miri) {
528            bail!(
529                "\
530You are attempting to compile a WebAssembly module or component that contains
531functions in Miri. Running Cranelift through Miri is known to take quite a long
532time and isn't what we want in CI at least. If this is a mistake then you should
533ignore this test in Miri with:
534
535    #[cfg_attr(miri, ignore)]
536
537If this is not a mistake then try to edit the `pulley_provenance_test` test
538which runs Cranelift outside of Miri. If you still feel this is a mistake then
539please open an issue or a topic on Zulip to talk about how best to accommodate
540the use case.
541"
542            );
543        }
544
545        let mut raw_outputs = if let Some(inlining_compiler) = compiler.inlining_compiler() {
546            if engine.tunables().inlining {
547                self.compile_with_inlining(engine, compiler, inlining_compiler)?
548            } else {
549                // Inlining compiler but inlining is disabled: compile each
550                // input and immediately finish its output in parallel, skipping
551                // call graph computation and all that.
552                engine.run_maybe_parallel::<_, _, Error, _>(self.inputs, |f| {
553                    let mut compiled = f(compiler)?;
554                    inlining_compiler.finish_compiling(
555                        &mut compiled.function,
556                        compiled.func_body.take(),
557                        &compiled.symbol,
558                    )?;
559                    Ok(compiled)
560                })?
561            }
562        } else {
563            // No inlining: just compile each individual input in parallel.
564            engine.run_maybe_parallel(self.inputs, |f| f(compiler))?
565        };
566
567        if cfg!(debug_assertions) {
568            let mut symbols: Vec<_> = raw_outputs.iter().map(|i| &i.symbol).collect();
569            symbols.sort();
570            for w in symbols.windows(2) {
571                assert_ne!(
572                    w[0], w[1],
573                    "should never have duplicate symbols, but found two functions with the symbol `{}`",
574                    w[0]
575                );
576            }
577        }
578
579        // Now that all functions have been compiled see if any
580        // wasmtime-builtin functions are necessary. If so those need to be
581        // collected and then those trampolines additionally need to be
582        // compiled.
583        compile_required_builtins(engine, &mut raw_outputs)?;
584
585        // Bucket the outputs by kind.
586        let mut outputs: BTreeMap<FuncKey, CompileOutput> = BTreeMap::new();
587        for output in raw_outputs {
588            outputs.insert(output.key, output);
589        }
590
591        Ok(UnlinkedCompileOutputs { outputs })
592    }
593
594    fn compile_with_inlining(
595        self,
596        engine: &Engine,
597        compiler: &dyn Compiler,
598        inlining_compiler: &dyn InliningCompiler,
599    ) -> Result<Vec<CompileOutput<'a>>, Error> {
600        /// The index of a function (of any kind: Wasm function, trampoline, or
601        /// etc...) in our list of unlinked outputs.
602        #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
603        struct OutputIndex(u32);
604        wasmtime_environ::entity_impl!(OutputIndex);
605
606        // Our list of unlinked outputs.
607        let mut outputs = PrimaryMap::<OutputIndex, Option<CompileOutput<'_>>>::from(
608            engine.run_maybe_parallel(self.inputs, |f| f(compiler).map(Some))?,
609        );
610
611        /// Whether a function (as described by the given `FuncKey`) can
612        /// participate in inlining or not (either as a candidate for being
613        /// inlined into a caller or having a callee inlined into a callsite
614        /// within itself).
615        fn is_inlining_function(key: FuncKey) -> bool {
616            match key {
617                // Wasm functions can both be inlined into other functions and
618                // have other functions inlined into them.
619                FuncKey::DefinedWasmFunction(..) => true,
620
621                // Intrinsics can be inlined into other functions.
622                #[cfg(feature = "component-model")]
623                FuncKey::UnsafeIntrinsic(..) => true,
624
625                // Trampolines cannot participate in inlining since our
626                // unwinding and exceptions infrastructure relies on them being
627                // in their own call frames.
628                FuncKey::ArrayToWasmTrampoline(..)
629                | FuncKey::WasmToArrayTrampoline(..)
630                | FuncKey::WasmToBuiltinTrampoline(..)
631                | FuncKey::PatchableToBuiltinTrampoline(..) => false,
632                #[cfg(feature = "component-model")]
633                FuncKey::ComponentTrampoline(..) | FuncKey::ResourceDropTrampoline => false,
634
635                FuncKey::PulleyHostCall(_) => {
636                    unreachable!("we don't compile artifacts for Pulley host calls")
637                }
638            }
639        }
640
641        /// Get just the output indices of the functions that can participate in
642        /// inlining from our unlinked outputs.
643        fn inlining_functions<'a>(
644            outputs: &'a PrimaryMap<OutputIndex, Option<CompileOutput<'_>>>,
645        ) -> impl Iterator<Item = OutputIndex> + 'a {
646            outputs.iter().filter_map(|(index, output)| {
647                if is_inlining_function(output.as_ref().unwrap().key) {
648                    Some(index)
649                } else {
650                    None
651                }
652            })
653        }
654
655        // A map from a `FuncKey` to its index in our unlinked outputs.
656        //
657        // We will generally just be working with `OutputIndex`es, but
658        // occasionally we must translate from keys back to our index space, for
659        // example when we know that one module's function import is always
660        // satisfied with a particular `FuncKey::DefinedWasmFunction`. This map
661        // enables that translation.
662        let key_to_output: HashMap<FuncKey, OutputIndex> = inlining_functions(&outputs)
663            .map(|output_index| {
664                let output = outputs[output_index].as_ref().unwrap();
665                (output.key, output_index)
666            })
667            .collect();
668
669        // Construct the call graph for inlining.
670        //
671        // We only inline Wasm functions, not trampolines, because we rely on
672        // trampolines being in their own stack frame when we save the entry and
673        // exit SP, FP, and PC for backtraces in trampolines.
674        let call_graph = CallGraph::<OutputIndex>::new(inlining_functions(&outputs), {
675            let mut func_keys = IndexSet::default();
676            let outputs = &outputs;
677            let key_to_output = &key_to_output;
678            move |output_index, calls| {
679                debug_assert!(calls.is_empty());
680
681                let output = outputs[output_index].as_ref().unwrap();
682                debug_assert!(is_inlining_function(output.key));
683
684                // Get this function's call graph edges as `FuncKey`s.
685                func_keys.clear();
686                inlining_compiler.calls(&output.function, &mut func_keys)?;
687
688                // Translate each of those to keys to output indices, which is
689                // what we actually need.
690                calls.extend(
691                    func_keys
692                        .iter()
693                        .copied()
694                        .filter_map(|key| key_to_output.get(&key)),
695                );
696
697                log::trace!(
698                    "call graph edges for {output_index:?} = {:?}: {calls:?}",
699                    output.key
700                );
701                Ok(())
702            }
703        })?;
704
705        // Stratify the call graph into a sequence of layers. We process each
706        // layer in order, but process functions within a layer in parallel
707        // (because they either do not call each other or are part of a
708        // mutual-recursion cycle; either way we won't inline members of the
709        // same layer into each other).
710        let strata =
711            stratify::Strata::<OutputIndex>::new(inlining_functions(&outputs), &call_graph);
712        let mut layer_outputs = vec![];
713        for layer in strata.layers() {
714            // Temporarily take this layer's outputs out of our unlinked outputs
715            // list so that we can mutate these outputs (by inlining callee
716            // functions into them) while also accessing shared borrows of the
717            // unlinked outputs list (finding the callee functions we will
718            // inline).
719            debug_assert!(layer_outputs.is_empty());
720            layer_outputs.extend(layer.iter().map(|f| outputs[*f].take().unwrap()));
721
722            // Process this layer's members in parallel.
723            engine.run_maybe_parallel_mut(
724                &mut layer_outputs,
725                |output: &mut CompileOutput<'_>| {
726                    log::trace!("processing inlining for {:?}", output.key);
727                    debug_assert!(is_inlining_function(output.key));
728
729                    let caller_key = output.key;
730                    let caller_needs_gc_heap =
731                        output.translation.is_some_and(|t| t.module.needs_gc_heap);
732                    let caller = &mut output.function;
733
734                    let mut caller_size = inlining_compiler.size(caller);
735
736                    inlining_compiler.inline(caller, &mut |callee_key: FuncKey| {
737                        log::trace!("  --> considering call to {callee_key:?}");
738                        let callee_output_index: OutputIndex = key_to_output[&callee_key];
739
740                        // NB: If the callee is not inside `outputs`, then it is
741                        // in the same `Strata` layer as the caller (and
742                        // therefore is in the same strongly-connected component
743                        // as the caller, and they mutually recursive). In this
744                        // case, we do not do any inlining; communicate this
745                        // command via `?`-propagation.
746                        let callee_output = outputs[callee_output_index].as_ref()?;
747
748                        debug_assert_eq!(callee_output.key, callee_key);
749
750                        let callee = &callee_output.function;
751                        let callee_size = inlining_compiler.size(callee);
752
753                        let callee_needs_gc_heap = callee_output
754                            .translation
755                            .is_some_and(|t| t.module.needs_gc_heap);
756
757                        if Self::should_inline(InlineHeuristicParams {
758                            tunables: engine.tunables(),
759                            caller_size,
760                            caller_key,
761                            caller_needs_gc_heap,
762                            callee_size,
763                            callee_key,
764                            callee_needs_gc_heap,
765                        }) {
766                            caller_size = caller_size.saturating_add(callee_size);
767                            Some(callee)
768                        } else {
769                            None
770                        }
771                    })
772                },
773            )?;
774
775            for (f, func) in layer.iter().zip(layer_outputs.drain(..)) {
776                debug_assert!(outputs[*f].is_none());
777                outputs[*f] = Some(func);
778            }
779        }
780
781        // Fan out in parallel again and finish compiling each function.
782        engine.run_maybe_parallel(outputs.into(), |output| {
783            let mut output = output.unwrap();
784            inlining_compiler.finish_compiling(
785                &mut output.function,
786                output.func_body.take(),
787                &output.symbol,
788            )?;
789            Ok(output)
790        })
791    }
792
793    /// Implementation of our inlining heuristics.
794    ///
795    /// TODO: We should improve our heuristics:
796    ///
797    /// * One potentially promising hint that we don't currently make use of is
798    ///   how many times a function appears as the callee in call sites. For
799    ///   example, a function that appears in only a single call site, and does
800    ///   not otherwise escape, is often beneficial to inline regardless of its
801    ///   size (assuming we can then GC away the non-inlined version of the
802    ///   function, which we do not currently attempt to do).
803    ///
804    /// * Another potentially promising hint would be whether any of the call
805    ///   site's actual arguments are constants.
806    ///
807    /// * A general improvement would be removing the decision-tree style of
808    ///   control flow below and replacing it with (1) a pure estimated-benefit
809    ///   formula and (2) a benefit threshold. Whenever the estimated benefit
810    ///   reaches the threshold, we would inline the call. Both the formula and
811    ///   the threshold would be parameterized by tunables. This would
812    ///   effectively allow reprioritizing the relative importance of different
813    ///   hint sources, rather than being stuck with the sequence hard-coded in
814    ///   the decision tree below.
815    fn should_inline(
816        InlineHeuristicParams {
817            tunables,
818            caller_size,
819            caller_key,
820            caller_needs_gc_heap,
821            callee_size,
822            callee_key,
823            callee_needs_gc_heap,
824        }: InlineHeuristicParams,
825    ) -> bool {
826        log::trace!(
827            "considering inlining:\n\
828             \tcaller = {caller_key:?}\n\
829             \t\tsize = {caller_size}\n\
830             \t\tneeds_gc_heap = {caller_needs_gc_heap}\n\
831             \tcallee = {callee_key:?}\n\
832             \t\tsize = {callee_size}\n\
833             \t\tneeds_gc_heap = {callee_needs_gc_heap}"
834        );
835
836        debug_assert!(
837            tunables.inlining,
838            "shouldn't even call this method if we aren't configured for inlining"
839        );
840        debug_assert_ne!(caller_key, callee_key, "we never inline recursion");
841
842        // Put a limit on how large we can make a function via inlining to cap
843        // code bloat.
844        let sum_size = caller_size.saturating_add(callee_size);
845        if sum_size > tunables.inlining_sum_size_threshold {
846            log::trace!(
847                "  --> not inlining: the sum of the caller's and callee's sizes is greater than \
848                 the inlining-sum-size threshold: {callee_size} + {caller_size} > {}",
849                tunables.inlining_sum_size_threshold
850            );
851            return false;
852        }
853
854        // Skip inlining into array-abi functions which are entry
855        // trampolines into wasm. ABI-wise it's required that these have a
856        // single `try_call` into the module and it doesn't work if multiple
857        // get inlined or if the `try_call` goes away. Prevent all inlining
858        // to guarantee the structure of entry trampolines.
859        if caller_key.abi() == Abi::Array {
860            log::trace!("  --> not inlining: not inlining into array-abi caller");
861            return false;
862        }
863
864        // Consider whether this is an intra-module call.
865        //
866        // Inlining within a single core module has most often already been done
867        // by the toolchain that produced the module, e.g. LLVM, and any extant
868        // function calls to small callees were presumably annotated with the
869        // equivalent of `#[inline(never)]` or `#[cold]` but we don't have that
870        // information anymore.
871        match (caller_key, callee_key) {
872            (
873                FuncKey::DefinedWasmFunction(caller_module, _),
874                FuncKey::DefinedWasmFunction(callee_module, _),
875            ) => {
876                if caller_module == callee_module {
877                    match tunables.inlining_intra_module {
878                        IntraModuleInlining::Yes => {}
879
880                        IntraModuleInlining::WhenUsingGc
881                            if caller_needs_gc_heap || callee_needs_gc_heap => {}
882
883                        IntraModuleInlining::WhenUsingGc => {
884                            log::trace!(
885                                "  --> not inlining: intra-module call that does not use GC"
886                            );
887                            return false;
888                        }
889
890                        IntraModuleInlining::No => {
891                            log::trace!("  --> not inlining: intra-module call");
892                            return false;
893                        }
894                    }
895                }
896            }
897
898            _ => {}
899        }
900
901        // Small callees are often worth inlining regardless of the size of the
902        // caller.
903        if callee_size <= tunables.inlining_small_callee_size {
904            log::trace!(
905                "  --> inlining: callee's size is less than the small-callee size: \
906                 {callee_size} <= {}",
907                tunables.inlining_small_callee_size
908            );
909            return true;
910        }
911
912        log::trace!("  --> inlining: did not find a reason we should not");
913        true
914    }
915}
916
917fn compile_required_builtins(engine: &Engine, raw_outputs: &mut Vec<CompileOutput>) -> Result<()> {
918    let compiler = engine.try_compiler()?;
919    let mut builtins = HashSet::new();
920    let mut new_inputs: Vec<CompileInput<'_>> = Vec::new();
921
922    let compile_builtin = |key: FuncKey| {
923        Box::new(move |compiler: &dyn Compiler| {
924            let symbol = match key {
925                FuncKey::WasmToBuiltinTrampoline(builtin) => {
926                    format!("wasmtime_builtin_{}", builtin.name())
927                }
928                FuncKey::PatchableToBuiltinTrampoline(builtin) => {
929                    format!("wasmtime_patchable_builtin_{}", builtin.name())
930                }
931                _ => unreachable!(),
932            };
933            let mut function = compiler
934                .compile_wasm_to_builtin(key, &symbol)
935                .with_context(|| format!("failed to compile `{symbol}`"))?;
936            if let Some(compiler) = compiler.inlining_compiler() {
937                compiler.finish_compiling(&mut function, None, &symbol)?;
938            }
939            Ok(CompileOutput {
940                key,
941                function,
942                symbol,
943                start_srcloc: FilePos::default(),
944                translation: None,
945                func_body: None,
946            })
947        })
948    };
949
950    for output in raw_outputs.iter() {
951        for reloc in compiler.compiled_function_relocation_targets(&*output.function.code) {
952            match reloc {
953                FuncKey::WasmToBuiltinTrampoline(builtin)
954                | FuncKey::PatchableToBuiltinTrampoline(builtin) => {
955                    if builtins.insert(builtin) {
956                        new_inputs.push(compile_builtin(reloc));
957                    }
958                }
959                _ => {}
960            }
961        }
962    }
963    raw_outputs.extend(engine.run_maybe_parallel(new_inputs, |c| c(compiler))?);
964    Ok(())
965}
966
967#[derive(Default)]
968struct UnlinkedCompileOutputs<'a> {
969    // A map from kind to `CompileOutput`.
970    outputs: BTreeMap<FuncKey, CompileOutput<'a>>,
971}
972
973impl UnlinkedCompileOutputs<'_> {
974    /// Flatten all our functions into a single list and remember each of their
975    /// indices within it.
976    fn pre_link(self) -> PreLinkOutput {
977        // We must ensure that `compiled_funcs` contains the function bodies
978        // sorted by their `FuncKey`, as `CompiledFunctionsTable` relies on that
979        // property.
980        //
981        // Furthermore, note that, because the order functions end up in
982        // `compiled_funcs` is the order they will ultimately be laid out inside
983        // the object file, we will group all trampolines together, all defined
984        // Wasm functions from the same module together, and etc... This is a
985        // nice property, because it means that (a) cold functions, like builtin
986        // trampolines, are not interspersed between hot Wasm functions, and (b)
987        // Wasm functions that are likely to call each other (i.e. are in the
988        // same module together) are grouped together.
989        let mut compiled_funcs = vec![];
990
991        let mut indices = FunctionIndices::default();
992        let mut needs_gc_heap = false;
993
994        // NB: Iteration over this `BTreeMap` ensures that we uphold
995        // `compiled_func`'s sorted property.
996        for output in self.outputs.into_values() {
997            needs_gc_heap |= output.function.needs_gc_heap;
998
999            let index = compiled_funcs.len();
1000            compiled_funcs.push((output.symbol, output.key, output.function.code));
1001
1002            if output.start_srcloc != FilePos::none() {
1003                indices
1004                    .start_srclocs
1005                    .insert(output.key, output.start_srcloc);
1006            }
1007
1008            indices.indices.insert(output.key, index);
1009        }
1010
1011        PreLinkOutput {
1012            needs_gc_heap,
1013            compiled_funcs,
1014            indices,
1015        }
1016    }
1017}
1018
1019/// Our pre-link functions that have been flattened into a single list.
1020struct PreLinkOutput {
1021    /// Whether or not any of these functions require a GC heap
1022    needs_gc_heap: bool,
1023    /// The flattened list of (symbol name, FuncKey, compiled
1024    /// function) triples, as they will be laid out in the object
1025    /// file.
1026    compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1027    /// The `FunctionIndices` mapping our function keys to indices in that flat
1028    /// list.
1029    indices: FunctionIndices,
1030}
1031
1032#[derive(Default)]
1033struct FunctionIndices {
1034    // A map of wasm functions and where they're located in the original file.
1035    start_srclocs: HashMap<FuncKey, FilePos>,
1036
1037    // The index of each compiled function in `compiled_funcs`.
1038    indices: BTreeMap<FuncKey, usize>,
1039}
1040
1041impl FunctionIndices {
1042    /// Link the compiled functions together, resolving relocations, and append
1043    /// them to the given ELF file.
1044    fn link_and_append_code<'a>(
1045        self,
1046        mut obj: object::write::Object<'static>,
1047        engine: &'a Engine,
1048        compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1049        translations: PrimaryMap<StaticModuleIndex, ModuleTranslation<'_>>,
1050        dwarf_package_bytes: Option<&[u8]>,
1051    ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> {
1052        // Append all the functions to the ELF file.
1053        //
1054        // The result is a vector parallel to `compiled_funcs` where
1055        // `symbol_ids_and_locs[i]` is the symbol ID and function location of
1056        // `compiled_funcs[i]`.
1057        let compiler = engine.try_compiler()?;
1058        let tunables = engine.tunables();
1059        let symbol_ids_and_locs = compiler.append_code(
1060            &mut obj,
1061            &compiled_funcs,
1062            &|_caller_index: usize, callee: FuncKey| {
1063                self.indices.get(&callee).copied().unwrap_or_else(|| {
1064                    panic!("cannot resolve relocation! no index for callee {callee:?}")
1065                })
1066            },
1067        )?;
1068
1069        // If requested, generate and add DWARF information.
1070        if tunables.debug_native {
1071            compiler.append_dwarf(
1072                &mut obj,
1073                &translations,
1074                &|module, func| {
1075                    let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)];
1076                    let (symbol, _) = symbol_ids_and_locs[i];
1077                    let (_, _, compiled_func) = &compiled_funcs[i];
1078                    (symbol, &**compiled_func)
1079                },
1080                dwarf_package_bytes,
1081                tunables,
1082            )?;
1083        }
1084
1085        let mut table_builder = CompiledFunctionsTableBuilder::new();
1086        for (key, compiled_func_index) in &self.indices {
1087            let (_, func_loc) = symbol_ids_and_locs[*compiled_func_index];
1088            let src_loc = self
1089                .start_srclocs
1090                .get(key)
1091                .copied()
1092                .unwrap_or_else(FilePos::none);
1093            table_builder.push_func(*key, func_loc, src_loc);
1094        }
1095
1096        let mut obj = wasmtime_environ::ObjectBuilder::new(obj, tunables);
1097        let modules = translations
1098            .into_iter()
1099            .map(|(_, mut translation)| {
1100                // If configured attempt to use static memory initialization
1101                // which can either at runtime be implemented as a single memcpy
1102                // to initialize memory or otherwise enabling
1103                // virtual-memory-tricks such as mmap'ing from a file to get
1104                // copy-on-write.
1105                if engine.tunables().memory_init_cow {
1106                    let align = compiler.page_size_align();
1107                    let max_always_allowed = engine.config().memory_guaranteed_dense_image_size;
1108                    translation.try_static_init(align, max_always_allowed);
1109                }
1110
1111                // Attempt to convert table initializer segments to FuncTable
1112                // representation where possible, to enable table lazy init.
1113                if engine.tunables().table_lazy_init {
1114                    translation.try_func_table_init();
1115                }
1116
1117                obj.append(translation)
1118            })
1119            .collect::<Result<PrimaryMap<_, _>>>()?;
1120
1121        let artifacts = Artifacts {
1122            modules,
1123            table: table_builder.finish(),
1124        };
1125
1126        Ok((obj, artifacts))
1127    }
1128}
1129
1130/// The artifacts necessary for finding and calling Wasm functions at runtime,
1131/// to be serialized into an ELF file.
1132struct Artifacts {
1133    modules: PrimaryMap<StaticModuleIndex, CompiledModuleInfo>,
1134    table: CompiledFunctionsTable,
1135}
1136
1137impl Artifacts {
1138    /// Assuming this compilation was for a single core Wasm module, get the
1139    /// resulting `CompiledModuleInfo`.
1140    fn unwrap_as_module_info(self) -> (CompiledModuleInfo, CompiledFunctionsTable) {
1141        assert_eq!(self.modules.len(), 1);
1142        let info = self.modules.into_iter().next().unwrap().1;
1143        let table = self.table;
1144        (info, table)
1145    }
1146}
1147
1148/// Extend `dest` with `items` and return the range of indices in `dest` where
1149/// they ended up.
1150fn extend_with_range<T>(dest: &mut Vec<T>, items: impl IntoIterator<Item = T>) -> Range<u32> {
1151    let start = dest.len();
1152    let start = u32::try_from(start).unwrap();
1153
1154    dest.extend(items);
1155
1156    let end = dest.len();
1157    let end = u32::try_from(end).unwrap();
1158
1159    start..end
1160}