wasmtime/
compile.rs

1//! Wasm compilation orchestration.
2//!
3//! It works roughly like this:
4//!
5//! * We walk over the Wasm module/component and make a list of all the things
6//!   we need to compile. This is a `CompileInputs`.
7//!
8//! * The `CompileInputs::compile` method compiles each of these in parallel,
9//!   producing a `UnlinkedCompileOutputs`. This is an unlinked set of compiled
10//!   functions, bucketed by type of function.
11//!
12//! * The `UnlinkedCompileOutputs::pre_link` method re-arranges the compiled
13//!   functions into a flat list. This is the order we will place them within
14//!   the ELF file, so we must also keep track of all the functions' indices
15//!   within this list, because we will need them for resolving
16//!   relocations. These indices are kept track of in the resulting
17//!   `FunctionIndices`.
18//!
19//! * The `FunctionIndices::link_and_append_code` method appends the functions
20//!   to the given ELF file and resolves relocations. It produces an `Artifacts`
21//!   which contains the data needed at runtime to find and call Wasm
22//!   functions. It is up to the caller to serialize the relevant parts of the
23//!   `Artifacts` into the ELF file.
24
25use crate::Engine;
26use crate::hash_map::HashMap;
27use crate::hash_set::HashSet;
28use crate::prelude::*;
29use std::{any::Any, borrow::Cow, collections::BTreeMap, mem, ops::Range};
30
31use call_graph::CallGraph;
32#[cfg(feature = "component-model")]
33use wasmtime_environ::component::Translator;
34use wasmtime_environ::{
35    BuiltinFunctionIndex, CompiledFunctionBody, CompiledFunctionsTable,
36    CompiledFunctionsTableBuilder, CompiledModuleInfo, Compiler, DefinedFuncIndex, FilePos,
37    FinishedObject, FuncKey, FunctionBodyData, InliningCompiler, IntraModuleInlining,
38    ModuleEnvironment, ModuleTranslation, ModuleTypes, ModuleTypesBuilder, ObjectKind, PrimaryMap,
39    StaticModuleIndex, Tunables,
40};
41
42mod call_graph;
43mod scc;
44mod stratify;
45
46mod code_builder;
47pub use self::code_builder::{CodeBuilder, CodeHint, HashedEngineCompileEnv};
48
49#[cfg(feature = "runtime")]
50mod runtime;
51
52/// Converts an input binary-encoded WebAssembly module to compilation
53/// artifacts and type information.
54///
55/// This is where compilation actually happens of WebAssembly modules and
56/// translation/parsing/validation of the binary input occurs. The binary
57/// artifact represented in the `MmapVec` returned here is an in-memory ELF
58/// file in an owned area of virtual linear memory where permissions (such
59/// as the executable bit) can be applied.
60///
61/// Additionally compilation returns an `Option` here which is always
62/// `Some`, notably compiled metadata about the module in addition to the
63/// type information found within.
64pub(crate) fn build_module_artifacts<T: FinishedObject>(
65    engine: &Engine,
66    wasm: &[u8],
67    dwarf_package: Option<&[u8]>,
68    obj_state: &T::State,
69) -> Result<(
70    T,
71    Option<(CompiledModuleInfo, CompiledFunctionsTable, ModuleTypes)>,
72)> {
73    let tunables = engine.tunables();
74
75    // First a `ModuleEnvironment` is created which records type information
76    // about the wasm module. This is where the WebAssembly is parsed and
77    // validated. Afterwards `types` will have all the type information for
78    // this module.
79    let mut parser = wasmparser::Parser::new(0);
80    let mut validator = wasmparser::Validator::new_with_features(engine.features());
81    parser.set_features(*validator.features());
82    let mut types = ModuleTypesBuilder::new(&validator);
83    let mut translation = ModuleEnvironment::new(
84        tunables,
85        &mut validator,
86        &mut types,
87        StaticModuleIndex::from_u32(0),
88    )
89    .translate(parser, wasm)
90    .context("failed to parse WebAssembly module")?;
91    let functions = mem::take(&mut translation.function_body_inputs);
92
93    let compile_inputs = CompileInputs::for_module(&types, &translation, functions);
94    let unlinked_compile_outputs = compile_inputs.compile(engine)?;
95    let PreLinkOutput {
96        needs_gc_heap,
97        compiled_funcs,
98        indices,
99    } = unlinked_compile_outputs.pre_link();
100    translation.module.needs_gc_heap |= needs_gc_heap;
101
102    // Emplace all compiled functions into the object file with any other
103    // sections associated with code as well.
104    let mut object = engine.compiler().object(ObjectKind::Module)?;
105    // Insert `Engine` and type-level information into the compiled
106    // artifact so if this module is deserialized later it contains all
107    // information necessary.
108    //
109    // Note that `append_compiler_info` and `append_types` here in theory
110    // can both be skipped if this module will never get serialized.
111    // They're only used during deserialization and not during runtime for
112    // the module itself. Currently there's no need for that, however, so
113    // it's left as an exercise for later.
114    engine.append_compiler_info(&mut object);
115    engine.append_bti(&mut object);
116
117    let (mut object, compilation_artifacts) = indices.link_and_append_code(
118        object,
119        engine,
120        compiled_funcs,
121        std::iter::once(translation).collect(),
122        dwarf_package,
123    )?;
124
125    let (info, index) = compilation_artifacts.unwrap_as_module_info();
126    let types = types.finish();
127    object.serialize_info(&(&info, &index, &types));
128    let result = T::finish_object(object, obj_state)?;
129
130    Ok((result, Some((info, index, types))))
131}
132
133/// Performs the compilation phase for a component, translating and
134/// validating the provided wasm binary to machine code.
135///
136/// This method will compile all nested core wasm binaries in addition to
137/// any necessary extra functions required for operation with components.
138/// The output artifact here is the serialized object file contained within
139/// an owned mmap along with metadata about the compilation itself.
140#[cfg(feature = "component-model")]
141pub(crate) fn build_component_artifacts<T: FinishedObject>(
142    engine: &Engine,
143    binary: &[u8],
144    _dwarf_package: Option<&[u8]>,
145    unsafe_intrinsics_import: Option<&str>,
146    obj_state: &T::State,
147) -> Result<(T, Option<wasmtime_environ::component::ComponentArtifacts>)> {
148    use wasmtime_environ::ScopeVec;
149    use wasmtime_environ::component::{
150        CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder,
151    };
152
153    let tunables = engine.tunables();
154    let compiler = engine.compiler();
155
156    let scope = ScopeVec::new();
157    let mut validator = wasmparser::Validator::new_with_features(engine.features());
158    let mut types = ComponentTypesBuilder::new(&validator);
159    let mut translator = Translator::new(tunables, &mut validator, &mut types, &scope);
160    if let Some(name) = unsafe_intrinsics_import {
161        translator.expose_unsafe_intrinsics(name);
162    }
163    let (component, mut module_translations) = translator
164        .translate(binary)
165        .context("failed to parse WebAssembly module")?;
166
167    let compile_inputs = CompileInputs::for_component(
168        engine,
169        &types,
170        &component,
171        module_translations.iter_mut().map(|(i, translation)| {
172            let functions = mem::take(&mut translation.function_body_inputs);
173            (i, &*translation, functions)
174        }),
175    );
176    let unlinked_compile_outputs = compile_inputs.compile(&engine)?;
177
178    let PreLinkOutput {
179        needs_gc_heap,
180        compiled_funcs,
181        indices,
182    } = unlinked_compile_outputs.pre_link();
183    for (_, t) in &mut module_translations {
184        t.module.needs_gc_heap |= needs_gc_heap
185    }
186
187    let mut object = compiler.object(ObjectKind::Component)?;
188    engine.append_compiler_info(&mut object);
189    engine.append_bti(&mut object);
190
191    let (mut object, compilation_artifacts) = indices.link_and_append_code(
192        object,
193        engine,
194        compiled_funcs,
195        module_translations,
196        None, // TODO: Support dwarf packages for components.
197    )?;
198    let (types, ty) = types.finish(&component.component);
199
200    let info = CompiledComponentInfo {
201        component: component.component,
202    };
203    let artifacts = ComponentArtifacts {
204        info,
205        table: compilation_artifacts.table,
206        ty,
207        types,
208        static_modules: compilation_artifacts.modules,
209    };
210    object.serialize_info(&artifacts);
211
212    let result = T::finish_object(object, obj_state)?;
213    Ok((result, Some(artifacts)))
214}
215
216type CompileInput<'a> = Box<dyn FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a>;
217
218struct CompileOutput<'a> {
219    key: FuncKey,
220    symbol: String,
221    function: CompiledFunctionBody,
222    start_srcloc: FilePos,
223
224    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
225    translation: Option<&'a ModuleTranslation<'a>>,
226
227    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
228    func_body: Option<wasmparser::FunctionBody<'a>>,
229}
230
231/// Inputs to our inlining heuristics.
232struct InlineHeuristicParams<'a> {
233    tunables: &'a Tunables,
234    caller_size: u32,
235    caller_key: FuncKey,
236    caller_needs_gc_heap: bool,
237    callee_size: u32,
238    callee_key: FuncKey,
239    callee_needs_gc_heap: bool,
240}
241
242/// The collection of things we need to compile for a Wasm module or component.
243#[derive(Default)]
244struct CompileInputs<'a> {
245    inputs: Vec<CompileInput<'a>>,
246}
247
248impl<'a> CompileInputs<'a> {
249    fn push_input(
250        &mut self,
251        f: impl FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a,
252    ) {
253        self.inputs.push(Box::new(f));
254    }
255
256    /// Create the `CompileInputs` for a core Wasm module.
257    fn for_module(
258        types: &'a ModuleTypesBuilder,
259        translation: &'a ModuleTranslation<'a>,
260        functions: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
261    ) -> Self {
262        let mut ret = CompileInputs { inputs: vec![] };
263
264        let module_index = StaticModuleIndex::from_u32(0);
265        ret.collect_inputs_in_translations(types, [(module_index, translation, functions)]);
266
267        ret
268    }
269
270    /// Create a `CompileInputs` for a component.
271    #[cfg(feature = "component-model")]
272    fn for_component(
273        engine: &'a Engine,
274        types: &'a wasmtime_environ::component::ComponentTypesBuilder,
275        component: &'a wasmtime_environ::component::ComponentTranslation,
276        module_translations: impl IntoIterator<
277            Item = (
278                StaticModuleIndex,
279                &'a ModuleTranslation<'a>,
280                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
281            ),
282        >,
283    ) -> Self {
284        use wasmtime_environ::Abi;
285        use wasmtime_environ::component::UnsafeIntrinsic;
286
287        let mut ret = CompileInputs { inputs: vec![] };
288
289        ret.collect_inputs_in_translations(types.module_types_builder(), module_translations);
290        let tunables = engine.tunables();
291
292        for i in component
293            .component
294            .unsafe_intrinsics
295            .iter()
296            .enumerate()
297            .filter_map(|(i, ty)| if ty.is_some() { Some(i) } else { None })
298        {
299            let i = u32::try_from(i).unwrap();
300            let intrinsic = UnsafeIntrinsic::from_u32(i);
301            for abi in [Abi::Wasm, Abi::Array] {
302                ret.push_input(move |compiler| {
303                    let symbol = format!(
304                        "unsafe-intrinsics-{}-{}",
305                        match abi {
306                            Abi::Wasm => "wasm-call",
307                            Abi::Array => "array-call",
308                        },
309                        intrinsic.name(),
310                    );
311                    Ok(CompileOutput {
312                        key: FuncKey::UnsafeIntrinsic(abi, intrinsic),
313                        function: compiler
314                            .component_compiler()
315                            .compile_intrinsic(tunables, component, intrinsic, abi, &symbol)
316                            .with_context(|| format!("failed to compile `{symbol}`"))?,
317                        symbol,
318                        start_srcloc: FilePos::default(),
319                        translation: None,
320                        func_body: None,
321                    })
322                });
323            }
324        }
325
326        for (idx, trampoline) in component.trampolines.iter() {
327            for abi in [Abi::Wasm, Abi::Array] {
328                ret.push_input(move |compiler| {
329                    let key = FuncKey::ComponentTrampoline(abi, idx);
330                    let symbol = format!(
331                        "component-trampolines[{}]-{}-{}",
332                        idx.as_u32(),
333                        match abi {
334                            Abi::Wasm => "wasm-call",
335                            Abi::Array => "array-call",
336                        },
337                        trampoline.symbol_name(),
338                    );
339                    let function = compiler
340                        .component_compiler()
341                        .compile_trampoline(component, types, key, abi, tunables, &symbol)
342                        .with_context(|| format!("failed to compile {symbol}"))?;
343                    Ok(CompileOutput {
344                        key,
345                        function,
346                        symbol,
347                        start_srcloc: FilePos::default(),
348                        translation: None,
349                        func_body: None,
350                    })
351                });
352            }
353        }
354
355        // If there are any resources defined within this component, the
356        // signature for `resource.drop` is mentioned somewhere, and the
357        // wasm-to-native trampoline for `resource.drop` hasn't been created yet
358        // then insert that here. This is possibly required by destruction of
359        // resources from the embedder and otherwise won't be explicitly
360        // requested through initializers above or such.
361        if component.component.num_resources > 0 {
362            if let Some(sig) = types.find_resource_drop_signature() {
363                ret.push_input(move |compiler| {
364                    let key = FuncKey::ResourceDropTrampoline;
365                    let symbol = "resource_drop_trampoline".to_string();
366                    let function = compiler
367                        .compile_wasm_to_array_trampoline(types[sig].unwrap_func(), key, &symbol)
368                        .with_context(|| format!("failed to compile `{symbol}`"))?;
369                    Ok(CompileOutput {
370                        key,
371                        function,
372                        symbol,
373                        start_srcloc: FilePos::default(),
374                        translation: None,
375                        func_body: None,
376                    })
377                });
378            }
379        }
380
381        ret
382    }
383
384    fn clean_symbol(name: &str) -> Cow<'_, str> {
385        /// Maximum length of symbols generated in objects.
386        const MAX_SYMBOL_LEN: usize = 96;
387
388        // Just to be on the safe side, filter out characters that could
389        // pose issues to tools such as "perf" or "objdump".  To avoid
390        // having to update a list of allowed characters for each different
391        // language that compiles to Wasm, allows only graphic ASCII
392        // characters; replace runs of everything else with a "?".
393        let bad_char = |c: char| !c.is_ascii_graphic();
394        if name.chars().any(bad_char) {
395            let mut last_char_seen = '\u{0000}';
396            Cow::Owned(
397                name.chars()
398                    .map(|c| if bad_char(c) { '?' } else { c })
399                    .filter(|c| {
400                        let skip = last_char_seen == '?' && *c == '?';
401                        last_char_seen = *c;
402                        !skip
403                    })
404                    .take(MAX_SYMBOL_LEN)
405                    .collect::<String>(),
406            )
407        } else if name.len() <= MAX_SYMBOL_LEN {
408            Cow::Borrowed(&name[..])
409        } else {
410            Cow::Borrowed(&name[..MAX_SYMBOL_LEN])
411        }
412    }
413
414    fn collect_inputs_in_translations(
415        &mut self,
416        types: &'a ModuleTypesBuilder,
417        translations: impl IntoIterator<
418            Item = (
419                StaticModuleIndex,
420                &'a ModuleTranslation<'a>,
421                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
422            ),
423        >,
424    ) {
425        for (module, translation, functions) in translations {
426            for (def_func_index, func_body_data) in functions {
427                self.push_input(move |compiler| {
428                    let key = FuncKey::DefinedWasmFunction(module, def_func_index);
429                    let func_index = translation.module.func_index(def_func_index);
430                    let symbol = match translation
431                        .debuginfo
432                        .name_section
433                        .func_names
434                        .get(&func_index)
435                    {
436                        Some(name) => format!(
437                            "wasm[{}]::function[{}]::{}",
438                            module.as_u32(),
439                            func_index.as_u32(),
440                            Self::clean_symbol(&name)
441                        ),
442                        None => format!(
443                            "wasm[{}]::function[{}]",
444                            module.as_u32(),
445                            func_index.as_u32()
446                        ),
447                    };
448                    let func_body = func_body_data.body.clone();
449                    let data = func_body.get_binary_reader();
450                    let offset = data.original_position();
451                    let start_srcloc = FilePos::new(u32::try_from(offset).unwrap());
452                    let function = compiler
453                        .compile_function(translation, key, func_body_data, types, &symbol)
454                        .with_context(|| format!("failed to compile: {symbol}"))?;
455
456                    Ok(CompileOutput {
457                        key,
458                        symbol,
459                        function,
460                        start_srcloc,
461                        translation: Some(translation),
462                        func_body: Some(func_body),
463                    })
464                });
465
466                let func_index = translation.module.func_index(def_func_index);
467                if translation.module.functions[func_index].is_escaping() {
468                    self.push_input(move |compiler| {
469                        let key = FuncKey::ArrayToWasmTrampoline(module, def_func_index);
470                        let func_index = translation.module.func_index(def_func_index);
471                        let symbol = format!(
472                            "wasm[{}]::array_to_wasm_trampoline[{}]",
473                            module.as_u32(),
474                            func_index.as_u32()
475                        );
476                        let function = compiler
477                            .compile_array_to_wasm_trampoline(translation, types, key, &symbol)
478                            .with_context(|| format!("failed to compile: {symbol}"))?;
479                        Ok(CompileOutput {
480                            key,
481                            symbol,
482                            function,
483                            start_srcloc: FilePos::default(),
484                            translation: None,
485                            func_body: None,
486                        })
487                    });
488                }
489            }
490        }
491
492        let mut trampoline_types_seen = HashSet::new();
493        for (_func_type_index, trampoline_type_index) in types.trampoline_types() {
494            let is_new = trampoline_types_seen.insert(trampoline_type_index);
495            if !is_new {
496                continue;
497            }
498            let trampoline_func_ty = types[trampoline_type_index].unwrap_func();
499            self.push_input(move |compiler| {
500                let key = FuncKey::WasmToArrayTrampoline(trampoline_type_index);
501                let symbol = format!(
502                    "signatures[{}]::wasm_to_array_trampoline",
503                    trampoline_type_index.as_u32()
504                );
505                let function = compiler
506                    .compile_wasm_to_array_trampoline(trampoline_func_ty, key, &symbol)
507                    .with_context(|| format!("failed to compile: {symbol}"))?;
508                Ok(CompileOutput {
509                    key,
510                    function,
511                    symbol,
512                    start_srcloc: FilePos::default(),
513                    translation: None,
514                    func_body: None,
515                })
516            });
517        }
518    }
519
520    /// Compile these `CompileInput`s (maybe in parallel) and return the
521    /// resulting `UnlinkedCompileOutput`s.
522    fn compile(self, engine: &Engine) -> Result<UnlinkedCompileOutputs<'a>> {
523        let compiler = engine.compiler();
524
525        if self.inputs.len() > 0 && cfg!(miri) {
526            bail!(
527                "\
528You are attempting to compile a WebAssembly module or component that contains
529functions in Miri. Running Cranelift through Miri is known to take quite a long
530time and isn't what we want in CI at least. If this is a mistake then you should
531ignore this test in Miri with:
532
533    #[cfg_attr(miri, ignore)]
534
535If this is not a mistake then try to edit the `pulley_provenance_test` test
536which runs Cranelift outside of Miri. If you still feel this is a mistake then
537please open an issue or a topic on Zulip to talk about how best to accommodate
538the use case.
539"
540            );
541        }
542
543        let mut raw_outputs = if let Some(inlining_compiler) = compiler.inlining_compiler() {
544            if engine.tunables().inlining {
545                self.compile_with_inlining(engine, compiler, inlining_compiler)?
546            } else {
547                // Inlining compiler but inlining is disabled: compile each
548                // input and immediately finish its output in parallel, skipping
549                // call graph computation and all that.
550                engine.run_maybe_parallel::<_, _, Error, _>(self.inputs, |f| {
551                    let mut compiled = f(compiler)?;
552                    inlining_compiler.finish_compiling(
553                        &mut compiled.function,
554                        compiled.func_body.take(),
555                        &compiled.symbol,
556                    )?;
557                    Ok(compiled)
558                })?
559            }
560        } else {
561            // No inlining: just compile each individual input in parallel.
562            engine.run_maybe_parallel(self.inputs, |f| f(compiler))?
563        };
564
565        if cfg!(debug_assertions) {
566            let mut symbols: Vec<_> = raw_outputs.iter().map(|i| &i.symbol).collect();
567            symbols.sort();
568            for w in symbols.windows(2) {
569                assert_ne!(
570                    w[0], w[1],
571                    "should never have duplicate symbols, but found two functions with the symbol `{}`",
572                    w[0]
573                );
574            }
575        }
576
577        // Now that all functions have been compiled see if any
578        // wasmtime-builtin functions are necessary. If so those need to be
579        // collected and then those trampolines additionally need to be
580        // compiled.
581        compile_required_builtins(engine, &mut raw_outputs)?;
582
583        // Bucket the outputs by kind.
584        let mut outputs: BTreeMap<FuncKey, CompileOutput> = BTreeMap::new();
585        for output in raw_outputs {
586            outputs.insert(output.key, output);
587        }
588
589        Ok(UnlinkedCompileOutputs { outputs })
590    }
591
592    fn compile_with_inlining(
593        self,
594        engine: &Engine,
595        compiler: &dyn Compiler,
596        inlining_compiler: &dyn InliningCompiler,
597    ) -> Result<Vec<CompileOutput<'a>>, Error> {
598        /// The index of a function (of any kind: Wasm function, trampoline, or
599        /// etc...) in our list of unlinked outputs.
600        #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
601        struct OutputIndex(u32);
602        wasmtime_environ::entity_impl!(OutputIndex);
603
604        // Our list of unlinked outputs.
605        let mut outputs = PrimaryMap::<OutputIndex, Option<CompileOutput<'_>>>::from(
606            engine.run_maybe_parallel(self.inputs, |f| f(compiler).map(Some))?,
607        );
608
609        /// Whether a function (as described by the given `FuncKey`) can
610        /// participate in inlining or not (either as a candidate for being
611        /// inlined into a caller or having a callee inlined into a callsite
612        /// within itself).
613        fn is_inlining_function(key: FuncKey) -> bool {
614            match key {
615                // Wasm functions can both be inlined into other functions and
616                // have other functions inlined into them.
617                FuncKey::DefinedWasmFunction(..) => true,
618
619                // Intrinsics can be inlined into other functions.
620                #[cfg(feature = "component-model")]
621                FuncKey::UnsafeIntrinsic(..) => true,
622
623                // Trampolines cannot participate in inlining since our
624                // unwinding and exceptions infrastructure relies on them being
625                // in their own call frames.
626                FuncKey::ArrayToWasmTrampoline(..)
627                | FuncKey::WasmToArrayTrampoline(..)
628                | FuncKey::WasmToBuiltinTrampoline(..) => false,
629                #[cfg(feature = "component-model")]
630                FuncKey::ComponentTrampoline(..) | FuncKey::ResourceDropTrampoline => false,
631
632                FuncKey::PulleyHostCall(_) => {
633                    unreachable!("we don't compile artifacts for Pulley host calls")
634                }
635            }
636        }
637
638        /// Get just the output indices of the functions that can participate in
639        /// inlining from our unlinked outputs.
640        fn inlining_functions<'a>(
641            outputs: &'a PrimaryMap<OutputIndex, Option<CompileOutput<'_>>>,
642        ) -> impl Iterator<Item = OutputIndex> + 'a {
643            outputs.iter().filter_map(|(index, output)| {
644                if is_inlining_function(output.as_ref().unwrap().key) {
645                    Some(index)
646                } else {
647                    None
648                }
649            })
650        }
651
652        // A map from a `FuncKey` to its index in our unlinked outputs.
653        //
654        // We will generally just be working with `OutputIndex`es, but
655        // occasionally we must translate from keys back to our index space, for
656        // example when we know that one module's function import is always
657        // satisfied with a particular `FuncKey::DefinedWasmFunction`. This map
658        // enables that translation.
659        let key_to_output: HashMap<FuncKey, OutputIndex> = inlining_functions(&outputs)
660            .map(|output_index| {
661                let output = outputs[output_index].as_ref().unwrap();
662                (output.key, output_index)
663            })
664            .collect();
665
666        // Construct the call graph for inlining.
667        //
668        // We only inline Wasm functions, not trampolines, because we rely on
669        // trampolines being in their own stack frame when we save the entry and
670        // exit SP, FP, and PC for backtraces in trampolines.
671        let call_graph = CallGraph::<OutputIndex>::new(inlining_functions(&outputs), {
672            let mut func_keys = IndexSet::default();
673            let outputs = &outputs;
674            let key_to_output = &key_to_output;
675            move |output_index, calls| {
676                debug_assert!(calls.is_empty());
677
678                let output = outputs[output_index].as_ref().unwrap();
679                debug_assert!(is_inlining_function(output.key));
680
681                // Get this function's call graph edges as `FuncKey`s.
682                func_keys.clear();
683                inlining_compiler.calls(&output.function, &mut func_keys)?;
684
685                // Translate each of those to keys to output indices, which is
686                // what we actually need.
687                calls.extend(
688                    func_keys
689                        .iter()
690                        .copied()
691                        .filter_map(|key| key_to_output.get(&key)),
692                );
693
694                log::trace!(
695                    "call graph edges for {output_index:?} = {:?}: {calls:?}",
696                    output.key
697                );
698                Ok(())
699            }
700        })?;
701
702        // Stratify the call graph into a sequence of layers. We process each
703        // layer in order, but process functions within a layer in parallel
704        // (because they either do not call each other or are part of a
705        // mutual-recursion cycle; either way we won't inline members of the
706        // same layer into each other).
707        let strata =
708            stratify::Strata::<OutputIndex>::new(inlining_functions(&outputs), &call_graph);
709        let mut layer_outputs = vec![];
710        for layer in strata.layers() {
711            // Temporarily take this layer's outputs out of our unlinked outputs
712            // list so that we can mutate these outputs (by inlining callee
713            // functions into them) while also accessing shared borrows of the
714            // unlinked outputs list (finding the callee functions we will
715            // inline).
716            debug_assert!(layer_outputs.is_empty());
717            layer_outputs.extend(layer.iter().map(|f| outputs[*f].take().unwrap()));
718
719            // Process this layer's members in parallel.
720            engine.run_maybe_parallel_mut(
721                &mut layer_outputs,
722                |output: &mut CompileOutput<'_>| {
723                    log::trace!("processing inlining for {:?}", output.key);
724                    debug_assert!(is_inlining_function(output.key));
725
726                    let caller_key = output.key;
727                    let caller_needs_gc_heap =
728                        output.translation.is_some_and(|t| t.module.needs_gc_heap);
729                    let caller = &mut output.function;
730
731                    let mut caller_size = inlining_compiler.size(caller);
732
733                    inlining_compiler.inline(caller, &mut |callee_key: FuncKey| {
734                        log::trace!("  --> considering call to {callee_key:?}");
735                        let callee_output_index: OutputIndex = key_to_output[&callee_key];
736
737                        // NB: If the callee is not inside `outputs`, then it is
738                        // in the same `Strata` layer as the caller (and
739                        // therefore is in the same strongly-connected component
740                        // as the caller, and they mutually recursive). In this
741                        // case, we do not do any inlining; communicate this
742                        // command via `?`-propagation.
743                        let callee_output = outputs[callee_output_index].as_ref()?;
744
745                        debug_assert_eq!(callee_output.key, callee_key);
746
747                        let callee = &callee_output.function;
748                        let callee_size = inlining_compiler.size(callee);
749
750                        let callee_needs_gc_heap = callee_output
751                            .translation
752                            .is_some_and(|t| t.module.needs_gc_heap);
753
754                        if Self::should_inline(InlineHeuristicParams {
755                            tunables: engine.tunables(),
756                            caller_size,
757                            caller_key,
758                            caller_needs_gc_heap,
759                            callee_size,
760                            callee_key,
761                            callee_needs_gc_heap,
762                        }) {
763                            caller_size = caller_size.saturating_add(callee_size);
764                            Some(callee)
765                        } else {
766                            None
767                        }
768                    })
769                },
770            )?;
771
772            for (f, func) in layer.iter().zip(layer_outputs.drain(..)) {
773                debug_assert!(outputs[*f].is_none());
774                outputs[*f] = Some(func);
775            }
776        }
777
778        // Fan out in parallel again and finish compiling each function.
779        engine.run_maybe_parallel(outputs.into(), |output| {
780            let mut output = output.unwrap();
781            inlining_compiler.finish_compiling(
782                &mut output.function,
783                output.func_body.take(),
784                &output.symbol,
785            )?;
786            Ok(output)
787        })
788    }
789
790    /// Implementation of our inlining heuristics.
791    ///
792    /// TODO: We should improve our heuristics:
793    ///
794    /// * One potentially promising hint that we don't currently make use of is
795    ///   how many times a function appears as the callee in call sites. For
796    ///   example, a function that appears in only a single call site, and does
797    ///   not otherwise escape, is often beneficial to inline regardless of its
798    ///   size (assuming we can then GC away the non-inlined version of the
799    ///   function, which we do not currently attempt to do).
800    ///
801    /// * Another potentially promising hint would be whether any of the call
802    ///   site's actual arguments are constants.
803    ///
804    /// * A general improvement would be removing the decision-tree style of
805    ///   control flow below and replacing it with (1) a pure estimated-benefit
806    ///   formula and (2) a benefit threshold. Whenever the estimated benefit
807    ///   reaches the threshold, we would inline the call. Both the formula and
808    ///   the threshold would be parameterized by tunables. This would
809    ///   effectively allow reprioritizing the relative importance of different
810    ///   hint sources, rather than being stuck with the sequence hard-coded in
811    ///   the decision tree below.
812    fn should_inline(
813        InlineHeuristicParams {
814            tunables,
815            caller_size,
816            caller_key,
817            caller_needs_gc_heap,
818            callee_size,
819            callee_key,
820            callee_needs_gc_heap,
821        }: InlineHeuristicParams,
822    ) -> bool {
823        log::trace!(
824            "considering inlining:\n\
825             \tcaller = {caller_key:?}\n\
826             \t\tsize = {caller_size}\n\
827             \t\tneeds_gc_heap = {caller_needs_gc_heap}\n\
828             \tcallee = {callee_key:?}\n\
829             \t\tsize = {callee_size}\n\
830             \t\tneeds_gc_heap = {callee_needs_gc_heap}"
831        );
832
833        debug_assert!(
834            tunables.inlining,
835            "shouldn't even call this method if we aren't configured for inlining"
836        );
837        debug_assert_ne!(caller_key, callee_key, "we never inline recursion");
838
839        // Put a limit on how large we can make a function via inlining to cap
840        // code bloat.
841        let sum_size = caller_size.saturating_add(callee_size);
842        if sum_size > tunables.inlining_sum_size_threshold {
843            log::trace!(
844                "  --> not inlining: the sum of the caller's and callee's sizes is greater than \
845                 the inlining-sum-size threshold: {callee_size} + {caller_size} > {}",
846                tunables.inlining_sum_size_threshold
847            );
848            return false;
849        }
850
851        // Consider whether this is an intra-module call.
852        //
853        // Inlining within a single core module has most often already been done
854        // by the toolchain that produced the module, e.g. LLVM, and any extant
855        // function calls to small callees were presumably annotated with the
856        // equivalent of `#[inline(never)]` or `#[cold]` but we don't have that
857        // information anymore.
858        match (caller_key, callee_key) {
859            (
860                FuncKey::DefinedWasmFunction(caller_module, _),
861                FuncKey::DefinedWasmFunction(callee_module, _),
862            ) => {
863                if caller_module == callee_module {
864                    match tunables.inlining_intra_module {
865                        IntraModuleInlining::Yes => {}
866
867                        IntraModuleInlining::WhenUsingGc
868                            if caller_needs_gc_heap || callee_needs_gc_heap => {}
869
870                        IntraModuleInlining::WhenUsingGc => {
871                            log::trace!(
872                                "  --> not inlining: intra-module call that does not use GC"
873                            );
874                            return false;
875                        }
876
877                        IntraModuleInlining::No => {
878                            log::trace!("  --> not inlining: intra-module call");
879                            return false;
880                        }
881                    }
882                }
883            }
884            _ => {}
885        }
886
887        // Small callees are often worth inlining regardless of the size of the
888        // caller.
889        if callee_size <= tunables.inlining_small_callee_size {
890            log::trace!(
891                "  --> inlining: callee's size is less than the small-callee size: \
892                 {callee_size} <= {}",
893                tunables.inlining_small_callee_size
894            );
895            return true;
896        }
897
898        log::trace!("  --> inlining: did not find a reason we should not");
899        true
900    }
901}
902
903fn compile_required_builtins(engine: &Engine, raw_outputs: &mut Vec<CompileOutput>) -> Result<()> {
904    let compiler = engine.compiler();
905    let mut builtins = HashSet::new();
906    let mut new_inputs: Vec<CompileInput<'_>> = Vec::new();
907
908    let compile_builtin = |builtin: BuiltinFunctionIndex| {
909        Box::new(move |compiler: &dyn Compiler| {
910            let key = FuncKey::WasmToBuiltinTrampoline(builtin);
911            let symbol = format!("wasmtime_builtin_{}", builtin.name());
912            let mut function = compiler
913                .compile_wasm_to_builtin(key, &symbol)
914                .with_context(|| format!("failed to compile `{symbol}`"))?;
915            if let Some(compiler) = compiler.inlining_compiler() {
916                compiler.finish_compiling(&mut function, None, &symbol)?;
917            }
918            Ok(CompileOutput {
919                key,
920                function,
921                symbol,
922                start_srcloc: FilePos::default(),
923                translation: None,
924                func_body: None,
925            })
926        })
927    };
928
929    for output in raw_outputs.iter() {
930        for reloc in compiler.compiled_function_relocation_targets(&*output.function.code) {
931            if let FuncKey::WasmToBuiltinTrampoline(builtin) = reloc {
932                if builtins.insert(builtin) {
933                    new_inputs.push(compile_builtin(builtin));
934                }
935            }
936        }
937    }
938    raw_outputs.extend(engine.run_maybe_parallel(new_inputs, |c| c(compiler))?);
939    Ok(())
940}
941
942#[derive(Default)]
943struct UnlinkedCompileOutputs<'a> {
944    // A map from kind to `CompileOutput`.
945    outputs: BTreeMap<FuncKey, CompileOutput<'a>>,
946}
947
948impl UnlinkedCompileOutputs<'_> {
949    /// Flatten all our functions into a single list and remember each of their
950    /// indices within it.
951    fn pre_link(self) -> PreLinkOutput {
952        // We must ensure that `compiled_funcs` contains the function bodies
953        // sorted by their `FuncKey`, as `CompiledFunctionsTable` relies on that
954        // property.
955        //
956        // Furthermore, note that, because the order functions end up in
957        // `compiled_funcs` is the order they will ultimately be laid out inside
958        // the object file, we will group all trampolines together, all defined
959        // Wasm functions from the same module together, and etc... This is a
960        // nice property, because it means that (a) cold functions, like builtin
961        // trampolines, are not interspersed between hot Wasm functions, and (b)
962        // Wasm functions that are likely to call each other (i.e. are in the
963        // same module together) are grouped together.
964        let mut compiled_funcs = vec![];
965
966        let mut indices = FunctionIndices::default();
967        let mut needs_gc_heap = false;
968
969        // NB: Iteration over this `BTreeMap` ensures that we uphold
970        // `compiled_func`'s sorted property.
971        for output in self.outputs.into_values() {
972            needs_gc_heap |= output.function.needs_gc_heap;
973
974            let index = compiled_funcs.len();
975            compiled_funcs.push((output.symbol, output.key, output.function.code));
976
977            if output.start_srcloc != FilePos::none() {
978                indices
979                    .start_srclocs
980                    .insert(output.key, output.start_srcloc);
981            }
982
983            indices.indices.insert(output.key, index);
984        }
985
986        PreLinkOutput {
987            needs_gc_heap,
988            compiled_funcs,
989            indices,
990        }
991    }
992}
993
994/// Our pre-link functions that have been flattened into a single list.
995struct PreLinkOutput {
996    /// Whether or not any of these functions require a GC heap
997    needs_gc_heap: bool,
998    /// The flattened list of (symbol name, FuncKey, compiled
999    /// function) triples, as they will be laid out in the object
1000    /// file.
1001    compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1002    /// The `FunctionIndices` mapping our function keys to indices in that flat
1003    /// list.
1004    indices: FunctionIndices,
1005}
1006
1007#[derive(Default)]
1008struct FunctionIndices {
1009    // A map of wasm functions and where they're located in the original file.
1010    start_srclocs: HashMap<FuncKey, FilePos>,
1011
1012    // The index of each compiled function in `compiled_funcs`.
1013    indices: BTreeMap<FuncKey, usize>,
1014}
1015
1016impl FunctionIndices {
1017    /// Link the compiled functions together, resolving relocations, and append
1018    /// them to the given ELF file.
1019    fn link_and_append_code<'a>(
1020        self,
1021        mut obj: object::write::Object<'static>,
1022        engine: &'a Engine,
1023        compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1024        translations: PrimaryMap<StaticModuleIndex, ModuleTranslation<'_>>,
1025        dwarf_package_bytes: Option<&[u8]>,
1026    ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> {
1027        // Append all the functions to the ELF file.
1028        //
1029        // The result is a vector parallel to `compiled_funcs` where
1030        // `symbol_ids_and_locs[i]` is the symbol ID and function location of
1031        // `compiled_funcs[i]`.
1032        let compiler = engine.compiler();
1033        let tunables = engine.tunables();
1034        let symbol_ids_and_locs = compiler.append_code(
1035            &mut obj,
1036            &compiled_funcs,
1037            &|_caller_index: usize, callee: FuncKey| {
1038                self.indices.get(&callee).copied().unwrap_or_else(|| {
1039                    panic!("cannot resolve relocation! no index for callee {callee:?}")
1040                })
1041            },
1042        )?;
1043
1044        // If requested, generate and add DWARF information.
1045        if tunables.debug_native {
1046            compiler.append_dwarf(
1047                &mut obj,
1048                &translations,
1049                &|module, func| {
1050                    let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)];
1051                    let (symbol, _) = symbol_ids_and_locs[i];
1052                    let (_, _, compiled_func) = &compiled_funcs[i];
1053                    (symbol, &**compiled_func)
1054                },
1055                dwarf_package_bytes,
1056                tunables,
1057            )?;
1058        }
1059
1060        let mut table_builder = CompiledFunctionsTableBuilder::new();
1061        for (key, compiled_func_index) in &self.indices {
1062            let (_, func_loc) = symbol_ids_and_locs[*compiled_func_index];
1063            let src_loc = self
1064                .start_srclocs
1065                .get(key)
1066                .copied()
1067                .unwrap_or_else(FilePos::none);
1068            table_builder.push_func(*key, func_loc, src_loc);
1069        }
1070
1071        let mut obj = wasmtime_environ::ObjectBuilder::new(obj, tunables);
1072        let modules = translations
1073            .into_iter()
1074            .map(|(_, mut translation)| {
1075                // If configured attempt to use static memory initialization
1076                // which can either at runtime be implemented as a single memcpy
1077                // to initialize memory or otherwise enabling
1078                // virtual-memory-tricks such as mmap'ing from a file to get
1079                // copy-on-write.
1080                if engine.tunables().memory_init_cow {
1081                    let align = compiler.page_size_align();
1082                    let max_always_allowed = engine.config().memory_guaranteed_dense_image_size;
1083                    translation.try_static_init(align, max_always_allowed);
1084                }
1085
1086                // Attempt to convert table initializer segments to FuncTable
1087                // representation where possible, to enable table lazy init.
1088                if engine.tunables().table_lazy_init {
1089                    translation.try_func_table_init();
1090                }
1091
1092                obj.append(translation)
1093            })
1094            .collect::<Result<PrimaryMap<_, _>>>()?;
1095
1096        let artifacts = Artifacts {
1097            modules,
1098            table: table_builder.finish(),
1099        };
1100
1101        Ok((obj, artifacts))
1102    }
1103}
1104
1105/// The artifacts necessary for finding and calling Wasm functions at runtime,
1106/// to be serialized into an ELF file.
1107struct Artifacts {
1108    modules: PrimaryMap<StaticModuleIndex, CompiledModuleInfo>,
1109    table: CompiledFunctionsTable,
1110}
1111
1112impl Artifacts {
1113    /// Assuming this compilation was for a single core Wasm module, get the
1114    /// resulting `CompiledModuleInfo`.
1115    fn unwrap_as_module_info(self) -> (CompiledModuleInfo, CompiledFunctionsTable) {
1116        assert_eq!(self.modules.len(), 1);
1117        let info = self.modules.into_iter().next().unwrap().1;
1118        let table = self.table;
1119        (info, table)
1120    }
1121}
1122
1123/// Extend `dest` with `items` and return the range of indices in `dest` where
1124/// they ended up.
1125fn extend_with_range<T>(dest: &mut Vec<T>, items: impl IntoIterator<Item = T>) -> Range<u32> {
1126    let start = dest.len();
1127    let start = u32::try_from(start).unwrap();
1128
1129    dest.extend(items);
1130
1131    let end = dest.len();
1132    let end = u32::try_from(end).unwrap();
1133
1134    start..end
1135}