Skip to main content

wasmtime/
compile.rs

1//! Wasm compilation orchestration.
2//!
3//! It works roughly like this:
4//!
5//! * We walk over the Wasm module/component and make a list of all the things
6//!   we need to compile. This is a `CompileInputs`.
7//!
8//! * The `CompileInputs::compile` method compiles each of these in parallel,
9//!   producing a `UnlinkedCompileOutputs`. This is an unlinked set of compiled
10//!   functions, bucketed by type of function.
11//!
12//! * The `UnlinkedCompileOutputs::pre_link` method re-arranges the compiled
13//!   functions into a flat list. This is the order we will place them within
14//!   the ELF file, so we must also keep track of all the functions' indices
15//!   within this list, because we will need them for resolving
16//!   relocations. These indices are kept track of in the resulting
17//!   `FunctionIndices`.
18//!
19//! * The `FunctionIndices::link_and_append_code` method appends the functions
20//!   to the given ELF file and resolves relocations. It produces an `Artifacts`
21//!   which contains the data needed at runtime to find and call Wasm
22//!   functions. It is up to the caller to serialize the relevant parts of the
23//!   `Artifacts` into the ELF file.
24
25use crate::Engine;
26use crate::hash_map::HashMap;
27use crate::hash_set::HashSet;
28use crate::prelude::*;
29use std::{any::Any, borrow::Cow, collections::BTreeMap, mem, ops::Range};
30use wasmtime_environ::{
31    Abi, CompiledFunctionBody, CompiledFunctionsTable, CompiledFunctionsTableBuilder,
32    CompiledModuleInfo, Compiler, DefinedFuncIndex, FilePos, FinishedObject, FuncKey,
33    FunctionBodyData, InliningCompiler, IntraModuleInlining, ModuleEnvironment, ModuleTranslation,
34    ModuleTypes, ModuleTypesBuilder, ObjectKind, PrimaryMap, StaticModuleIndex, Tunables,
35    graphs::{EntityGraph, Graph as _},
36};
37#[cfg(feature = "component-model")]
38use wasmtime_environ::{WasmChecksum, component::Translator};
39
40mod stratify;
41
42mod code_builder;
43pub use self::code_builder::{CodeBuilder, CodeHint, HashedEngineCompileEnv};
44
45#[cfg(feature = "runtime")]
46mod runtime;
47
48/// Converts an input binary-encoded WebAssembly module to compilation
49/// artifacts and type information.
50///
51/// This is where compilation actually happens of WebAssembly modules and
52/// translation/parsing/validation of the binary input occurs. The binary
53/// artifact represented in the `MmapVec` returned here is an in-memory ELF
54/// file in an owned area of virtual linear memory where permissions (such
55/// as the executable bit) can be applied.
56///
57/// Additionally compilation returns an `Option` here which is always
58/// `Some`, notably compiled metadata about the module in addition to the
59/// type information found within.
60pub(crate) fn build_module_artifacts<T: FinishedObject>(
61    engine: &Engine,
62    wasm: &[u8],
63    dwarf_package: Option<&[u8]>,
64    obj_state: &T::State,
65) -> Result<(
66    T,
67    Option<(CompiledModuleInfo, CompiledFunctionsTable, ModuleTypes)>,
68)> {
69    let compiler = engine.try_compiler()?;
70    let tunables = engine.tunables();
71
72    // First a `ModuleEnvironment` is created which records type information
73    // about the wasm module. This is where the WebAssembly is parsed and
74    // validated. Afterwards `types` will have all the type information for
75    // this module.
76    let mut parser = wasmparser::Parser::new(0);
77    let mut validator = wasmparser::Validator::new_with_features(engine.features());
78    parser.set_features(*validator.features());
79    let mut types = ModuleTypesBuilder::new(&validator);
80    let mut translation = ModuleEnvironment::new(
81        tunables,
82        &mut validator,
83        &mut types,
84        StaticModuleIndex::from_u32(0),
85    )
86    .translate(parser, wasm)
87    .context("failed to parse WebAssembly module")?;
88    let functions = mem::take(&mut translation.function_body_inputs);
89
90    let compile_inputs = CompileInputs::for_module(&types, &translation, functions);
91    let unlinked_compile_outputs = compile_inputs.compile(engine)?;
92    let PreLinkOutput {
93        needs_gc_heap,
94        compiled_funcs,
95        indices,
96    } = unlinked_compile_outputs.pre_link();
97    translation.module.needs_gc_heap |= needs_gc_heap;
98
99    // Emplace all compiled functions into the object file with any other
100    // sections associated with code as well.
101    let mut object = compiler.object(ObjectKind::Module)?;
102    // Insert `Engine` and type-level information into the compiled
103    // artifact so if this module is deserialized later it contains all
104    // information necessary.
105    //
106    // Note that `append_compiler_info` and `append_types` here in theory
107    // can both be skipped if this module will never get serialized.
108    // They're only used during deserialization and not during runtime for
109    // the module itself. Currently there's no need for that, however, so
110    // it's left as an exercise for later.
111    engine.append_compiler_info(&mut object)?;
112    engine.append_bti(&mut object);
113
114    let (mut object, compilation_artifacts) = indices.link_and_append_code(
115        object,
116        engine,
117        compiled_funcs,
118        std::iter::once(translation).collect(),
119        dwarf_package,
120    )?;
121
122    if tunables.debug_guest {
123        object.append_wasm_bytecode(std::iter::once(wasm));
124    }
125
126    let (info, index) = compilation_artifacts.unwrap_as_module_info();
127    let types = types.finish();
128    object.serialize_info(&(&info, &index, &types));
129    let result = T::finish_object(object, obj_state)?;
130
131    Ok((result, Some((info, index, types))))
132}
133
134/// Performs the compilation phase for a component, translating and
135/// validating the provided wasm binary to machine code.
136///
137/// This method will compile all nested core wasm binaries in addition to
138/// any necessary extra functions required for operation with components.
139/// The output artifact here is the serialized object file contained within
140/// an owned mmap along with metadata about the compilation itself.
141#[cfg(feature = "component-model")]
142pub(crate) fn build_component_artifacts<T: FinishedObject>(
143    engine: &Engine,
144    binary: &[u8],
145    _dwarf_package: Option<&[u8]>,
146    unsafe_intrinsics_import: Option<&str>,
147    obj_state: &T::State,
148) -> Result<(T, Option<wasmtime_environ::component::ComponentArtifacts>)> {
149    use wasmtime_environ::ScopeVec;
150    use wasmtime_environ::component::{
151        CompiledComponentInfo, ComponentArtifacts, ComponentTypesBuilder,
152    };
153
154    let compiler = engine.try_compiler()?;
155    let tunables = engine.tunables();
156
157    let scope = ScopeVec::new();
158    let mut validator = wasmparser::Validator::new_with_features(engine.features());
159    let mut types = ComponentTypesBuilder::new(&validator);
160    let mut translator = Translator::new(tunables, &mut validator, &mut types, &scope);
161    if let Some(name) = unsafe_intrinsics_import {
162        translator.expose_unsafe_intrinsics(name);
163    }
164    let (component, mut module_translations) = translator
165        .translate(binary)
166        .context("failed to parse WebAssembly module")?;
167
168    let compile_inputs = CompileInputs::for_component(
169        engine,
170        &types,
171        &component,
172        module_translations.iter_mut().map(|(i, translation)| {
173            let functions = mem::take(&mut translation.function_body_inputs);
174            (i, &*translation, functions)
175        }),
176    );
177    let unlinked_compile_outputs = compile_inputs.compile(&engine)?;
178
179    let PreLinkOutput {
180        needs_gc_heap,
181        compiled_funcs,
182        indices,
183    } = unlinked_compile_outputs.pre_link();
184    for (_, t) in &mut module_translations {
185        t.module.needs_gc_heap |= needs_gc_heap
186    }
187
188    // Collect bytecode slices here before moving `module_translations` below.
189    let module_wasms = if tunables.debug_guest {
190        module_translations
191            .values()
192            .map(|t| t.wasm)
193            .collect::<Vec<_>>()
194    } else {
195        vec![]
196    };
197
198    let mut object = compiler.object(ObjectKind::Component)?;
199    engine.append_compiler_info(&mut object)?;
200    engine.append_bti(&mut object);
201
202    let (mut object, compilation_artifacts) = indices.link_and_append_code(
203        object,
204        engine,
205        compiled_funcs,
206        module_translations,
207        None, // TODO: Support dwarf packages for components.
208    )?;
209
210    if tunables.debug_guest {
211        object.append_wasm_bytecode(module_wasms);
212    }
213
214    let (types, ty) = types.finish(&component.component);
215
216    let info = CompiledComponentInfo {
217        component: component.component,
218    };
219    let artifacts = ComponentArtifacts {
220        info,
221        table: compilation_artifacts.table,
222        ty,
223        types,
224        static_modules: compilation_artifacts.modules,
225        checksum: WasmChecksum::from_binary(binary, tunables.recording),
226    };
227    object.serialize_info(&artifacts);
228
229    let result = T::finish_object(object, obj_state)?;
230    Ok((result, Some(artifacts)))
231}
232
233type CompileInput<'a> = Box<dyn FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a>;
234
235struct CompileOutput<'a> {
236    key: FuncKey,
237    symbol: String,
238    function: CompiledFunctionBody,
239    start_srcloc: FilePos,
240
241    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
242    translation: Option<&'a ModuleTranslation<'a>>,
243
244    // Only present when `self.key` is a `FuncKey::DefinedWasmFunction(..)`.
245    func_body: Option<wasmparser::FunctionBody<'a>>,
246}
247
248/// Inputs to our inlining heuristics.
249struct InlineHeuristicParams<'a> {
250    tunables: &'a Tunables,
251    caller_size: u32,
252    caller_key: FuncKey,
253    caller_needs_gc_heap: bool,
254    callee_size: u32,
255    callee_key: FuncKey,
256    callee_needs_gc_heap: bool,
257}
258
259/// The collection of things we need to compile for a Wasm module or component.
260#[derive(Default)]
261struct CompileInputs<'a> {
262    inputs: Vec<CompileInput<'a>>,
263}
264
265impl<'a> CompileInputs<'a> {
266    fn push_input(
267        &mut self,
268        f: impl FnOnce(&dyn Compiler) -> Result<CompileOutput<'a>> + Send + 'a,
269    ) {
270        self.inputs.push(Box::new(f));
271    }
272
273    /// Create the `CompileInputs` for a core Wasm module.
274    fn for_module(
275        types: &'a ModuleTypesBuilder,
276        translation: &'a ModuleTranslation<'a>,
277        functions: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
278    ) -> Self {
279        let mut ret = CompileInputs { inputs: vec![] };
280
281        let module_index = StaticModuleIndex::from_u32(0);
282        ret.collect_inputs_in_translations(types, [(module_index, translation, functions)]);
283
284        ret
285    }
286
287    /// Create a `CompileInputs` for a component.
288    #[cfg(feature = "component-model")]
289    fn for_component(
290        engine: &'a Engine,
291        types: &'a wasmtime_environ::component::ComponentTypesBuilder,
292        component: &'a wasmtime_environ::component::ComponentTranslation,
293        module_translations: impl IntoIterator<
294            Item = (
295                StaticModuleIndex,
296                &'a ModuleTranslation<'a>,
297                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
298            ),
299        >,
300    ) -> Self {
301        use wasmtime_environ::Abi;
302        use wasmtime_environ::component::UnsafeIntrinsic;
303
304        let mut ret = CompileInputs { inputs: vec![] };
305
306        ret.collect_inputs_in_translations(types.module_types_builder(), module_translations);
307        let tunables = engine.tunables();
308
309        for i in component
310            .component
311            .unsafe_intrinsics
312            .iter()
313            .enumerate()
314            .filter_map(|(i, ty)| if ty.is_some() { Some(i) } else { None })
315        {
316            let i = u32::try_from(i).unwrap();
317            let intrinsic = UnsafeIntrinsic::from_u32(i);
318            for abi in [Abi::Wasm, Abi::Array] {
319                ret.push_input(move |compiler| {
320                    let symbol = format!(
321                        "unsafe-intrinsics-{}-{}",
322                        match abi {
323                            Abi::Wasm => "wasm-call",
324                            Abi::Array => "array-call",
325                            Abi::Patchable => "patchable-call",
326                        },
327                        intrinsic.name(),
328                    );
329                    Ok(CompileOutput {
330                        key: FuncKey::UnsafeIntrinsic(abi, intrinsic),
331                        function: compiler
332                            .component_compiler()
333                            .compile_intrinsic(tunables, component, types, intrinsic, abi, &symbol)
334                            .with_context(|| format!("failed to compile `{symbol}`"))?,
335                        symbol,
336                        start_srcloc: FilePos::default(),
337                        translation: None,
338                        func_body: None,
339                    })
340                });
341            }
342        }
343
344        for (idx, trampoline) in component.trampolines.iter() {
345            for abi in [Abi::Wasm, Abi::Array] {
346                ret.push_input(move |compiler| {
347                    let key = FuncKey::ComponentTrampoline(abi, idx);
348                    let symbol = format!(
349                        "component-trampolines[{}]-{}-{}",
350                        idx.as_u32(),
351                        match abi {
352                            Abi::Wasm => "wasm-call",
353                            Abi::Array => "array-call",
354                            Abi::Patchable => "patchable-call",
355                        },
356                        trampoline.symbol_name(),
357                    );
358                    let function = compiler
359                        .component_compiler()
360                        .compile_trampoline(component, types, key, abi, tunables, &symbol)
361                        .with_context(|| format!("failed to compile {symbol}"))?;
362                    Ok(CompileOutput {
363                        key,
364                        function,
365                        symbol,
366                        start_srcloc: FilePos::default(),
367                        translation: None,
368                        func_body: None,
369                    })
370                });
371            }
372        }
373
374        // If there are any resources defined within this component, the
375        // signature for `resource.drop` is mentioned somewhere, and the
376        // wasm-to-native trampoline for `resource.drop` hasn't been created yet
377        // then insert that here. This is possibly required by destruction of
378        // resources from the embedder and otherwise won't be explicitly
379        // requested through initializers above or such.
380        if component.component.num_resources > 0 {
381            if let Some(sig) = types.find_resource_drop_signature() {
382                ret.push_input(move |compiler| {
383                    let key = FuncKey::ResourceDropTrampoline;
384                    let symbol = "resource_drop_trampoline".to_string();
385                    let function = compiler
386                        .compile_wasm_to_array_trampoline(types[sig].unwrap_func(), key, &symbol)
387                        .with_context(|| format!("failed to compile `{symbol}`"))?;
388                    Ok(CompileOutput {
389                        key,
390                        function,
391                        symbol,
392                        start_srcloc: FilePos::default(),
393                        translation: None,
394                        func_body: None,
395                    })
396                });
397            }
398        }
399
400        ret
401    }
402
403    fn clean_symbol(name: &str) -> Cow<'_, str> {
404        /// Maximum length of symbols generated in objects.
405        const MAX_SYMBOL_LEN: usize = 96;
406
407        // Just to be on the safe side, filter out characters that could
408        // pose issues to tools such as "perf" or "objdump".  To avoid
409        // having to update a list of allowed characters for each different
410        // language that compiles to Wasm, allows only graphic ASCII
411        // characters; replace runs of everything else with a "?".
412        let bad_char = |c: char| !c.is_ascii_graphic();
413        if name.chars().any(bad_char) {
414            let mut last_char_seen = '\u{0000}';
415            Cow::Owned(
416                name.chars()
417                    .map(|c| if bad_char(c) { '?' } else { c })
418                    .filter(|c| {
419                        let skip = last_char_seen == '?' && *c == '?';
420                        last_char_seen = *c;
421                        !skip
422                    })
423                    .take(MAX_SYMBOL_LEN)
424                    .collect::<String>(),
425            )
426        } else if name.len() <= MAX_SYMBOL_LEN {
427            Cow::Borrowed(&name[..])
428        } else {
429            Cow::Borrowed(&name[..MAX_SYMBOL_LEN])
430        }
431    }
432
433    fn collect_inputs_in_translations(
434        &mut self,
435        types: &'a ModuleTypesBuilder,
436        translations: impl IntoIterator<
437            Item = (
438                StaticModuleIndex,
439                &'a ModuleTranslation<'a>,
440                PrimaryMap<DefinedFuncIndex, FunctionBodyData<'a>>,
441            ),
442        >,
443    ) {
444        for (module, translation, functions) in translations {
445            for (def_func_index, func_body_data) in functions {
446                self.push_input(move |compiler| {
447                    let key = FuncKey::DefinedWasmFunction(module, def_func_index);
448                    let func_index = translation.module.func_index(def_func_index);
449                    let symbol = match translation
450                        .debuginfo
451                        .name_section
452                        .func_names
453                        .get(&func_index)
454                    {
455                        Some(name) => format!(
456                            "wasm[{}]::function[{}]::{}",
457                            module.as_u32(),
458                            func_index.as_u32(),
459                            Self::clean_symbol(&name)
460                        ),
461                        None => format!(
462                            "wasm[{}]::function[{}]",
463                            module.as_u32(),
464                            func_index.as_u32()
465                        ),
466                    };
467                    let func_body = func_body_data.body.clone();
468                    let data = func_body.get_binary_reader();
469                    let offset = data.original_position();
470                    let start_srcloc = FilePos::new(u32::try_from(offset).unwrap());
471                    let function = compiler
472                        .compile_function(translation, key, func_body_data, types, &symbol)
473                        .with_context(|| format!("failed to compile: {symbol}"))?;
474
475                    Ok(CompileOutput {
476                        key,
477                        symbol,
478                        function,
479                        start_srcloc,
480                        translation: Some(translation),
481                        func_body: Some(func_body),
482                    })
483                });
484
485                let func_index = translation.module.func_index(def_func_index);
486                if translation.module.functions[func_index].is_escaping() {
487                    self.push_input(move |compiler| {
488                        let key = FuncKey::ArrayToWasmTrampoline(module, def_func_index);
489                        let func_index = translation.module.func_index(def_func_index);
490                        let symbol = format!(
491                            "wasm[{}]::array_to_wasm_trampoline[{}]",
492                            module.as_u32(),
493                            func_index.as_u32()
494                        );
495                        let function = compiler
496                            .compile_array_to_wasm_trampoline(translation, types, key, &symbol)
497                            .with_context(|| format!("failed to compile: {symbol}"))?;
498                        Ok(CompileOutput {
499                            key,
500                            symbol,
501                            function,
502                            start_srcloc: FilePos::default(),
503                            translation: None,
504                            func_body: None,
505                        })
506                    });
507                }
508            }
509        }
510
511        let mut trampoline_types_seen = HashSet::new();
512        for (_func_type_index, trampoline_type_index) in types.trampoline_types() {
513            let is_new = trampoline_types_seen.insert(trampoline_type_index);
514            if !is_new {
515                continue;
516            }
517            let trampoline_func_ty = types[trampoline_type_index].unwrap_func();
518            self.push_input(move |compiler| {
519                let key = FuncKey::WasmToArrayTrampoline(trampoline_type_index);
520                let symbol = format!(
521                    "signatures[{}]::wasm_to_array_trampoline",
522                    trampoline_type_index.as_u32()
523                );
524                let function = compiler
525                    .compile_wasm_to_array_trampoline(trampoline_func_ty, key, &symbol)
526                    .with_context(|| format!("failed to compile: {symbol}"))?;
527                Ok(CompileOutput {
528                    key,
529                    function,
530                    symbol,
531                    start_srcloc: FilePos::default(),
532                    translation: None,
533                    func_body: None,
534                })
535            });
536        }
537    }
538
539    /// Compile these `CompileInput`s (maybe in parallel) and return the
540    /// resulting `UnlinkedCompileOutput`s.
541    fn compile(self, engine: &Engine) -> Result<UnlinkedCompileOutputs<'a>> {
542        let compiler = engine.try_compiler()?;
543
544        if self.inputs.len() > 0 && cfg!(miri) {
545            bail!(
546                "\
547You are attempting to compile a WebAssembly module or component that contains
548functions in Miri. Running Cranelift through Miri is known to take quite a long
549time and isn't what we want in CI at least. If this is a mistake then you should
550ignore this test in Miri with:
551
552    #[cfg_attr(miri, ignore)]
553
554If this is not a mistake then try to edit the `pulley_provenance_test` test
555which runs Cranelift outside of Miri. If you still feel this is a mistake then
556please open an issue or a topic on Zulip to talk about how best to accommodate
557the use case.
558"
559            );
560        }
561
562        let mut raw_outputs = if let Some(inlining_compiler) = compiler.inlining_compiler() {
563            if engine.tunables().inlining {
564                self.compile_with_inlining(engine, compiler, inlining_compiler)?
565            } else {
566                // Inlining compiler but inlining is disabled: compile each
567                // input and immediately finish its output in parallel, skipping
568                // call graph computation and all that.
569                engine.run_maybe_parallel::<_, _, Error, _>(self.inputs, |f| {
570                    let mut compiled = f(compiler)?;
571                    inlining_compiler.finish_compiling(
572                        &mut compiled.function,
573                        compiled.func_body.take(),
574                        &compiled.symbol,
575                    )?;
576                    Ok(compiled)
577                })?
578            }
579        } else {
580            // No inlining: just compile each individual input in parallel.
581            engine.run_maybe_parallel(self.inputs, |f| f(compiler))?
582        };
583
584        if cfg!(debug_assertions) {
585            let mut symbols: Vec<_> = raw_outputs.iter().map(|i| &i.symbol).collect();
586            symbols.sort();
587            for w in symbols.windows(2) {
588                assert_ne!(
589                    w[0], w[1],
590                    "should never have duplicate symbols, but found two functions with the symbol `{}`",
591                    w[0]
592                );
593            }
594        }
595
596        // Now that all functions have been compiled see if any
597        // wasmtime-builtin functions are necessary. If so those need to be
598        // collected and then those trampolines additionally need to be
599        // compiled.
600        compile_required_builtins(engine, &mut raw_outputs)?;
601
602        // Bucket the outputs by kind.
603        let mut outputs: BTreeMap<FuncKey, CompileOutput> = BTreeMap::new();
604        for output in raw_outputs {
605            outputs.insert(output.key, output);
606        }
607
608        Ok(UnlinkedCompileOutputs { outputs })
609    }
610
611    fn compile_with_inlining(
612        self,
613        engine: &Engine,
614        compiler: &dyn Compiler,
615        inlining_compiler: &dyn InliningCompiler,
616    ) -> Result<Vec<CompileOutput<'a>>, Error> {
617        // Our list of unlinked outputs.
618        let mut outputs = PrimaryMap::<OutputIndex, Option<CompileOutput<'_>>>::from(
619            engine.run_maybe_parallel(self.inputs, |f| f(compiler).map(Some))?,
620        );
621
622        // A map from a `FuncKey` to its index in our unlinked outputs.
623        //
624        // We will generally just be working with `OutputIndex`es, but
625        // occasionally we must translate from keys back to our index space, for
626        // example when we know that one module's function import is always
627        // satisfied with a particular `FuncKey::DefinedWasmFunction`. This map
628        // enables that translation.
629        let key_to_output: HashMap<FuncKey, OutputIndex> = inlining_functions(&outputs)
630            .map(|output_index| {
631                let output = outputs[output_index].as_ref().unwrap();
632                (output.key, output_index)
633            })
634            .collect();
635
636        // Construct the call graph for inlining.
637        //
638        // We only inline Wasm functions, not trampolines, because we rely on
639        // trampolines being in their own stack frame when we save the entry and
640        // exit SP, FP, and PC for backtraces in trampolines.
641        let call_graph = EntityGraph::<OutputIndex>::new(inlining_functions(&outputs), {
642            let mut func_keys = IndexSet::default();
643            let outputs = &outputs;
644            let key_to_output = &key_to_output;
645            move |output_index, calls| {
646                let output = outputs[output_index].as_ref().unwrap();
647                debug_assert!(is_inlining_function(output.key));
648
649                // Get this function's call graph edges as `FuncKey`s.
650                func_keys.clear();
651                inlining_compiler.calls(&output.function, &mut func_keys)?;
652
653                // Translate each of those to keys to output indices, which is
654                // what we actually need.
655                debug_assert!(calls.is_empty());
656                calls.extend(
657                    func_keys
658                        .iter()
659                        .copied()
660                        .filter_map(|key| key_to_output.get(&key).copied()),
661                );
662
663                log::trace!(
664                    "call graph edges for {output_index:?} = {:?}: {calls:?}",
665                    output.key
666                );
667
668                crate::error::Ok(())
669            }
670        })?;
671
672        // Stratify the call graph into a sequence of layers. We process each
673        // layer in order, but process functions within a layer in parallel
674        // (because they either do not call each other or are part of a
675        // mutual-recursion cycle; either way we won't inline members of the
676        // same layer into each other).
677        let strata = stratify::Strata::<OutputIndex>::new(&call_graph.filter_nodes(|f| {
678            let key = outputs[*f].as_ref().unwrap().key;
679            is_inlining_function(key)
680        }));
681        let mut layer_outputs = vec![];
682        for layer in strata.layers() {
683            // Temporarily take this layer's outputs out of our unlinked outputs
684            // list so that we can mutate these outputs (by inlining callee
685            // functions into them) while also accessing shared borrows of the
686            // unlinked outputs list (finding the callee functions we will
687            // inline).
688            debug_assert!(layer_outputs.is_empty());
689            layer_outputs.extend(layer.iter().map(|f| outputs[*f].take().unwrap()));
690
691            // Process this layer's members in parallel.
692            engine.run_maybe_parallel_mut(
693                &mut layer_outputs,
694                |output: &mut CompileOutput<'_>| {
695                    log::trace!("processing inlining for {:?}", output.key);
696                    debug_assert!(is_inlining_function(output.key));
697
698                    let caller_key = output.key;
699                    let caller_needs_gc_heap =
700                        output.translation.is_some_and(|t| t.module.needs_gc_heap);
701                    let caller = &mut output.function;
702
703                    let mut caller_size = inlining_compiler.size(caller);
704
705                    inlining_compiler.inline(caller, &mut |callee_key: FuncKey| {
706                        log::trace!("  --> considering call to {callee_key:?}");
707                        let callee_output_index: OutputIndex = key_to_output[&callee_key];
708
709                        // NB: If the callee is not inside `outputs`, then it is
710                        // in the same `Strata` layer as the caller (and
711                        // therefore is in the same strongly-connected component
712                        // as the caller, and they mutually recursive). In this
713                        // case, we do not do any inlining; communicate this
714                        // command via `?`-propagation.
715                        let callee_output = outputs[callee_output_index].as_ref()?;
716
717                        debug_assert_eq!(callee_output.key, callee_key);
718
719                        let callee = &callee_output.function;
720                        let callee_size = inlining_compiler.size(callee);
721
722                        let callee_needs_gc_heap = callee_output
723                            .translation
724                            .is_some_and(|t| t.module.needs_gc_heap);
725
726                        if Self::should_inline(InlineHeuristicParams {
727                            tunables: engine.tunables(),
728                            caller_size,
729                            caller_key,
730                            caller_needs_gc_heap,
731                            callee_size,
732                            callee_key,
733                            callee_needs_gc_heap,
734                        }) {
735                            caller_size = caller_size.saturating_add(callee_size);
736                            Some(callee)
737                        } else {
738                            None
739                        }
740                    })
741                },
742            )?;
743
744            for (f, func) in layer.iter().zip(layer_outputs.drain(..)) {
745                debug_assert!(outputs[*f].is_none());
746                outputs[*f] = Some(func);
747            }
748        }
749
750        // Fan out in parallel again and finish compiling each function.
751        engine.run_maybe_parallel(outputs.into(), |output| {
752            let mut output = output.unwrap();
753            inlining_compiler.finish_compiling(
754                &mut output.function,
755                output.func_body.take(),
756                &output.symbol,
757            )?;
758            Ok(output)
759        })
760    }
761
762    /// Implementation of our inlining heuristics.
763    ///
764    /// TODO: We should improve our heuristics:
765    ///
766    /// * One potentially promising hint that we don't currently make use of is
767    ///   how many times a function appears as the callee in call sites. For
768    ///   example, a function that appears in only a single call site, and does
769    ///   not otherwise escape, is often beneficial to inline regardless of its
770    ///   size (assuming we can then GC away the non-inlined version of the
771    ///   function, which we do not currently attempt to do).
772    ///
773    /// * Another potentially promising hint would be whether any of the call
774    ///   site's actual arguments are constants.
775    ///
776    /// * A general improvement would be removing the decision-tree style of
777    ///   control flow below and replacing it with (1) a pure estimated-benefit
778    ///   formula and (2) a benefit threshold. Whenever the estimated benefit
779    ///   reaches the threshold, we would inline the call. Both the formula and
780    ///   the threshold would be parameterized by tunables. This would
781    ///   effectively allow reprioritizing the relative importance of different
782    ///   hint sources, rather than being stuck with the sequence hard-coded in
783    ///   the decision tree below.
784    fn should_inline(
785        InlineHeuristicParams {
786            tunables,
787            caller_size,
788            caller_key,
789            caller_needs_gc_heap,
790            callee_size,
791            callee_key,
792            callee_needs_gc_heap,
793        }: InlineHeuristicParams,
794    ) -> bool {
795        log::trace!(
796            "considering inlining:\n\
797             \tcaller = {caller_key:?}\n\
798             \t\tsize = {caller_size}\n\
799             \t\tneeds_gc_heap = {caller_needs_gc_heap}\n\
800             \tcallee = {callee_key:?}\n\
801             \t\tsize = {callee_size}\n\
802             \t\tneeds_gc_heap = {callee_needs_gc_heap}"
803        );
804
805        debug_assert!(
806            tunables.inlining,
807            "shouldn't even call this method if we aren't configured for inlining"
808        );
809        debug_assert_ne!(caller_key, callee_key, "we never inline recursion");
810
811        // Put a limit on how large we can make a function via inlining to cap
812        // code bloat.
813        let sum_size = caller_size.saturating_add(callee_size);
814        if sum_size > tunables.inlining_sum_size_threshold {
815            log::trace!(
816                "  --> not inlining: the sum of the caller's and callee's sizes is greater than \
817                 the inlining-sum-size threshold: {callee_size} + {caller_size} > {}",
818                tunables.inlining_sum_size_threshold
819            );
820            return false;
821        }
822
823        // Skip inlining into array-abi functions which are entry
824        // trampolines into wasm. ABI-wise it's required that these have a
825        // single `try_call` into the module and it doesn't work if multiple
826        // get inlined or if the `try_call` goes away. Prevent all inlining
827        // to guarantee the structure of entry trampolines.
828        if caller_key.abi() == Abi::Array {
829            log::trace!("  --> not inlining: not inlining into array-abi caller");
830            return false;
831        }
832
833        // Consider whether this is an intra-module call.
834        //
835        // Inlining within a single core module has most often already been done
836        // by the toolchain that produced the module, e.g. LLVM, and any extant
837        // function calls to small callees were presumably annotated with the
838        // equivalent of `#[inline(never)]` or `#[cold]` but we don't have that
839        // information anymore.
840        match (caller_key, callee_key) {
841            (
842                FuncKey::DefinedWasmFunction(caller_module, _),
843                FuncKey::DefinedWasmFunction(callee_module, _),
844            ) => {
845                if caller_module == callee_module {
846                    match tunables.inlining_intra_module {
847                        IntraModuleInlining::Yes => {}
848
849                        IntraModuleInlining::WhenUsingGc
850                            if caller_needs_gc_heap || callee_needs_gc_heap => {}
851
852                        IntraModuleInlining::WhenUsingGc => {
853                            log::trace!(
854                                "  --> not inlining: intra-module call that does not use GC"
855                            );
856                            return false;
857                        }
858
859                        IntraModuleInlining::No => {
860                            log::trace!("  --> not inlining: intra-module call");
861                            return false;
862                        }
863                    }
864                }
865            }
866
867            _ => {}
868        }
869
870        // Small callees are often worth inlining regardless of the size of the
871        // caller.
872        if callee_size <= tunables.inlining_small_callee_size {
873            log::trace!(
874                "  --> inlining: callee's size is less than the small-callee size: \
875                 {callee_size} <= {}",
876                tunables.inlining_small_callee_size
877            );
878            return true;
879        }
880
881        log::trace!("  --> inlining: did not find a reason we should not");
882        true
883    }
884}
885
886/// The index of a function (of any kind: Wasm function, trampoline, or
887/// etc...) in our list of unlinked outputs.
888#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
889struct OutputIndex(u32);
890wasmtime_environ::entity_impl!(OutputIndex);
891
892/// Whether a function (as described by the given `FuncKey`) can
893/// participate in inlining or not (either as a candidate for being
894/// inlined into a caller or having a callee inlined into a callsite
895/// within itself).
896fn is_inlining_function(key: FuncKey) -> bool {
897    match key {
898        // Wasm functions can both be inlined into other functions and
899        // have other functions inlined into them.
900        FuncKey::DefinedWasmFunction(..) => true,
901
902        // Intrinsics can be inlined into other functions.
903        #[cfg(feature = "component-model")]
904        FuncKey::UnsafeIntrinsic(..) => true,
905
906        // Trampolines cannot participate in inlining since our
907        // unwinding and exceptions infrastructure relies on them being
908        // in their own call frames.
909        FuncKey::ArrayToWasmTrampoline(..)
910        | FuncKey::WasmToArrayTrampoline(..)
911        | FuncKey::WasmToBuiltinTrampoline(..)
912        | FuncKey::PatchableToBuiltinTrampoline(..) => false,
913        #[cfg(feature = "component-model")]
914        FuncKey::ComponentTrampoline(..) | FuncKey::ResourceDropTrampoline => false,
915
916        FuncKey::PulleyHostCall(_) => {
917            unreachable!("we don't compile artifacts for Pulley host calls")
918        }
919    }
920}
921
922/// Get just the output indices of the functions that can participate in
923/// inlining from our unlinked outputs.
924fn inlining_functions<'a>(
925    outputs: &'a PrimaryMap<OutputIndex, Option<CompileOutput<'_>>>,
926) -> impl Iterator<Item = OutputIndex> + 'a {
927    outputs.iter().filter_map(|(index, output)| {
928        if is_inlining_function(output.as_ref().unwrap().key) {
929            Some(index)
930        } else {
931            None
932        }
933    })
934}
935
936fn compile_required_builtins(engine: &Engine, raw_outputs: &mut Vec<CompileOutput>) -> Result<()> {
937    let compiler = engine.try_compiler()?;
938    let mut builtins = HashSet::new();
939    let mut new_inputs: Vec<CompileInput<'_>> = Vec::new();
940
941    let compile_builtin = |key: FuncKey| {
942        Box::new(move |compiler: &dyn Compiler| {
943            let symbol = match key {
944                FuncKey::WasmToBuiltinTrampoline(builtin) => {
945                    format!("wasmtime_builtin_{}", builtin.name())
946                }
947                FuncKey::PatchableToBuiltinTrampoline(builtin) => {
948                    format!("wasmtime_patchable_builtin_{}", builtin.name())
949                }
950                _ => unreachable!(),
951            };
952            let mut function = compiler
953                .compile_wasm_to_builtin(key, &symbol)
954                .with_context(|| format!("failed to compile `{symbol}`"))?;
955            if let Some(compiler) = compiler.inlining_compiler() {
956                compiler.finish_compiling(&mut function, None, &symbol)?;
957            }
958            Ok(CompileOutput {
959                key,
960                function,
961                symbol,
962                start_srcloc: FilePos::default(),
963                translation: None,
964                func_body: None,
965            })
966        })
967    };
968
969    for output in raw_outputs.iter() {
970        for reloc in compiler.compiled_function_relocation_targets(&*output.function.code) {
971            match reloc {
972                FuncKey::WasmToBuiltinTrampoline(builtin)
973                | FuncKey::PatchableToBuiltinTrampoline(builtin) => {
974                    if builtins.insert(builtin) {
975                        new_inputs.push(compile_builtin(reloc));
976                    }
977                }
978                _ => {}
979            }
980        }
981    }
982    raw_outputs.extend(engine.run_maybe_parallel(new_inputs, |c| c(compiler))?);
983    Ok(())
984}
985
986#[derive(Default)]
987struct UnlinkedCompileOutputs<'a> {
988    // A map from kind to `CompileOutput`.
989    outputs: BTreeMap<FuncKey, CompileOutput<'a>>,
990}
991
992impl UnlinkedCompileOutputs<'_> {
993    /// Flatten all our functions into a single list and remember each of their
994    /// indices within it.
995    fn pre_link(self) -> PreLinkOutput {
996        // We must ensure that `compiled_funcs` contains the function bodies
997        // sorted by their `FuncKey`, as `CompiledFunctionsTable` relies on that
998        // property.
999        //
1000        // Furthermore, note that, because the order functions end up in
1001        // `compiled_funcs` is the order they will ultimately be laid out inside
1002        // the object file, we will group all trampolines together, all defined
1003        // Wasm functions from the same module together, and etc... This is a
1004        // nice property, because it means that (a) cold functions, like builtin
1005        // trampolines, are not interspersed between hot Wasm functions, and (b)
1006        // Wasm functions that are likely to call each other (i.e. are in the
1007        // same module together) are grouped together.
1008        let mut compiled_funcs = vec![];
1009
1010        let mut indices = FunctionIndices::default();
1011        let mut needs_gc_heap = false;
1012
1013        // NB: Iteration over this `BTreeMap` ensures that we uphold
1014        // `compiled_func`'s sorted property.
1015        for output in self.outputs.into_values() {
1016            needs_gc_heap |= output.function.needs_gc_heap;
1017
1018            let index = compiled_funcs.len();
1019            compiled_funcs.push((output.symbol, output.key, output.function.code));
1020
1021            if output.start_srcloc != FilePos::none() {
1022                indices
1023                    .start_srclocs
1024                    .insert(output.key, output.start_srcloc);
1025            }
1026
1027            indices.indices.insert(output.key, index);
1028        }
1029
1030        PreLinkOutput {
1031            needs_gc_heap,
1032            compiled_funcs,
1033            indices,
1034        }
1035    }
1036}
1037
1038/// Our pre-link functions that have been flattened into a single list.
1039struct PreLinkOutput {
1040    /// Whether or not any of these functions require a GC heap
1041    needs_gc_heap: bool,
1042    /// The flattened list of (symbol name, FuncKey, compiled
1043    /// function) triples, as they will be laid out in the object
1044    /// file.
1045    compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1046    /// The `FunctionIndices` mapping our function keys to indices in that flat
1047    /// list.
1048    indices: FunctionIndices,
1049}
1050
1051#[derive(Default)]
1052struct FunctionIndices {
1053    // A map of wasm functions and where they're located in the original file.
1054    start_srclocs: HashMap<FuncKey, FilePos>,
1055
1056    // The index of each compiled function in `compiled_funcs`.
1057    indices: BTreeMap<FuncKey, usize>,
1058}
1059
1060impl FunctionIndices {
1061    /// Link the compiled functions together, resolving relocations, and append
1062    /// them to the given ELF file.
1063    fn link_and_append_code<'a>(
1064        self,
1065        mut obj: object::write::Object<'static>,
1066        engine: &'a Engine,
1067        compiled_funcs: Vec<(String, FuncKey, Box<dyn Any + Send + Sync>)>,
1068        translations: PrimaryMap<StaticModuleIndex, ModuleTranslation<'_>>,
1069        dwarf_package_bytes: Option<&[u8]>,
1070    ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> {
1071        // Append all the functions to the ELF file.
1072        //
1073        // The result is a vector parallel to `compiled_funcs` where
1074        // `symbol_ids_and_locs[i]` is the symbol ID and function location of
1075        // `compiled_funcs[i]`.
1076        let compiler = engine.try_compiler()?;
1077        let tunables = engine.tunables();
1078        let symbol_ids_and_locs = compiler.append_code(
1079            &mut obj,
1080            &compiled_funcs,
1081            &|_caller_index: usize, callee: FuncKey| {
1082                self.indices.get(&callee).copied().unwrap_or_else(|| {
1083                    panic!("cannot resolve relocation! no index for callee {callee:?}")
1084                })
1085            },
1086        )?;
1087
1088        // If requested, generate and add DWARF information.
1089        if tunables.debug_native {
1090            compiler.append_dwarf(
1091                &mut obj,
1092                &translations,
1093                &|module, func| {
1094                    let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)];
1095                    let (symbol, _) = symbol_ids_and_locs[i];
1096                    let (_, _, compiled_func) = &compiled_funcs[i];
1097                    (symbol, &**compiled_func)
1098                },
1099                dwarf_package_bytes,
1100                tunables,
1101            )?;
1102        }
1103
1104        let mut table_builder = CompiledFunctionsTableBuilder::new();
1105        for (key, compiled_func_index) in &self.indices {
1106            let (_, func_loc) = symbol_ids_and_locs[*compiled_func_index];
1107            let src_loc = self
1108                .start_srclocs
1109                .get(key)
1110                .copied()
1111                .unwrap_or_else(FilePos::none);
1112            table_builder.push_func(*key, func_loc, src_loc);
1113        }
1114
1115        let mut obj = wasmtime_environ::ObjectBuilder::new(obj, tunables);
1116        let modules = translations
1117            .into_iter()
1118            .map(|(_, mut translation)| {
1119                // If configured attempt to use static memory initialization
1120                // which can either at runtime be implemented as a single memcpy
1121                // to initialize memory or otherwise enabling
1122                // virtual-memory-tricks such as mmap'ing from a file to get
1123                // copy-on-write.
1124                if engine.tunables().memory_init_cow {
1125                    let align = compiler.page_size_align();
1126                    let max_always_allowed = engine.config().memory_guaranteed_dense_image_size;
1127                    translation.try_static_init(align, max_always_allowed);
1128                }
1129
1130                // Attempt to convert table initializer segments to FuncTable
1131                // representation where possible, to enable table lazy init.
1132                if engine.tunables().table_lazy_init {
1133                    translation.try_func_table_init();
1134                }
1135
1136                obj.append(translation)
1137            })
1138            .collect::<Result<PrimaryMap<_, _>>>()?;
1139
1140        let artifacts = Artifacts {
1141            modules,
1142            table: table_builder.finish(),
1143        };
1144
1145        Ok((obj, artifacts))
1146    }
1147}
1148
1149/// The artifacts necessary for finding and calling Wasm functions at runtime,
1150/// to be serialized into an ELF file.
1151struct Artifacts {
1152    modules: PrimaryMap<StaticModuleIndex, CompiledModuleInfo>,
1153    table: CompiledFunctionsTable,
1154}
1155
1156impl Artifacts {
1157    /// Assuming this compilation was for a single core Wasm module, get the
1158    /// resulting `CompiledModuleInfo`.
1159    fn unwrap_as_module_info(self) -> (CompiledModuleInfo, CompiledFunctionsTable) {
1160        assert_eq!(self.modules.len(), 1);
1161        let info = self.modules.into_iter().next().unwrap().1;
1162        let table = self.table;
1163        (info, table)
1164    }
1165}
1166
1167/// Extend `dest` with `items` and return the range of indices in `dest` where
1168/// they ended up.
1169fn extend_with_range<T>(dest: &mut Vec<T>, items: impl IntoIterator<Item = T>) -> Range<u32> {
1170    let start = dest.len();
1171    let start = u32::try_from(start).unwrap();
1172
1173    dest.extend(items);
1174
1175    let end = dest.len();
1176    let end = u32::try_from(end).unwrap();
1177
1178    start..end
1179}