cranelift_codegen/inline.rs

//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc. Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.
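//!
//! # Example
//!
//! A minimal sketch of a user-defined inlining policy. This is illustrative
//! only: `MyInliner` and its `body_of` lookup are hypothetical, user-provided
//! pieces, not part of Cranelift.
//!
//! ```ignore
//! use alloc::borrow::Cow;
//! use cranelift_codegen::inline::{Inline, InlineCommand};
//! use cranelift_codegen::ir;
//!
//! struct MyInliner {
//!     // ... user-defined state, e.g. a map from function names to their
//!     // legalized, verified CLIF bodies ...
//! }
//!
//! impl Inline for MyInliner {
//!     fn inline(
//!         &mut self,
//!         caller: &ir::Function,
//!         _call_inst: ir::Inst,
//!         _call_opcode: ir::Opcode,
//!         callee: ir::FuncRef,
//!         _call_args: &[ir::Value],
//!     ) -> InlineCommand<'_> {
//!         // `body_of` is a hypothetical helper that resolves the callee's
//!         // external name to its CLIF body, if we have one and our
//!         // heuristics say inlining it is worthwhile.
//!         match self.body_of(&caller.dfg.ext_funcs[callee]) {
//!             Some(body) => InlineCommand::Inline {
//!                 callee: Cow::Borrowed(body),
//!                 visit_callee: false,
//!             },
//!             None => InlineCommand::KeepCall,
//!         }
//!     }
//! }
//!
//! // Then, during compilation:
//! //
//! //     ctx.inline(&mut my_inliner)?;
//! ```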

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        visit_callee: bool,
    },
}

/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
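///
/// # Example
///
/// A trivial implementation that keeps every call out-of-line (a sketch; real
/// implementations would consult their own heuristics and CLIF bodies):
///
/// ```ignore
/// use cranelift_codegen::inline::{Inline, InlineCommand};
/// use cranelift_codegen::ir;
///
/// struct NeverInline;
///
/// impl Inline for NeverInline {
///     fn inline(
///         &mut self,
///         _caller: &ir::Function,
///         _call_inst: ir::Inst,
///         _call_opcode: ir::Opcode,
///         _callee: ir::FuncRef,
///         _call_args: &[ir::Value],
///     ) -> InlineCommand<'_> {
///         InlineCommand::KeepCall
///     }
/// }
/// ```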
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementor must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}

impl<'a, T> Inline for &'a mut T
where
    T: Inline,
{
    fn inline(
        &mut self,
        caller: &ir::Function,
        inst: ir::Inst,
        opcode: ir::Opcode,
        callee: ir::FuncRef,
        args: &[ir::Value],
    ) -> InlineCommand<'_> {
        (*self).inline(caller, inst, opcode, callee, args)
    }
}

/// Walk the given function, invoke the `Inline` implementation for each call
/// instruction, and inline the callee when directed to do so.
///
/// Returns whether any call was inlined.
pub(crate) fn do_inlining(
    func: &mut ir::Function,
    mut inliner: impl Inline,
) -> CodegenResult<bool> {
    trace!("function {} before inlining: {}", func.name, func);

    let mut inlined_any = false;
    let mut allocs = InliningAllocs::default();

    let mut cursor = FuncCursor::new(func);
    'block_loop: while let Some(block) = cursor.next_block() {
        // Always keep track of our previous cursor position. If the current
        // position is a function call that we inline, then the previous
        // position is just before the inlined callee body. After inlining a
        // call, the Cranelift user can decide whether to consider any function
        // calls in the inlined callee for further inlining or not. When they
        // do, we back up to this previous cursor position so that our
        // traversal will then continue over the inlined body.
        let mut prev_pos;

        while let Some(inst) = {
            prev_pos = cursor.position();
            cursor.next_inst()
        } {
            // Make sure that `block` is always `inst`'s block, even with all of
            // our cursor-position-updating and block-splitting-during-inlining
            // shenanigans below.
            debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));

            match cursor.func.dfg.insts[inst] {
                ir::InstructionData::Call { func_ref, .. }
                    if cursor.func.dfg.ext_funcs[func_ref].patchable =>
                {
                    // Can't inline patchable calls; they need to remain
                    // patchable, and inlining the whole body is decidedly
                    // *not* patchable!
                }

                ir::InstructionData::Call {
                    opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
                    args: _,
                    func_ref,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                None,
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::TryCall {
                    opcode: opcode @ ir::Opcode::TryCall,
                    args: _,
                    func_ref,
                    exception,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                Some(exception),
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::CallIndirect { .. }
                | ir::InstructionData::TryCallIndirect { .. } => {
                    // Can't inline indirect calls; some earlier pass needs to
                    // rewrite them into direct calls first, when possible.
                }
                _ => {
                    debug_assert!(
                        !cursor.func.dfg.insts[inst].opcode().is_call(),
                        "should have matched all call instructions, but found: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                }
            }
        }
    }

    if inlined_any {
        trace!("function {} after inlining: {}", func.name, func);
    } else {
        trace!("function {} did not have any callees inlined", func.name);
    }

    Ok(inlined_any)
}

#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes all kinds of calls that return to the caller, not just
    /// the literal `call` instruction: `call_indirect`, `try_call`,
    /// `try_call_indirect`, etc. However, it does not include `return_call`
    /// and `return_call_indirect` instructions: the caller cannot catch
    /// exceptions that those calls throw, because the caller is no longer on
    /// the stack as soon as they are executed.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}

impl InliningAllocs {
    fn reset(&mut self, callee: &ir::Function) {
        let InliningAllocs {
            values,
            constants,
            user_external_name_refs,
            calls_needing_exception_table_fixup,
        } = self;

        values.clear();
        values.resize(callee.dfg.len_values());

        constants.clear();
        constants.resize(callee.dfg.constants.len());

        user_external_name_refs.clear();
        user_external_name_refs.resize(callee.params.user_named_funcs().len());

        // Note: We do not reserve capacity for
        // `calls_needing_exception_table_fixup` because it is a sparse set and
        // we don't know how large it needs to be ahead of time.
        calls_needing_exception_table_fixup.clear();
    }

    fn set_inlined_value(
        &mut self,
        callee: &ir::Function,
        callee_val: ir::Value,
        inlined_val: ir::Value,
    ) {
        trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
        debug_assert!(self.values[callee_val].is_none());
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        debug_assert!(self.values[resolved_callee_val].is_none());
        self.values[resolved_callee_val] = Some(inlined_val).into();
    }

    fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        self.values[resolved_callee_val].expand()
    }
}

/// Inline one particular function call.
///
/// Returns the last inlined block in the layout.
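///
/// Schematically (an illustrative sketch, with made-up value and block
/// numbers), inlining `fn0` at a plain `call` site
///
/// ```text
///     block0:
///         ...
///         v2 = call fn0(v0, v1)
///         v3 = iadd v2, v0
/// ```
///
/// produces something like
///
/// ```text
///     block0:
///         ...
///         jump block10                 ;; inlined copy of fn0's entry block
///
///     block10:
///         ...inlined body of fn0...
///         jump block11(v9)             ;; was `return v9` in fn0
///
///     block11(v10: i32):               ;; control-flow join point
///         v3 = iadd v10, v0            ;; v2 is now an alias of v10
/// ```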
fn inline_one(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee_func_ref: ir::FuncRef,
    call_block: ir::Block,
    call_inst: ir::Inst,
    call_opcode: ir::Opcode,
    callee: &ir::Function,
    call_exception_table: Option<ir::ExceptionTable>,
) -> ir::Block {
    trace!(
        "Inlining call {call_inst:?}: {}\n\
         with callee = {callee:?}",
        func.dfg.display_inst(call_inst)
    );

    // Type check the callee signature.
    let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
    let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
    assert_eq!(expected_callee_sig, &callee.signature);

    allocs.reset(callee);

    // First, append the various callee entity arenas to the end of the
    // caller's entity arenas.
    let entity_map = create_entities(allocs, func, callee);

    // Inlined prologue: split the call instruction's block at the point of the
    // call and replace the call with a jump.
    let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
    let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);

    // Prepare for translating the actual instructions by inserting the inlined
    // blocks into the caller's layout in the same order that they appear in the
    // callee.
    let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);

    // Get a copy of the debug tags on the call instruction; these are
    // prepended to the debug tags on inlined instructions. Remove them from
    // the call itself, as it will be rewritten to a jump (which cannot have
    // tags).
    let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
    func.debug_tags.set(call_inst, []);

    // Translate each instruction from the callee into the caller, appending
    // them to their associated block in the caller.
    //
    // Note that we iterate over the callee with a pre-order traversal so that
    // we see value defs before uses.
    for callee_block in Dfs::new().pre_order_iter(callee) {
        let inlined_block = entity_map.inlined_block(callee_block);
        trace!(
            "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?})"
        );

        let mut next_callee_inst = callee.layout.first_inst(callee_block);
        while let Some(callee_inst) = next_callee_inst {
            trace!(
                "Processing callee instruction {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            assert_ne!(
                callee.dfg.insts[callee_inst].opcode(),
                ir::Opcode::GlobalValue,
                "callee must already be legalized, we shouldn't see any `global_value` \
                 instructions when inlining; found {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            // Remap the callee instruction's entities and insert it into the
            // caller's DFG.
            let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
                allocs: &allocs,
                func,
                callee,
                entity_map: &entity_map,
            });
            let inlined_inst = func.dfg.make_inst(inlined_inst_data);
            func.layout.append_inst(inlined_inst, inlined_block);

            // Copy over debug tags, translating referenced entities as
            // appropriate.
            let debug_tags = callee.debug_tags.get(callee_inst);
            // If there are tags on the inlined instruction, we always add
            // tags, and we prepend any tags from the call instruction; but we
            // don't add tags if only the call site had them (that would
            // otherwise mean that every single instruction in an inlined
            // function body would get tags).
            if !debug_tags.is_empty() {
                let tags = call_debug_tags
                    .iter()
                    .cloned()
                    .chain(debug_tags.iter().map(|tag| match *tag {
                        DebugTag::User(value) => DebugTag::User(value),
                        DebugTag::StackSlot(slot) => {
                            DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
                        }
                    }))
                    .collect::<SmallVec<[_; 4]>>();
                func.debug_tags.set(inlined_inst, tags);
            }

            let opcode = callee.dfg.insts[callee_inst].opcode();
            if opcode.is_return() {
                // Instructions that return do not define any values, so we
                // don't need to worry about that, but we do need to fix them up
                // so that they return by jumping to our control-flow join
                // block, rather than returning from the caller.
                if let Some(return_block) = return_block {
                    fixup_inst_that_returns(
                        allocs,
                        func,
                        callee,
                        &entity_map,
                        call_opcode,
                        inlined_inst,
                        callee_inst,
                        return_block,
                        call_stack_map.as_deref(),
                    );
                } else {
                    // If we are inlining a callee that was invoked via
                    // `return_call`, we leave inlined return instructions
                    // as-is: there is no logical caller frame on the stack to
                    // continue to.
                    debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
                }
            } else {
                // Make the instruction's result values.
                let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
                func.dfg.make_inst_results(inlined_inst, ctrl_typevar);

                // Update the value map for this instruction's defs.
                let callee_results = callee.dfg.inst_results(callee_inst);
                let inlined_results = func.dfg.inst_results(inlined_inst);
                debug_assert_eq!(callee_results.len(), inlined_results.len());
                for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
                    allocs.set_inlined_value(callee, *callee_val, *inlined_val);
                }

                if opcode.is_call() {
                    append_stack_map_entries(
                        func,
                        callee,
                        &entity_map,
                        call_stack_map.as_deref(),
                        inlined_inst,
                        callee_inst,
                    );

                    // When we are inlining a `try_call` call site, we need to merge
                    // the call site's exception table into the inlined calls'
                    // exception tables. This can involve rewriting regular `call`s
                    // into `try_call`s, which requires mutating the CFG because
                    // `try_call` is a block terminator. However, we can't mutate
                    // the CFG in the middle of this traversal because we rely on
                    // the existence of a one-to-one mapping between the callee
                    // layout and the inlined layout. Instead, we record the set of
                    // inlined call instructions that will need fixing up, and
                    // perform that possibly-CFG-mutating exception table merging in
                    // a follow-up pass, when we no longer rely on that one-to-one
                    // layout mapping.
                    debug_assert_eq!(
                        call_opcode == ir::Opcode::TryCall,
                        call_exception_table.is_some()
                    );
                    if call_opcode == ir::Opcode::TryCall {
                        allocs
                            .calls_needing_exception_table_fixup
                            .push(inlined_inst);
                    }
                }
            }

            trace!(
                "  --> inserted inlined instruction {inlined_inst:?}: {}",
                func.dfg.display_inst(inlined_inst)
            );

            next_callee_inst = callee.layout.next_inst(callee_inst);
        }
    }

    // We copied *all* callee blocks into the caller's layout, but only copied
    // the callee instructions in *reachable* callee blocks into the caller's
    // associated blocks. Therefore, any *unreachable* blocks are empty in the
    // caller, which is invalid CLIF because all blocks must end in a
    // terminator, so do a quick pass over the inlined blocks and remove any
    // empty blocks from the caller's layout.
    for block in entity_map.iter_inlined_blocks(func) {
        if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
            log::trace!("removing unreachable inlined block from layout: {block}");

            // If the block being removed is our last-inlined block, then back
            // it up to the previous block in the layout, which will be the new
            // last-inlined block after this one's removal.
            if block == last_inlined_block {
                last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
                    "there will always be at least the block that contained the call we are \
                     inlining",
                );
            }

            func.layout.remove_block(block);
        }
    }

    // Final step: fix up the exception tables of any inlined calls when we are
    // inlining a `try_call` site.
    //
    // Subtly, this requires rewriting non-catching `call[_indirect]`
    // instructions into `try_call[_indirect]` instructions so that exceptions
    // that unwound through the original callee frame and were caught by the
    // caller's `try_call` do not unwind past this inlined frame. And turning a
    // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
    // between callee blocks and inlined blocks, so we delay these fixups to
    // this final step, when we no longer rely on that mapping.
    debug_assert!(
        allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
    );
    debug_assert_eq!(
        call_opcode == ir::Opcode::TryCall,
        call_exception_table.is_some()
    );
    if let Some(call_exception_table) = call_exception_table {
        fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
    }

    debug_assert!(
        func.layout.is_block_inserted(last_inlined_block),
        "last_inlined_block={last_inlined_block} should be inserted in the layout"
    );
    last_inlined_block
}

/// Append stack map entries from the caller and callee to the given inlined
/// instruction.
fn append_stack_map_entries(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
) {
    // Add the caller's stack map to this call. These entries already refer to
    // caller entities and do not need further translation.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        call_stack_map
            .iter()
            .flat_map(|entries| entries.iter().cloned()),
    );

    // Append the callee's stack map to this call. These entries refer to
    // callee entities and therefore do require translation into the caller's
    // index space.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        callee
            .dfg
            .user_stack_map_entries(callee_inst)
            .iter()
            .flat_map(|entries| entries.iter())
            .map(|entry| ir::UserStackMapEntry {
                ty: entry.ty,
                slot: entity_map.inlined_stack_slot(entry.slot),
                offset: entry.offset,
            }),
    );
}

/// Create or update the exception tables for any inlined call instructions:
/// when inlining at a `try_call` site, we must forward our exceptional edges
/// into each inlined call instruction.
fn fixup_inlined_call_exception_tables(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_exception_table: ir::ExceptionTable,
) {
    // Split a block at a `call[_indirect]` instruction, detach the
    // instruction's results, and alias them to the new block's parameters.
    let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());

        // Split the block.
        let next_inst = func
            .layout
            .next_inst(inst)
            .expect("inst is not a terminator, should have a successor");
        let new_block = func.dfg.blocks.add();
        func.layout.split_block(new_block, next_inst);

        // `try_call[_indirect]` instructions do not define values themselves;
        // the normal-return block has parameters for the results. So remove
        // this instruction's results, create an associated block parameter for
        // each of them, and alias them to the new block parameter.
        let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
        func.dfg.detach_inst_results(inst);
        for old_result in old_results {
            let ty = func.dfg.value_type(old_result);
            let new_block_param = func.dfg.append_block_param(new_block, ty);
            func.dfg.change_to_alias(old_result, new_block_param);
        }

        new_block
    };

    // Clone the caller's exception table, updating it for use in the current
    // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
    let clone_exception_table_for_this_call = |func: &mut ir::Function,
                                               signature: ir::SigRef,
                                               new_block: ir::Block|
     -> ir::ExceptionTable {
        let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
            .deep_clone(&mut func.stencil.dfg.value_lists);

        *exception.signature_mut() = signature;

        let returns_len = func.dfg.signatures[signature].returns.len();
        let returns_len = u32::try_from(returns_len).unwrap();

        *exception.normal_return_mut() = ir::BlockCall::new(
            new_block,
            (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
            &mut func.dfg.value_lists,
        );

        func.dfg.exception_tables.push(exception)
    };

    for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_return());
        match func.dfg.insts[inst] {
            //     current_block:
            //         preds...
            //         rets... = call f(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call f(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let signature = func.dfg.ext_funcs[func_ref].signature;
                let exception = clone_exception_table_for_this_call(func, signature, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCall {
                    opcode: ir::Opcode::TryCall,
                    args,
                    func_ref,
                    exception,
                };
            }

            //     current_block:
            //         preds...
            //         rets... = call_indirect sig, val(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
                    opcode: ir::Opcode::TryCallIndirect,
                    args,
                    exception,
                };
            }

            // For `try_call[_indirect]` instructions, we just need to merge the
            // exception tables.
            ir::InstructionData::TryCall {
                opcode: ir::Opcode::TryCall,
                exception,
                ..
            }
            | ir::InstructionData::TryCallIndirect {
                opcode: ir::Opcode::TryCallIndirect,
                exception,
                ..
            } => {
                // Construct a new exception table that consists of the inlined
                // instruction's exception table match sequence, with the
                // inlining site's exception table appended. This ensures that
                // the first-match semantics emulate the original behavior of
                // matching in the inner frame first.
                let sig = func.dfg.exception_tables[exception].signature();
                let normal_return = *func.dfg.exception_tables[exception].normal_return();
                let exception_data = ExceptionTableData::new(
                    sig,
                    normal_return,
                    func.dfg.exception_tables[exception]
                        .items()
                        .chain(func.dfg.exception_tables[call_exception_table].items()),
                )
                .deep_clone(&mut func.dfg.value_lists);

                func.dfg.exception_tables[exception] = exception_data;
            }

            otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
        }
    }
}

/// After having created an inlined version of a callee instruction that returns
/// in the caller, we need to fix it up so that it doesn't actually return
/// (since we are already in the caller's frame) and instead just jumps to the
/// control-flow join point.
fn fixup_inst_that_returns(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_opcode: ir::Opcode,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
    return_block: ir::Block,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
) {
    debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
    match func.dfg.insts[inlined_inst] {
        //     return rets...
        //
        // becomes
        //
        //     jump return_block(rets...)
        ir::InstructionData::MultiAry {
            opcode: ir::Opcode::Return,
            args,
        } => {
            let rets = SmallBlockArgVec::from_iter(
                args.as_slice(&func.dfg.value_lists)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            func.dfg.replace(inlined_inst).jump(return_block, &rets);
        }

        //     return_call f(args...)
        //
        // becomes
        //
        //     rets... = call f(args...)
        //     jump return_block(rets...)
        ir::InstructionData::Call {
            opcode: ir::Opcode::ReturnCall,
            args,
            func_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        //     return_call_indirect sig, val(args...)
        //
        // becomes
        //
        //     rets... = call_indirect sig, val(args...)
        //     jump return_block(rets...)
        ir::InstructionData::CallIndirect {
            opcode: ir::Opcode::ReturnCallIndirect,
            args,
            sig_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        inst_data => unreachable!(
            "should have handled all `is_return() == true` instructions above; \
             got {inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    allocs: &'a InliningAllocs,
    func: &'a mut ir::Function,
    callee: &'a ir::Function,
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    fn map_block(&mut self, block: ir::Block) -> ir::Block {
        self.entity_map.inlined_block(block)
    }

    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// Returns the last inlined block in the layout.
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
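///
/// For a plain `call`, schematically (an illustrative sketch with made-up
/// value and block numbers):
///
/// ```text
///     block0:                           block0:
///         v2 = call fn0(v0)                 v2 = call fn0(v0)   ;; rewritten later
///         v3 = iadd v2, v0      ==>     block1(v4: i32):        ;; new return block
///                                           v3 = iadd v4, v0    ;; v2 aliased to v4
/// ```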
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block, or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't currently do any jump-threading or branch
    //   simplification in the mid-end, and removing unnecessary blocks in this
    //   way can help some subsequent mid-end optimizations. If, in the future,
    //   we gain support for jump-threading optimizations in the mid-end, we can
    //   come back and simplify the below code a bit to always generate the
    //   temporary block, and then rely on the subsequent optimizations to clean
    //   everything up.
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            {
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
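///
/// Schematically (illustrative block numbers):
///
/// ```text
///     v2 = call fn0(v0, v1)   ==>   jump block10
/// ```
///
/// where `block10` is the inlined copy of the callee's entry block. No block
/// arguments are needed: the call block dominates every inlined block, so the
/// callee's entry-block parameters are simply mapped to `v0` and `v1`.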
fn replace_call_with_jump(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_inst: ir::Inst,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> Option<ir::UserStackMapEntryVec> {
    trace!("Replacing `call` with `jump`");
    trace!(
        "  --> call instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    let callee_entry_block = callee
        .layout
        .entry_block()
        .expect("callee function should have an entry block");
    let callee_param_values = callee.dfg.block_params(callee_entry_block);
    let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
    debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
    debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
    for (abi, (callee_param_value, caller_arg_value)) in callee
        .signature
        .params
        .iter()
        .zip(callee_param_values.into_iter().zip(caller_arg_values))
    {
        debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
        debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
        allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
    }

    // Replace the caller's call instruction with a jump to the caller's inlined
    // copy of the callee's entry block.
    //
    // Note that the call block dominates the inlined entry block (and also all
    // other inlined blocks) so we can reference the arguments directly, and do
    // not need to add block parameters to the inlined entry block.
    let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
    func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
    trace!(
        "  --> replaced with jump instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    func.dfg.take_user_stack_map_entries(call_inst)
}

/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller, remember
    // each entity's initial offset, and then map from a callee entity to the
    // inlined caller entity by simply adding that initial offset to the
    // callee's index (see the illustrative example below). This should be both
    // faster and simpler than the alternative. Most of these sets are
    // relatively small, and they rarely have too much dead code in practice,
    // so this is a good trade-off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    block_offset: Option<u32>,
    global_value_offset: Option<u32>,
    sig_ref_offset: Option<u32>,
    func_ref_offset: Option<u32>,
    stack_slot_offset: Option<u32>,
    dynamic_type_offset: Option<u32>,
    dynamic_stack_slot_offset: Option<u32>,
    immediate_offset: Option<u32>,
}

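// An illustrative example of the offset scheme (the numbers are made up): if
// the caller has 10 blocks when inlining begins, then `block_offset` is
// `Some(10)`, and callee `block3` maps to the inlined caller block
// `ir::Block::from_u32(10 + 3)`, i.e. `block13`.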
impl EntityMap {
    fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
        let offset = self
            .block_offset
            .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
        ir::Block::from_u32(offset + callee_block.as_u32())
    }

    fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
        let start = self.block_offset.expect(
            "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
        );

        let end = func.dfg.blocks.len();
        let end = u32::try_from(end).unwrap();

        (start..end).map(|i| ir::Block::from_u32(i))
    }

    fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
        let offset = self
            .global_value_offset
            .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
        ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
    }

    fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
        let offset = self.sig_ref_offset.expect(
            "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
        );
        ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
    }

    fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
        let offset = self.func_ref_offset.expect(
            "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
        );
        ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
    }

    fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
        let offset = self.stack_slot_offset.expect(
            "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
        );
        ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
    }

    fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
        let offset = self.dynamic_type_offset.expect(
            "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
        );
        ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
    }

    fn inlined_dynamic_stack_slot(
        &self,
        callee_dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        let offset = self.dynamic_stack_slot_offset.expect(
            "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
        );
        ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
    }

    fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
        let offset = self.immediate_offset.expect(
            "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
        );
        ir::Immediate::from_u32(offset + callee_immediate.as_u32())
    }
}

/// Translate all of the callee's various entities into the caller, producing
/// an `EntityMap` that can be used to translate callee entity references into
/// inlined caller entity references.
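///
/// A minimal usage sketch (assuming an `InliningAllocs` value is already in
/// scope):
///
/// ```ignore
/// let entity_map = create_entities(&mut allocs, func, &callee);
/// // Afterwards, callee entity references can be rewritten, e.g.:
/// let inlined_block = entity_map.inlined_block(callee_block);
/// let inlined_sig_ref = entity_map.inlined_sig_ref(callee_sig_ref);
/// ```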
fn create_entities(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
) -> EntityMap {
    let mut entity_map = EntityMap::default();

    entity_map.block_offset = Some(create_blocks(allocs, func, callee));
    entity_map.global_value_offset = Some(create_global_values(func, callee));
    entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
    create_user_external_name_refs(allocs, func, callee);
    entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
    entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
    entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
    entity_map.dynamic_stack_slot_offset =
        Some(create_dynamic_stack_slots(func, callee, &entity_map));
    entity_map.immediate_offset = Some(create_immediates(func, callee));

    // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
    // for `ir::Constant`s. Nonetheless, we still insert them into the caller
    // now, at the same time as the rest of our entities.
    create_constants(allocs, func, callee);

    entity_map
}

/// Create inlined blocks in the caller for every block in the callee.
fn create_blocks(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
) -> u32 {
    let offset = func.dfg.blocks.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.blocks.reserve(callee.dfg.blocks.len());
    for callee_block in callee.dfg.blocks.iter() {
        let caller_block = func.dfg.blocks.add();
        trace!("Callee {callee_block:?} = inlined {caller_block:?}");

        if callee.layout.is_cold(callee_block) {
            func.layout.set_cold(caller_block);
        }

        // Note: the entry block does not need parameters because its only
        // predecessor is the call block, and we associate the callee's
        // parameters with the caller's arguments directly.
        if callee.layout.entry_block() != Some(callee_block) {
            for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
                let ty = callee.dfg.value_type(*callee_param);
                let caller_param = func.dfg.append_block_param(caller_block, ty);

                allocs.set_inlined_value(callee, *callee_param, caller_param);
            }
        }
    }

    offset
}

/// Copy and translate global values from the callee into the caller.
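///
/// Some global values reference other global values; those references are
/// rewritten with the same offset scheme as the entities themselves. As a
/// sketch (hypothetical indices): if the caller already has 4 global values,
/// a callee `gv1 = GlobalValueData::Load { base: gv0, .. }` is re-created as
/// caller `gv5` with its `base` rewritten to `gv4`.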
fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let gv_offset = func.global_values.len();
    let gv_offset = u32::try_from(gv_offset).unwrap();

    func.global_values.reserve(callee.global_values.len());
    for gv in callee.global_values.values() {
        func.global_values.push(match gv {
            // These kinds of global values reference other global values, so
            // we need to fix up those references.
            ir::GlobalValueData::Load {
                base,
                offset,
                global_type,
                flags,
            } => ir::GlobalValueData::Load {
                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
                offset: *offset,
                global_type: *global_type,
                flags: *flags,
            },
            ir::GlobalValueData::IAddImm {
                base,
                offset,
                global_type,
            } => ir::GlobalValueData::IAddImm {
                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
                offset: *offset,
                global_type: *global_type,
            },

            // These kinds of global values do not reference other global
            // values, so we can just clone them.
            ir::GlobalValueData::VMContext
            | ir::GlobalValueData::Symbol { .. }
            | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
        });
    }

    gv_offset
}

/// Copy `ir::SigRef`s from the callee into the caller.
fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.dfg.signatures.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.signatures.reserve(callee.dfg.signatures.len());
    for sig in callee.dfg.signatures.values() {
        func.dfg.signatures.push(sig.clone());
    }

    offset
}

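/// Translate `ir::UserExternalNameRef`s from the callee into the caller,
/// recording the callee-to-caller mapping in `allocs` for later use when
/// translating `ir::FuncRef`s.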
fn create_user_external_name_refs(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
) {
    for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
        let caller_named_func_ref = func.declare_imported_user_function(name.clone());
        allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
    }
}

/// Translate `ir::FuncRef`s from the callee into the caller.
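///
/// Note that this must run after `create_user_external_name_refs` and
/// `create_sig_refs`, since each translated `ir::ExtFuncData` needs the
/// already-translated name and signature references (see the `expect` below).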
fn create_func_refs(
    allocs: &InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dfg.ext_funcs.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
    for ir::ExtFuncData {
        name,
        signature,
        colocated,
        patchable,
    } in callee.dfg.ext_funcs.values()
    {
        func.dfg.ext_funcs.push(ir::ExtFuncData {
            name: match name {
                ir::ExternalName::User(name_ref) => {
                    ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
                        "should have translated all `ir::UserExternalNameRef`s before translating \
                         `ir::FuncRef`s",
                    ))
                }
                ir::ExternalName::TestCase(_)
                | ir::ExternalName::LibCall(_)
                | ir::ExternalName::KnownSymbol(_) => name.clone(),
            },
            signature: entity_map.inlined_sig_ref(*signature),
            colocated: *colocated,
            patchable: *patchable,
        });
    }

    offset
}

/// Copy stack slots from the callee into the caller.
fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.sized_stack_slots.len();
    let offset = u32::try_from(offset).unwrap();

    func.sized_stack_slots
        .reserve(callee.sized_stack_slots.len());
    for slot in callee.sized_stack_slots.values() {
        func.sized_stack_slots.push(slot.clone());
    }

    offset
}

/// Copy dynamic types from the callee into the caller.
fn create_dynamic_types(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dfg.dynamic_types.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg
        .dynamic_types
        .reserve(callee.dfg.dynamic_types.len());
    for ir::DynamicTypeData {
        base_vector_ty,
        dynamic_scale,
    } in callee.dfg.dynamic_types.values()
    {
        func.dfg.dynamic_types.push(ir::DynamicTypeData {
            base_vector_ty: *base_vector_ty,
            dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
        });
    }

    offset
}

/// Copy dynamic stack slots from the callee into the caller.
fn create_dynamic_stack_slots(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dynamic_stack_slots.len();
    let offset = u32::try_from(offset).unwrap();

    func.dynamic_stack_slots
        .reserve(callee.dynamic_stack_slots.len());
    for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
        func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
            kind: *kind,
            dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
        });
    }

    offset
}

/// Copy immediates from the callee into the caller.
fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.dfg.immediates.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.immediates.reserve(callee.dfg.immediates.len());
    for imm in callee.dfg.immediates.values() {
        func.dfg.immediates.push(imm.clone());
    }

    offset
}

/// Copy constants from the callee into the caller.
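///
/// The constant pool interns by content, so inserting `ConstantData` that the
/// caller already contains yields the caller's existing handle rather than a
/// fresh one; this is why a per-constant map is recorded in `allocs` instead
/// of using the offset scheme. A sketch (hypothetical data):
///
/// ```ignore
/// let a = func.dfg.constants.insert(data.clone());
/// let b = func.dfg.constants.insert(data.clone());
/// assert_eq!(a, b); // deduplicated: identical data, same handle
/// ```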
fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
    for (callee_constant, data) in callee.dfg.constants.iter() {
        let inlined_constant = func.dfg.constants.insert(data.clone());
        allocs.constants[*callee_constant] = Some(inlined_constant).into();
    }
}