cranelift_codegen/
inline.rs

//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc. Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.
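//!
//! For example, an embedder might drive inlining roughly as follows (a sketch
//! only: `MyInliner` is a hypothetical policy type, and the exact signature of
//! `Context::inline` is defined by the `Context` API rather than by this
//! example):
//!
//! ```ignore
//! let mut ctx = cranelift_codegen::Context::for_function(func);
//! // `MyInliner` implements the `Inline` trait below and encodes the
//! // embedder's policy for which calls are worth inlining.
//! let inlined_any = ctx.inline(&mut MyInliner::new())?;
//! if inlined_any {
//!     // Calls were inlined; re-running the mid-end optimizations over the
//!     // enlarged function body is likely to be profitable.
//! }
//! ```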

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        visit_callee: bool,
    },
}

/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementor must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
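    ///
    /// # Example
    ///
    /// A minimal sketch of an implementation that inlines every callee it can
    /// resolve and does not re-visit calls inside the inlined body. The
    /// `bodies` field and its `lookup_body` method are hypothetical: they
    /// stand in for however your compilation pipeline maps a `FuncRef` in the
    /// caller to the callee's CLIF.
    ///
    /// ```ignore
    /// struct InlineEverything {
    ///     // Hypothetical lookup structure owned by the embedder.
    ///     bodies: MyFunctionTable,
    /// }
    ///
    /// impl Inline for InlineEverything {
    ///     fn inline(
    ///         &mut self,
    ///         caller: &ir::Function,
    ///         _call_inst: ir::Inst,
    ///         _call_opcode: ir::Opcode,
    ///         callee: ir::FuncRef,
    ///         _call_args: &[ir::Value],
    ///     ) -> InlineCommand<'_> {
    ///         // Resolve the caller-local `FuncRef` to a callee body, if we
    ///         // have one on hand; otherwise leave the call out-of-line.
    ///         match self.bodies.lookup_body(caller, callee) {
    ///             Some(body) => InlineCommand::Inline {
    ///                 callee: Cow::Borrowed(body),
    ///                 visit_callee: false,
    ///             },
    ///             None => InlineCommand::KeepCall,
    ///         }
    ///     }
    /// }
    /// ```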
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}

impl<'a, T> Inline for &'a mut T
where
    T: Inline,
{
    fn inline(
        &mut self,
        caller: &ir::Function,
        inst: ir::Inst,
        opcode: ir::Opcode,
        callee: ir::FuncRef,
        args: &[ir::Value],
    ) -> InlineCommand<'_> {
        (*self).inline(caller, inst, opcode, callee, args)
    }
}

/// Walk the given function, invoke the `Inline` implementation for each call
/// instruction, and inline the callee when directed to do so.
///
/// Returns whether any call was inlined.
pub(crate) fn do_inlining(
    func: &mut ir::Function,
    mut inliner: impl Inline,
) -> CodegenResult<bool> {
    trace!("function {} before inlining: {}", func.name, func);

    let mut inlined_any = false;
    let mut allocs = InliningAllocs::default();

    let mut cursor = FuncCursor::new(func);
    'block_loop: while let Some(block) = cursor.next_block() {
        // Always keep track of our previous cursor position. If the current
        // position is a function call that we will inline, then the previous
        // position is just before the inlined callee body. After inlining a
        // call, the Cranelift user can decide whether to consider any function
        // calls in the inlined callee for further inlining or not. When they
        // do, we back up to this previous cursor position so that our
        // traversal continues over the inlined body.
        let mut prev_pos;

        while let Some(inst) = {
            prev_pos = cursor.position();
            cursor.next_inst()
        } {
            // Make sure that `block` is always `inst`'s block, even with all of
            // our cursor-position-updating and block-splitting-during-inlining
            // shenanigans below.
            debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));

            match cursor.func.dfg.insts[inst] {
                ir::InstructionData::Call {
                    opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
                    args: _,
                    func_ref,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                None,
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::TryCall {
                    opcode: opcode @ ir::Opcode::TryCall,
                    args: _,
                    func_ref,
                    exception,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                Some(exception),
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::CallIndirect { .. }
                | ir::InstructionData::TryCallIndirect { .. } => {
                    // We can't inline indirect calls; some earlier pass must
                    // rewrite them into direct calls first, when possible.
                }
                _ => {
                    debug_assert!(
                        !cursor.func.dfg.insts[inst].opcode().is_call(),
                        "should have matched all call instructions, but found: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                }
            }
        }
    }

    if inlined_any {
        trace!("function {} after inlining: {}", func.name, func);
    } else {
        trace!("function {} did not have any callees inlined", func.name);
    }

    Ok(inlined_any)
}

#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes every kind of call that returns to the caller, not just
    /// the literal `call` instruction: `call_indirect`, `try_call`,
    /// `try_call_indirect`, etc. However, it does not include `return_call`
    /// and `return_call_indirect` instructions: the caller cannot catch
    /// exceptions that those calls throw, because its frame is no longer on
    /// the stack once they execute.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}

impl InliningAllocs {
    fn reset(&mut self, callee: &ir::Function) {
        let InliningAllocs {
            values,
            constants,
            user_external_name_refs,
            calls_needing_exception_table_fixup,
        } = self;

        values.clear();
        values.resize(callee.dfg.len_values());

        constants.clear();
        constants.resize(callee.dfg.constants.len());

        user_external_name_refs.clear();
        user_external_name_refs.resize(callee.params.user_named_funcs().len());

        // Note: We do not reserve capacity for
        // `calls_needing_exception_table_fixup` because it is a sparse set and
        // we don't know how large it needs to be ahead of time.
        calls_needing_exception_table_fixup.clear();
    }

    fn set_inlined_value(
        &mut self,
        callee: &ir::Function,
        callee_val: ir::Value,
        inlined_val: ir::Value,
    ) {
        trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
        debug_assert!(self.values[callee_val].is_none());
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        debug_assert!(self.values[resolved_callee_val].is_none());
        self.values[resolved_callee_val] = Some(inlined_val).into();
    }

    fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        self.values[resolved_callee_val].expand()
    }
}

/// Inline one particular function call.
///
/// Returns the last inlined block in the layout.
fn inline_one(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee_func_ref: ir::FuncRef,
    call_block: ir::Block,
    call_inst: ir::Inst,
    call_opcode: ir::Opcode,
    callee: &ir::Function,
    call_exception_table: Option<ir::ExceptionTable>,
) -> ir::Block {
    trace!(
        "Inlining call {call_inst:?}: {}\n\
         with callee = {callee:?}",
        func.dfg.display_inst(call_inst)
    );

    // Type check the callee signature.
    let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
    let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
    assert_eq!(expected_callee_sig, &callee.signature);

    allocs.reset(callee);

    // First, append various callee entity arenas to the end of the caller's
    // entity arenas.
    let entity_map = create_entities(allocs, func, callee);

    // Inlined prologue: split the call instruction's block at the point of the
    // call and replace the call with a jump.
    let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
    let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);

    // Prepare for translating the actual instructions by inserting the inlined
    // blocks into the caller's layout in the same order that they appear in the
    // callee.
    let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);

    // Get a copy of the debug tags on the call instruction; these are
    // prepended to the debug tags on inlined instructions. Remove them from
    // the call itself, as it will be rewritten to a jump (which cannot have
    // tags).
    let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
    func.debug_tags.set(call_inst, []);

    // Translate each instruction from the callee into the caller, appending
    // them to their associated block in the caller.
    //
    // Note that we iterate over the callee with a pre-order traversal so that
    // we see value defs before uses.
    for callee_block in Dfs::new().pre_order_iter(callee) {
        let inlined_block = entity_map.inlined_block(callee_block);
        trace!(
            "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?})"
        );

        let mut next_callee_inst = callee.layout.first_inst(callee_block);
        while let Some(callee_inst) = next_callee_inst {
            trace!(
                "Processing callee instruction {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            assert_ne!(
                callee.dfg.insts[callee_inst].opcode(),
                ir::Opcode::GlobalValue,
                "callee must already be legalized, we shouldn't see any `global_value` \
                 instructions when inlining; found {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            // Remap the callee instruction's entities and insert it into the
            // caller's DFG.
            let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
                allocs: &allocs,
                func,
                callee,
                entity_map: &entity_map,
            });
            let inlined_inst = func.dfg.make_inst(inlined_inst_data);
            func.layout.append_inst(inlined_inst, inlined_block);

            // Copy over debug tags, translating referenced entities as
            // appropriate.
            let debug_tags = callee.debug_tags.get(callee_inst);
            // If there are tags on the inlined instruction, we always add
            // tags, and we prepend any tags from the call instruction; but we
            // don't add tags if only the callsite had them (this would
            // otherwise mean that every single instruction in an inlined
            // function body would get tags).
            if !debug_tags.is_empty() {
                let tags = call_debug_tags
                    .iter()
                    .cloned()
                    .chain(debug_tags.iter().map(|tag| match *tag {
                        DebugTag::User(value) => DebugTag::User(value),
                        DebugTag::StackSlot(slot) => {
                            DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
                        }
                    }))
                    .collect::<SmallVec<[_; 4]>>();
                func.debug_tags.set(inlined_inst, tags);
            }

            let opcode = callee.dfg.insts[callee_inst].opcode();
            if opcode.is_return() {
                // Instructions that return do not define any values, so we
                // don't need to worry about that, but we do need to fix them up
                // so that they return by jumping to our control-flow join
                // block, rather than returning from the caller.
                if let Some(return_block) = return_block {
                    fixup_inst_that_returns(
                        allocs,
                        func,
                        callee,
                        &entity_map,
                        call_opcode,
                        inlined_inst,
                        callee_inst,
                        return_block,
                        call_stack_map.as_deref(),
                    );
                } else {
                    // If we are inlining a callee that was invoked via
                    // `return_call`, we leave inlined return instructions
                    // as-is: there is no logical caller frame on the stack to
                    // continue to.
                    debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
                }
            } else {
                // Make the instruction's result values.
                let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
                func.dfg.make_inst_results(inlined_inst, ctrl_typevar);

                // Update the value map for this instruction's defs.
                let callee_results = callee.dfg.inst_results(callee_inst);
                let inlined_results = func.dfg.inst_results(inlined_inst);
                debug_assert_eq!(callee_results.len(), inlined_results.len());
                for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
                    allocs.set_inlined_value(callee, *callee_val, *inlined_val);
                }

                if opcode.is_call() {
                    append_stack_map_entries(
                        func,
                        callee,
                        &entity_map,
                        call_stack_map.as_deref(),
                        inlined_inst,
                        callee_inst,
                    );

                    // When we are inlining a `try_call` call site, we need to merge
                    // the call site's exception table into the inlined calls'
                    // exception tables. This can involve rewriting regular `call`s
                    // into `try_call`s, which requires mutating the CFG because
                    // `try_call` is a block terminator. However, we can't mutate
                    // the CFG in the middle of this traversal because we rely on
                    // the existence of a one-to-one mapping between the callee
                    // layout and the inlined layout. Instead, we record the set of
                    // inlined call instructions that will need fixing up, and
                    // perform that possibly-CFG-mutating exception table merging in
                    // a follow-up pass, when we no longer rely on that one-to-one
                    // layout mapping.
                    debug_assert_eq!(
                        call_opcode == ir::Opcode::TryCall,
                        call_exception_table.is_some()
                    );
                    if call_opcode == ir::Opcode::TryCall {
                        allocs
                            .calls_needing_exception_table_fixup
                            .push(inlined_inst);
                    }
                }
            }

            trace!(
                "  --> inserted inlined instruction {inlined_inst:?}: {}",
                func.dfg.display_inst(inlined_inst)
            );

            next_callee_inst = callee.layout.next_inst(callee_inst);
        }
    }

    // We copied *all* callee blocks into the caller's layout, but only copied
    // the callee instructions in *reachable* callee blocks into the caller's
    // associated blocks. Therefore, any *unreachable* blocks are empty in the
    // caller, which is invalid CLIF because all blocks must end in a
    // terminator. So do a quick pass over the inlined blocks and remove any
    // empty blocks from the caller's layout.
    for block in entity_map.iter_inlined_blocks(func) {
        if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
            log::trace!("removing unreachable inlined block from layout: {block}");

            // If the block being removed is our last-inlined block, then back
            // it up to the previous block in the layout, which will be the new
            // last-inlined block after this one's removal.
            if block == last_inlined_block {
                last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
                    "there will always be at least the block that contained the call we are \
                     inlining",
                );
            }

            func.layout.remove_block(block);
        }
    }

    // Final step: fix up the exception tables of any inlined calls when we are
    // inlining a `try_call` site.
    //
    // Subtly, this requires rewriting non-catching `call[_indirect]`
    // instructions into `try_call[_indirect]` instructions so that exceptions
    // that unwound through the original callee frame and were caught by the
    // caller's `try_call` do not unwind past this inlined frame. And turning a
    // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
    // between callee blocks and inlined blocks, so we delay these fixups to
    // this final step, when we no longer rely on that mapping.
    debug_assert!(
        allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
    );
    debug_assert_eq!(
        call_opcode == ir::Opcode::TryCall,
        call_exception_table.is_some()
    );
    if let Some(call_exception_table) = call_exception_table {
        fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
    }

    debug_assert!(
        func.layout.is_block_inserted(last_inlined_block),
        "last_inlined_block={last_inlined_block} should be inserted in the layout"
    );
    last_inlined_block
}

/// Append stack map entries from the caller and callee to the given inlined
/// instruction.
fn append_stack_map_entries(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
) {
    // Add the caller's stack map to this call. These entries
    // already refer to caller entities and do not need further
    // translation.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        call_stack_map
            .iter()
            .flat_map(|entries| entries.iter().cloned()),
    );

    // Append the callee's stack map to this call. These entries
    // refer to callee entities and therefore do require
    // translation into the caller's index space.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        callee
            .dfg
            .user_stack_map_entries(callee_inst)
            .iter()
            .flat_map(|entries| entries.iter())
            .map(|entry| ir::UserStackMapEntry {
                ty: entry.ty,
                slot: entity_map.inlined_stack_slot(entry.slot),
                offset: entry.offset,
            }),
    );
}

/// Create or update the exception tables for any inlined call instructions:
/// when inlining at a `try_call` site, we must forward our exceptional edges
/// into each inlined call instruction.
fn fixup_inlined_call_exception_tables(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_exception_table: ir::ExceptionTable,
) {
    // Split a block at a `call[_indirect]` instruction, detach the
    // instruction's results, and alias them to the new block's parameters.
    let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());

        // Split the block.
        let next_inst = func
            .layout
            .next_inst(inst)
            .expect("inst is not a terminator, should have a successor");
        let new_block = func.dfg.blocks.add();
        func.layout.split_block(new_block, next_inst);

        // `try_call[_indirect]` instructions do not define values themselves;
        // the normal-return block has parameters for the results. So remove
        // this instruction's results, create an associated block parameter for
        // each of them, and alias each old result to its new block parameter.
        let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
        func.dfg.detach_inst_results(inst);
        for old_result in old_results {
            let ty = func.dfg.value_type(old_result);
            let new_block_param = func.dfg.append_block_param(new_block, ty);
            func.dfg.change_to_alias(old_result, new_block_param);
        }

        new_block
    };

    // Clone the caller's exception table, updating it for use in the current
    // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
    let clone_exception_table_for_this_call = |func: &mut ir::Function,
                                               signature: ir::SigRef,
                                               new_block: ir::Block|
     -> ir::ExceptionTable {
        let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
            .deep_clone(&mut func.stencil.dfg.value_lists);

        *exception.signature_mut() = signature;

        let returns_len = func.dfg.signatures[signature].returns.len();
        let returns_len = u32::try_from(returns_len).unwrap();

        *exception.normal_return_mut() = ir::BlockCall::new(
            new_block,
            (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
            &mut func.dfg.value_lists,
        );

        func.dfg.exception_tables.push(exception)
    };

    for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_return());
        match func.dfg.insts[inst] {
            //     current_block:
            //         preds...
            //         rets... = call f(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call f(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let signature = func.dfg.ext_funcs[func_ref].signature;
                let exception = clone_exception_table_for_this_call(func, signature, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCall {
                    opcode: ir::Opcode::TryCall,
                    args,
                    func_ref,
                    exception,
                };
            }

            //     current_block:
            //         preds...
            //         rets... = call_indirect sig, val(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
                    opcode: ir::Opcode::TryCallIndirect,
                    args,
                    exception,
                };
            }

            // For `try_call[_indirect]` instructions, we just need to merge
            // the exception tables.
            ir::InstructionData::TryCall {
                opcode: ir::Opcode::TryCall,
                exception,
                ..
            }
            | ir::InstructionData::TryCallIndirect {
                opcode: ir::Opcode::TryCallIndirect,
                exception,
                ..
            } => {
                // Construct a new exception table that consists of the inlined
                // instruction's exception table match sequence, with the
                // inlining site's exception table appended. This ensures that
                // first-match semantics emulate the original behavior of
                // matching in the inner frame first.
                let sig = func.dfg.exception_tables[exception].signature();
                let normal_return = *func.dfg.exception_tables[exception].normal_return();
                let exception_data = ExceptionTableData::new(
                    sig,
                    normal_return,
                    func.dfg.exception_tables[exception]
                        .items()
                        .chain(func.dfg.exception_tables[call_exception_table].items()),
                )
                .deep_clone(&mut func.dfg.value_lists);

                func.dfg.exception_tables[exception] = exception_data;
            }

            otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
        }
    }
}

/// After having created an inlined version of a callee instruction that returns
/// in the caller, we need to fix it up so that it doesn't actually return
/// (since we are already in the caller's frame) and instead just jumps to the
/// control-flow join point.
fn fixup_inst_that_returns(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_opcode: ir::Opcode,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
    return_block: ir::Block,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
) {
    debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
    match func.dfg.insts[inlined_inst] {
        //     return rets...
        //
        // becomes
        //
        //     jump return_block(rets...)
        ir::InstructionData::MultiAry {
            opcode: ir::Opcode::Return,
            args,
        } => {
            let rets = SmallBlockArgVec::from_iter(
                args.as_slice(&func.dfg.value_lists)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            func.dfg.replace(inlined_inst).jump(return_block, &rets);
        }

        //     return_call f(args...)
        //
        // becomes
        //
        //     rets... = call f(args...)
        //     jump return_block(rets...)
        ir::InstructionData::Call {
            opcode: ir::Opcode::ReturnCall,
            args,
            func_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        //     return_call_indirect val(args...)
        //
        // becomes
        //
        //     rets... = call_indirect val(args...)
        //     jump return_block(rets...)
        ir::InstructionData::CallIndirect {
            opcode: ir::Opcode::ReturnCallIndirect,
            args,
            sig_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        inst_data => unreachable!(
            "should have handled all `is_return() == true` instructions above; \
             got {inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    allocs: &'a InliningAllocs,
    func: &'a mut ir::Function,
    callee: &'a ir::Function,
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    fn map_block(&mut self, block: ir::Block) -> ir::Block {
        self.entity_map.inlined_block(block)
    }

    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// Returns the last inlined block in the layout.
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined block into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control-flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't currently do any jump-threading or branch
    //   simplification in the mid-end, and removing unnecessary blocks in this
    //   way can help some subsequent mid-end optimizations. If, in the future,
    //   we gain support for jump-threading optimizations in the mid-end, we can
    //   come back and simplify the below code a bit to always generate the
    //   temporary block, and then rely on the subsequent optimizations to clean
    //   everything up.
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            {
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
fn replace_call_with_jump(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_inst: ir::Inst,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> Option<ir::UserStackMapEntryVec> {
    trace!("Replacing `call` with `jump`");
    trace!(
        "  --> call instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    let callee_entry_block = callee
        .layout
        .entry_block()
        .expect("callee function should have an entry block");
    let callee_param_values = callee.dfg.block_params(callee_entry_block);
    let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
    debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
    debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
    for (abi, (callee_param_value, caller_arg_value)) in callee
        .signature
        .params
        .iter()
        .zip(callee_param_values.into_iter().zip(caller_arg_values))
    {
        debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
        debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
        allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
    }

    // Replace the caller's call instruction with a jump to the caller's inlined
    // copy of the callee's entry block.
    //
    // Note that the call block dominates the inlined entry block (and also all
    // other inlined blocks) so we can reference the arguments directly, and do
    // not need to add block parameters to the inlined entry block.
    let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
    func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
    trace!(
        "  --> replaced with jump instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    func.dfg.take_user_stack_map_entries(call_inst)
}

/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller and
    // remember each entity class's initial offset; mapping a callee entity to
    // its inlined caller entity is then just a matter of adding that initial
    // offset to the callee's index. This should be both faster and simpler
    // than the alternative: most of these sets are relatively small, and they
    // rarely contain much dead code in practice, so this is a good trade-off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
    block_offset: Option<u32>,
    global_value_offset: Option<u32>,
    sig_ref_offset: Option<u32>,
    func_ref_offset: Option<u32>,
    stack_slot_offset: Option<u32>,
    dynamic_type_offset: Option<u32>,
    dynamic_stack_slot_offset: Option<u32>,
    immediate_offset: Option<u32>,
}

impl EntityMap {
    fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
        let offset = self
            .block_offset
            .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
        ir::Block::from_u32(offset + callee_block.as_u32())
    }

    fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
        let start = self.block_offset.expect(
            "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
        );

        let end = func.dfg.blocks.len();
        let end = u32::try_from(end).unwrap();

        (start..end).map(|i| ir::Block::from_u32(i))
    }

    fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
        let offset = self
            .global_value_offset
            .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
        ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
    }

    fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
        let offset = self.sig_ref_offset.expect(
            "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
        );
        ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
    }

    fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
        let offset = self.func_ref_offset.expect(
            "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
        );
        ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
    }

    fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
        let offset = self.stack_slot_offset.expect(
            "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
        );
        ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
    }

    fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
        let offset = self.dynamic_type_offset.expect(
            "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
        );
        ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
    }

    fn inlined_dynamic_stack_slot(
        &self,
        callee_dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        let offset = self.dynamic_stack_slot_offset.expect(
            "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
        );
        ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
    }

    fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
        let offset = self.immediate_offset.expect(
            "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
        );
        ir::Immediate::from_u32(offset + callee_immediate.as_u32())
    }
}
1329
1330/// Translate all of the callee's various entities into the caller, producing an
1331/// `EntityMap` that can be used to translate callee entity references into
1332/// inlined caller entity references.
1333fn create_entities(
1334    allocs: &mut InliningAllocs,
1335    func: &mut ir::Function,
1336    callee: &ir::Function,
1337) -> EntityMap {
1338    let mut entity_map = EntityMap::default();
1339
1340    entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1341    entity_map.global_value_offset = Some(create_global_values(func, callee));
1342    entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
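    // Ordering matters below: `ir::FuncRef`s refer to `ir::SigRef`s and
    // `ir::UserExternalNameRef`s, so both must be translated before the func
    // refs are; likewise, dynamic types refer to global values, and dynamic
    // stack slots refer to dynamic types.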
1343    create_user_external_name_refs(allocs, func, callee);
1344    entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1345    entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1346    entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1347    entity_map.dynamic_stack_slot_offset =
1348        Some(create_dynamic_stack_slots(func, callee, &entity_map));
1349    entity_map.immediate_offset = Some(create_immediates(func, callee));
1350
1351    // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1352    // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1353    // now, at the same time as the rest of our entities.
1354    create_constants(allocs, func, callee);
1355
1356    entity_map
1357}
1358
1359/// Create inlined blocks in the caller for every block in the callee.
1360fn create_blocks(
1361    allocs: &mut InliningAllocs,
1362    func: &mut ir::Function,
1363    callee: &ir::Function,
1364) -> u32 {
1365    let offset = func.dfg.blocks.len();
1366    let offset = u32::try_from(offset).unwrap();
1367
1368    func.dfg.blocks.reserve(callee.dfg.blocks.len());
1369    for callee_block in callee.dfg.blocks.iter() {
1370        let caller_block = func.dfg.blocks.add();
1371        trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1372
1373        if callee.layout.is_cold(callee_block) {
1374            func.layout.set_cold(caller_block);
1375        }
1376
1377        // Note: the entry block does not need parameters because the only
1378        // predecessor is the call block and we associate the callee's
1379        // parameters with the caller's arguments directly.
1380        if callee.layout.entry_block() != Some(callee_block) {
1381            for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1382                let ty = callee.dfg.value_type(*callee_param);
1383                let caller_param = func.dfg.append_block_param(caller_block, ty);
1384
1385                allocs.set_inlined_value(callee, *callee_param, caller_param);
1386            }
1387        }
1388    }
1389
1390    offset
1391}
1392
1393/// Copy and translate global values from the callee into the caller.
1394fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1395    let gv_offset = func.global_values.len();
1396    let gv_offset = u32::try_from(gv_offset).unwrap();
1397
1398    func.global_values.reserve(callee.global_values.len());
1399    for gv in callee.global_values.values() {
1400        func.global_values.push(match gv {
1401            // These kinds of global values reference other global values, so we
1402            // need to fix up that reference.
1403            ir::GlobalValueData::Load {
1404                base,
1405                offset,
1406                global_type,
1407                flags,
1408            } => ir::GlobalValueData::Load {
1409                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1410                offset: *offset,
1411                global_type: *global_type,
1412                flags: *flags,
1413            },
1414            ir::GlobalValueData::IAddImm {
1415                base,
1416                offset,
1417                global_type,
1418            } => ir::GlobalValueData::IAddImm {
1419                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1420                offset: *offset,
1421                global_type: *global_type,
1422            },
1423
1424            // These kinds of global values do not reference other global
1425            // values, so we can just clone them.
1426            ir::GlobalValueData::VMContext
1427            | ir::GlobalValueData::Symbol { .. }
1428            | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1429        });
1430    }
1431
1432    gv_offset
1433}
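
// A hedged example of the base fixup above, with hypothetical indices: if
// the caller already had four global values (`gv0..gv3`), then `gv_offset`
// is 4, and a callee global value written in CLIF as
//
//     gv1 = load.i64 notrap aligned gv0
//
// becomes the inlined caller global value
//
//     gv5 = load.i64 notrap aligned gv4
//
// i.e. both the entity's own index and its `base` reference are shifted by
// the same offset.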
1434
1435/// Copy `ir::SigRef`s from the callee into the caller.
1436fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1437    let offset = func.dfg.signatures.len();
1438    let offset = u32::try_from(offset).unwrap();
1439
1440    func.dfg.signatures.reserve(callee.dfg.signatures.len());
1441    for sig in callee.dfg.signatures.values() {
1442        func.dfg.signatures.push(sig.clone());
1443    }
1444
1445    offset
1446}
1447
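/// Translate `ir::UserExternalNameRef`s from the callee into the caller.
///
/// These are mapped individually via `InliningAllocs` rather than by offset,
/// since `declare_imported_user_function` may return an existing ref when
/// the caller has already declared the same name.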
1448fn create_user_external_name_refs(
1449    allocs: &mut InliningAllocs,
1450    func: &mut ir::Function,
1451    callee: &ir::Function,
1452) {
1453    for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1454        let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1455        allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1456    }
1457}
1458
1459/// Translate `ir::FuncRef`s from the callee into the caller.
1460fn create_func_refs(
1461    allocs: &InliningAllocs,
1462    func: &mut ir::Function,
1463    callee: &ir::Function,
1464    entity_map: &EntityMap,
1465) -> u32 {
1466    let offset = func.dfg.ext_funcs.len();
1467    let offset = u32::try_from(offset).unwrap();
1468
1469    func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1470    for ir::ExtFuncData {
1471        name,
1472        signature,
1473        colocated,
1474    } in callee.dfg.ext_funcs.values()
1475    {
1476        func.dfg.ext_funcs.push(ir::ExtFuncData {
1477            name: match name {
1478                ir::ExternalName::User(name_ref) => {
1479                    ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1480                        "should have translated all `ir::UserExternalNameRef`s before translating \
1481                         `ir::FuncRef`s",
1482                    ))
1483                }
1484                ir::ExternalName::TestCase(_)
1485                | ir::ExternalName::LibCall(_)
1486                | ir::ExternalName::KnownSymbol(_) => name.clone(),
1487            },
1488            signature: entity_map.inlined_sig_ref(*signature),
1489            colocated: *colocated,
1490        });
1491    }
1492
1493    offset
1494}
1495
1496/// Copy stack slots from the callee into the caller.
1497fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1498    let offset = func.sized_stack_slots.len();
1499    let offset = u32::try_from(offset).unwrap();
1500
1501    func.sized_stack_slots
1502        .reserve(callee.sized_stack_slots.len());
1503    for slot in callee.sized_stack_slots.values() {
1504        func.sized_stack_slots.push(slot.clone());
1505    }
1506
1507    offset
1508}
1509
1510/// Copy and translate dynamic types from the callee into the caller.
1511fn create_dynamic_types(
1512    func: &mut ir::Function,
1513    callee: &ir::Function,
1514    entity_map: &EntityMap,
1515) -> u32 {
1516    let offset = func.dfg.dynamic_types.len();
1517    let offset = u32::try_from(offset).unwrap();
1518
1519    func.dfg
1520        .dynamic_types
1521        .reserve(callee.dfg.dynamic_types.len());
1522    for ir::DynamicTypeData {
1523        base_vector_ty,
1524        dynamic_scale,
1525    } in callee.dfg.dynamic_types.values()
1526    {
1527        func.dfg.dynamic_types.push(ir::DynamicTypeData {
1528            base_vector_ty: *base_vector_ty,
1529            dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1530        });
1531    }
1532
1533    offset
1534}
1535
1536/// Copy and translate dynamic stack slots from the callee into the caller.
1537fn create_dynamic_stack_slots(
1538    func: &mut ir::Function,
1539    callee: &ir::Function,
1540    entity_map: &EntityMap,
1541) -> u32 {
1542    let offset = func.dynamic_stack_slots.len();
1543    let offset = u32::try_from(offset).unwrap();
1544
1545    func.dynamic_stack_slots
1546        .reserve(callee.dynamic_stack_slots.len());
1547    for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1548        func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1549            kind: *kind,
1550            dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1551        });
1552    }
1553
1554    offset
1555}
1556
1557/// Copy immediates from the callee into the caller.
1558fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1559    let offset = func.dfg.immediates.len();
1560    let offset = u32::try_from(offset).unwrap();
1561
1562    func.dfg.immediates.reserve(callee.dfg.immediates.len());
1563    for imm in callee.dfg.immediates.values() {
1564        func.dfg.immediates.push(imm.clone());
1565    }
1566
1567    offset
1568}
1569
1570/// Copy constants from the callee into the caller.
1571fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1572    for (callee_constant, data) in callee.dfg.constants.iter() {
1573        let inlined_constant = func.dfg.constants.insert(data.clone());
1574        allocs.constants[*callee_constant] = Some(inlined_constant).into();
1575    }
1576}
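
// For example (hypothetical handles): if callee constants `const0` and
// `const1` hold byte sequences that the caller has already interned as
// `const7` and `const2` respectively, then `insert` simply returns those
// existing handles, so the distance between callee and caller indices is
// not uniform and the per-constant map in `InliningAllocs` is needed
// instead of an offset.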