Skip to main content

cranelift_codegen/
inline.rs

1//! Function inlining infrastructure.
2//!
3//! This module provides "inlining as a library" to Cranelift users; it does
4//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
5//! compilation context is per-function and does not encompass the full call
6//! graph. It does not know which functions are hot and which are cold, which
7//! have been marked the equivalent of `#[inline(never)]`, etc... Only the
8//! Cranelift user can understand these aspects of the full compilation
9//! pipeline, and these things can be very different between (say) Wasmtime and
10//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
11//! when inlining a particular call is likely beneficial. This module only
12//! provides hooks for the Cranelift user to define whether a given call should
13//! be inlined or not, and the mechanics to inline a callee into a particular
14//! call site when directed to do so by the Cranelift user.
15//!
16//! The top-level inlining entry point during Cranelift compilation is
17//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
18//! implementation, which is authored by the Cranelift user and directs
19//! Cranelift whether to inline a particular call, and, when inlining, gives
20//! Cranelift the body of the callee that is to be inlined.
21
22use crate::cursor::{Cursor as _, FuncCursor};
23use crate::ir::{self, DebugTag, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
24use crate::result::CodegenResult;
25use crate::trace;
26use crate::traversals::Dfs;
27use alloc::borrow::Cow;
28use alloc::vec::Vec;
29use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
30use smallvec::SmallVec;
31
// Small-vector aliases used by the inlining code. Each one has inline storage
// for eight elements, so short lists do not require a heap allocation.
32type SmallValueVec = SmallVec<[ir::Value; 8]>;
33type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
34type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
35
36/// A command directing Cranelift whether or not to inline a particular call.
///
/// This is the value returned from [`Inline::inline`] for each call site that
/// the inliner considers.
37pub enum InlineCommand<'a> {
38    /// Keep the call as-is, out-of-line, and do not inline the callee.
39    KeepCall,
40
41    /// Inline the call, using this function as the body of the callee.
42    ///
43    /// It is the `Inline` implementor's responsibility to ensure that this
44    /// function is the correct callee. Providing the wrong function may result
45    /// in panics during compilation or incorrect runtime behavior.
46    Inline {
47        /// The callee function's body.
        ///
        /// Because this is a `Cow`, the `Inline` implementation may either
        /// lend out a function it already holds or hand over an owned one.
48        callee: Cow<'a, ir::Function>,
49        /// Whether to visit any function calls within the callee body after
50        /// inlining and consider them for further inlining.
        ///
        /// When `true`, the traversal is rewound to just before the call site
        /// so that it continues over the freshly inlined body. When `false`,
        /// the traversal resumes after the last inlined block.
51        visit_callee: bool,
52    },
53}
54
55/// A trait for directing Cranelift whether to inline a particular call or not.
56///
57/// Used in combination with the [`Context::inline`][crate::Context::inline]
58/// method.
59pub trait Inline {
60    /// A hook invoked for each direct call instruction in a function, whose
61    /// result determines whether Cranelift should inline a given call.
62    ///
    /// The hook is invoked for direct `call`, `return_call`, and `try_call`
    /// instructions. It is never invoked for indirect calls, nor for
    /// `call`/`return_call` instructions whose callee is marked patchable.
    ///
    /// The arguments describe the call site under consideration:
    ///
    /// * `caller`: the function that contains the call instruction.
    ///
    /// * `call_inst`: the call instruction itself, inside `caller`.
    ///
    /// * `call_opcode`: the opcode of `call_inst`.
    ///
    /// * `callee`: the `FuncRef` (in `caller`) of the function being called.
    ///
    /// * `call_args`: the arguments of `call_inst`.
    ///
63    /// The Cranelift user is responsible for defining their own heuristics and
64    /// deciding whether inlining the call is beneficial.
65    ///
66    /// When returning a function and directing Cranelift to inline its body
67    /// into the call site, the `Inline` implementer must ensure the following:
68    ///
69    /// * The returned function's signature exactly matches the `callee`
70    ///   `FuncRef`'s signature.
71    ///
72    /// * The returned function must be legalized.
73    ///
74    /// * The returned function must be valid (i.e. it must pass the CLIF
75    ///   verifier).
76    ///
77    /// * The returned function is a correct and valid implementation of the
78    ///   `callee` according to your language's semantics.
79    ///
80    /// Failure to uphold these invariants may result in panics during
81    /// compilation or incorrect runtime behavior in the generated code.
82    fn inline(
83        &mut self,
84        caller: &ir::Function,
85        call_inst: ir::Inst,
86        call_opcode: ir::Opcode,
87        callee: ir::FuncRef,
88        call_args: &[ir::Value],
89    ) -> InlineCommand<'_>;
90}
91
92impl<'a, T> Inline for &'a mut T
93where
94    T: Inline,
95{
96    fn inline(
97        &mut self,
98        caller: &ir::Function,
99        inst: ir::Inst,
100        opcode: ir::Opcode,
101        callee: ir::FuncRef,
102        args: &[ir::Value],
103    ) -> InlineCommand<'_> {
104        (*self).inline(caller, inst, opcode, callee, args)
105    }
106}
107
108/// Walk the given function, invoke the `Inline` implementation for each call
109/// instruction, and inline the callee when directed to do so.
110///
111/// Returns whether any call was inlined.
112pub(crate) fn do_inlining(
113    func: &mut ir::Function,
114    mut inliner: impl Inline,
115) -> CodegenResult<bool> {
116    trace!("function {} before inlining: {}", func.name, func);
117
118    let mut inlined_any = false;
119    let mut allocs = InliningAllocs::default();
120
121    let mut cursor = FuncCursor::new(func);
122    'block_loop: while let Some(block) = cursor.next_block() {
123        // Always keep track of our previous cursor position. Assuming that the
124        // current position is a function call that we will inline, then the
125        // previous position is just before the inlined callee function. After
126        // inlining a call, the Cranelift user can decide whether to consider
127        // any function calls in the inlined callee for further inlining or
128        // not. When they do, then we back up to this previous cursor position
129        // so that our traversal will then continue over the inlined body.
130        let mut prev_pos;
131
132        while let Some(inst) = {
133            prev_pos = cursor.position();
134            cursor.next_inst()
135        } {
136            // Make sure that `block` is always `inst`'s block, even with all of
137            // our cursor-position-updating and block-splitting-during-inlining
138            // shenanigans below.
139            debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140
141            match cursor.func.dfg.insts[inst] {
142                ir::InstructionData::Call { func_ref, .. }
143                    if cursor.func.dfg.ext_funcs[func_ref].patchable =>
144                {
145                    // Can't inline patchable calls; they need to
146                    // remain patchable and inlining the whole body is
147                    // decidedly *not* patchable!
148                }
149
150                ir::InstructionData::Call {
151                    opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
152                    args: _,
153                    func_ref,
154                } => {
155                    trace!(
156                        "considering call site for inlining: {inst}: {}",
157                        cursor.func.dfg.display_inst(inst),
158                    );
159                    let args = cursor.func.dfg.inst_args(inst);
160                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
161                        InlineCommand::KeepCall => {
162                            trace!("  --> keeping call");
163                        }
164                        InlineCommand::Inline {
165                            callee,
166                            visit_callee,
167                        } => {
168                            let last_inlined_block = inline_one(
169                                &mut allocs,
170                                cursor.func,
171                                func_ref,
172                                block,
173                                inst,
174                                opcode,
175                                &callee,
176                                None,
177                            )?;
178                            inlined_any = true;
179                            if visit_callee {
180                                cursor.set_position(prev_pos);
181                            } else {
182                                // Arrange it so that the `next_block()` loop
183                                // will continue to the next block that is not
184                                // associated with the just-inlined callee.
185                                cursor.goto_bottom(last_inlined_block);
186                                continue 'block_loop;
187                            }
188                        }
189                    }
190                }
191                ir::InstructionData::TryCall {
192                    opcode: opcode @ ir::Opcode::TryCall,
193                    args: _,
194                    func_ref,
195                    exception,
196                } => {
197                    trace!(
198                        "considering call site for inlining: {inst}: {}",
199                        cursor.func.dfg.display_inst(inst),
200                    );
201                    let args = cursor.func.dfg.inst_args(inst);
202                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
203                        InlineCommand::KeepCall => {
204                            trace!("  --> keeping call");
205                        }
206                        InlineCommand::Inline {
207                            callee,
208                            visit_callee,
209                        } => {
210                            let last_inlined_block = inline_one(
211                                &mut allocs,
212                                cursor.func,
213                                func_ref,
214                                block,
215                                inst,
216                                opcode,
217                                &callee,
218                                Some(exception),
219                            )?;
220                            inlined_any = true;
221                            if visit_callee {
222                                cursor.set_position(prev_pos);
223                            } else {
224                                // Arrange it so that the `next_block()` loop
225                                // will continue to the next block that is not
226                                // associated with the just-inlined callee.
227                                cursor.goto_bottom(last_inlined_block);
228                                continue 'block_loop;
229                            }
230                        }
231                    }
232                }
233                ir::InstructionData::CallIndirect { .. }
234                | ir::InstructionData::TryCallIndirect { .. } => {
235                    // Can't inline indirect calls; need to have some earlier
236                    // pass rewrite them into direct calls first, when possible.
237                }
238                _ => {
239                    debug_assert!(
240                        !cursor.func.dfg.insts[inst].opcode().is_call(),
241                        "should have matched all call instructions, but found: {inst}: {}",
242                        cursor.func.dfg.display_inst(inst),
243                    );
244                }
245            }
246        }
247    }
248
249    if inlined_any {
250        trace!("function {} after inlining: {}", func.name, func);
251    } else {
252        trace!("function {} did not have any callees inlined", func.name);
253    }
254
255    Ok(inlined_any)
256}
257
/// Scratch state used while inlining a single call.
///
/// One instance is created per `do_inlining` run and handed to every
/// `inline_one` call, which invokes `reset` before using it.
258#[derive(Default)]
259struct InliningAllocs {
260    /// Map from callee value to inlined caller value.
    ///
    /// Entries are stored and looked up under the alias-resolved callee value.
261    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,
262
263    /// Map from callee constant to inlined caller constant.
264    ///
265    /// Not in `EntityMap` because these are hash-consed inside the
266    /// `ir::Function`.
267    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,
268
269    /// Map from callee to inlined caller external name refs.
270    ///
271    /// Not in `EntityMap` because these are hash-consed inside the
272    /// `ir::Function`.
273    user_external_name_refs:
274        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,
275
276    /// The set of _caller_ inlined call instructions that need exception table
277    /// fixups at the end of inlining.
278    ///
279    /// This includes all kinds of calls that are not tail calls, not just the literal
280    /// `call` instruction: `call_indirect`, `try_call`, `try_call_indirect`,
281    /// etc... However, it does not include `return_call` and
282    /// `return_call_indirect` instructions because the caller cannot catch
283    /// exceptions that those calls throw because the caller is no longer on the
284    /// stack as soon as they are executed.
285    ///
    /// Entries are only recorded when the call site being inlined is a
    /// `try_call`.
    ///
286    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
287    /// sparse: most of the caller's instructions are not inlined call
288    /// instructions. Additionally, we require deterministic iteration order and
289    /// do not require set-membership testing, so a hash set is not a good
290    /// choice either.
291    calls_needing_exception_table_fixup: Vec<ir::Inst>,
292}
293
294impl InliningAllocs {
295    fn reset(&mut self, callee: &ir::Function) {
296        let InliningAllocs {
297            values,
298            constants,
299            user_external_name_refs,
300            calls_needing_exception_table_fixup,
301        } = self;
302
303        values.clear();
304        values.resize(callee.dfg.len_values());
305
306        constants.clear();
307        constants.resize(callee.dfg.constants.len());
308
309        user_external_name_refs.clear();
310        user_external_name_refs.resize(callee.params.user_named_funcs().len());
311
312        // Note: We do not reserve capacity for
313        // `calls_needing_exception_table_fixup` because it is a sparse set and
314        // we don't know how large it needs to be ahead of time.
315        calls_needing_exception_table_fixup.clear();
316    }
317
318    fn set_inlined_value(
319        &mut self,
320        callee: &ir::Function,
321        callee_val: ir::Value,
322        inlined_val: ir::Value,
323    ) {
324        trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
325        debug_assert!(self.values[callee_val].is_none());
326        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
327        debug_assert!(self.values[resolved_callee_val].is_none());
328        self.values[resolved_callee_val] = Some(inlined_val).into();
329    }
330
331    fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
332        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
333        self.values[resolved_callee_val].expand()
334    }
335}
336
337/// Inline one particular function call.
338///
339/// Returns the last inlined block in the layout.
340fn inline_one(
341    allocs: &mut InliningAllocs,
342    func: &mut ir::Function,
343    callee_func_ref: ir::FuncRef,
344    call_block: ir::Block,
345    call_inst: ir::Inst,
346    call_opcode: ir::Opcode,
347    callee: &ir::Function,
348    call_exception_table: Option<ir::ExceptionTable>,
349) -> CodegenResult<ir::Block> {
350    trace!(
351        "Inlining call {call_inst:?}: {}\n\
352         with callee = {callee:?}",
353        func.dfg.display_inst(call_inst)
354    );
355
356    // Type check callee signature.
357    let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
358    let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
359    assert_eq!(expected_callee_sig, &callee.signature);
360
361    allocs.reset(callee);
362
363    // First, append various callee entity arenas to the end of the caller's
364    // entity arenas.
365    let entity_map = create_entities(allocs, func, callee)?;
366
367    // Inlined prologue: split the call instruction's block at the point of the
368    // call and replace the call with a jump.
369    let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
370    let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);
371
372    // Prepare for translating the actual instructions by inserting the inlined
373    // blocks into the caller's layout in the same order that they appear in the
374    // callee.
375    let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
376
377    // Get a copy of debug tags on the call instruction; these are
378    // prepended to debug tags on inlined instructions. Remove them
379    // from the call itself as it will be rewritten to a jump (which
380    // cannot have tags).
381    let call_debug_tags = func.debug_tags.get(call_inst).to_vec();
382    func.debug_tags.set(call_inst, []);
383
384    // Translate each instruction from the callee into the caller,
385    // appending them to their associated block in the caller.
386    //
387    // Note that we iterate over the callee with a pre-order traversal so that
388    // we see value defs before uses.
389    for callee_block in Dfs::new().pre_order_iter(callee) {
390        let inlined_block = entity_map.inlined_block(callee_block);
391        trace!(
392            "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?}"
393        );
394
395        let mut next_callee_inst = callee.layout.first_inst(callee_block);
396        while let Some(callee_inst) = next_callee_inst {
397            trace!(
398                "Processing callee instruction {callee_inst:?}: {}",
399                callee.dfg.display_inst(callee_inst)
400            );
401
402            assert_ne!(
403                callee.dfg.insts[callee_inst].opcode(),
404                ir::Opcode::GlobalValue,
405                "callee must already be legalized, we shouldn't see any `global_value` \
406                 instructions when inlining; found {callee_inst:?}: {}",
407                callee.dfg.display_inst(callee_inst)
408            );
409
410            // Remap the callee instruction's entities and insert it into the
411            // caller's DFG.
412            let mut inst_remapper = InliningInstRemapper {
413                allocs: &allocs,
414                func,
415                callee,
416                entity_map: &entity_map,
417                error: None,
418            };
419            let inlined_inst_data = callee.dfg.insts[callee_inst].map(&mut inst_remapper);
420            if let Some(err) = inst_remapper.error.take() {
421                return Err(err);
422            }
423            let inlined_inst = func.dfg.make_inst(inlined_inst_data);
424            func.layout.append_inst(inlined_inst, inlined_block);
425
426            // Copy over debug tags, translating referenced entities
427            // as appropriate.
428            let debug_tags = callee.debug_tags.get(callee_inst);
429            // If there are tags on the inlined instruction, we always
430            // add tags, and we prepend any tags from the call
431            // instruction; but we don't add tags if only the callsite
432            // had them (this would otherwise mean that every single
433            // instruction in an inlined function body would get
434            // tags).
435            if !debug_tags.is_empty() {
436                let tags = call_debug_tags
437                    .iter()
438                    .cloned()
439                    .chain(debug_tags.iter().map(|tag| match *tag {
440                        DebugTag::User(value) => DebugTag::User(value),
441                        DebugTag::StackSlot(slot) => {
442                            DebugTag::StackSlot(entity_map.inlined_stack_slot(slot))
443                        }
444                    }))
445                    .collect::<SmallVec<[_; 4]>>();
446                func.debug_tags.set(inlined_inst, tags);
447            }
448
449            let opcode = callee.dfg.insts[callee_inst].opcode();
450            if opcode.is_return() {
451                // Instructions that return do not define any values, so we
452                // don't need to worry about that, but we do need to fix them up
453                // so that they return by jumping to our control-flow join
454                // block, rather than returning from the caller.
455                if let Some(return_block) = return_block {
456                    fixup_inst_that_returns(
457                        allocs,
458                        func,
459                        callee,
460                        &entity_map,
461                        call_opcode,
462                        inlined_inst,
463                        callee_inst,
464                        return_block,
465                        call_stack_map.as_ref().map(|es| &**es),
466                    );
467                } else {
468                    // If we are inlining a callee that was invoked via
469                    // `return_call`, we leave inlined return instructions
470                    // as-is: there is no logical caller frame on the stack to
471                    // continue to.
472                    debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
473                }
474            } else {
475                // Make the instruction's result values.
476                let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
477                func.dfg.make_inst_results(inlined_inst, ctrl_typevar);
478
479                // Update the value map for this instruction's defs.
480                let callee_results = callee.dfg.inst_results(callee_inst);
481                let inlined_results = func.dfg.inst_results(inlined_inst);
482                debug_assert_eq!(callee_results.len(), inlined_results.len());
483                for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
484                    allocs.set_inlined_value(callee, *callee_val, *inlined_val);
485                }
486
487                if opcode.is_call() {
488                    append_stack_map_entries(
489                        func,
490                        callee,
491                        &entity_map,
492                        call_stack_map.as_deref(),
493                        inlined_inst,
494                        callee_inst,
495                    );
496
497                    // When we are inlining a `try_call` call site, we need to merge
498                    // the call site's exception table into the inlined calls'
499                    // exception tables. This can involve rewriting regular `call`s
500                    // into `try_call`s, which requires mutating the CFG because
501                    // `try_call` is a block terminator. However, we can't mutate
502                    // the CFG in the middle of this traversal because we rely on
503                    // the existence of a one-to-one mapping between the callee
504                    // layout and the inlined layout. Instead, we record the set of
505                    // inlined call instructions that will need fixing up, and
506                    // perform that possibly-CFG-mutating exception table merging in
507                    // a follow up pass, when we no longer rely on that one-to-one
508                    // layout mapping.
509                    debug_assert_eq!(
510                        call_opcode == ir::Opcode::TryCall,
511                        call_exception_table.is_some()
512                    );
513                    if call_opcode == ir::Opcode::TryCall {
514                        allocs
515                            .calls_needing_exception_table_fixup
516                            .push(inlined_inst);
517                    }
518                }
519            }
520
521            trace!(
522                "  --> inserted inlined instruction {inlined_inst:?}: {}",
523                func.dfg.display_inst(inlined_inst)
524            );
525
526            next_callee_inst = callee.layout.next_inst(callee_inst);
527        }
528    }
529
530    // We copied *all* callee blocks into the caller's layout, but only copied
531    // the callee instructions in *reachable* callee blocks into the caller's
532    // associated blocks. Therefore, any *unreachable* blocks are empty in the
533    // caller, which is invalid CLIF because all blocks must end in a
534    // terminator, so do a quick pass over the inlined blocks and remove any
535    // empty blocks from the caller's layout.
536    for block in entity_map.iter_inlined_blocks(func) {
537        if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
538            log::trace!("removing unreachable inlined block from layout: {block}");
539
540            // If the block being removed is our last-inlined block, then back
541            // it up to the previous block in the layout, which will be the new
542            // last-inlined block after this one's removal.
543            if block == last_inlined_block {
544                last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
545                    "there will always at least be the block that contained the call we are \
546                     inlining",
547                );
548            }
549
550            func.layout.remove_block(block);
551        }
552    }
553
554    // Final step: fixup the exception tables of any inlined calls when we are
555    // inlining a `try_call` site.
556    //
557    // Subtly, this requires rewriting non-catching `call[_indirect]`
558    // instructions into `try_call[_indirect]` instructions so that exceptions
559    // that unwound through the original callee frame and were caught by the
560    // caller's `try_call` do not unwind past this inlined frame. And turning a
561    // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
562    // between callee blocks and inlined blocks, so we delay these fixups to
563    // this final step, when we no longer rely on that mapping.
564    debug_assert!(
565        allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
566    );
567    debug_assert_eq!(
568        call_opcode == ir::Opcode::TryCall,
569        call_exception_table.is_some()
570    );
571    if let Some(call_exception_table) = call_exception_table {
572        fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
573    }
574
575    debug_assert!(
576        func.layout.is_block_inserted(last_inlined_block),
577        "last_inlined_block={last_inlined_block} should be inserted in the layout"
578    );
579    Ok(last_inlined_block)
580}
581
582/// Append stack map entries from the caller and callee to the given inlined
583/// instruction.
584fn append_stack_map_entries(
585    func: &mut ir::Function,
586    callee: &ir::Function,
587    entity_map: &EntityMap,
588    call_stack_map: Option<&[ir::UserStackMapEntry]>,
589    inlined_inst: ir::Inst,
590    callee_inst: ir::Inst,
591) {
592    // Add the caller's stack map to this call. These entries
593    // already refer to caller entities and do not need further
594    // translation.
595    func.dfg.append_user_stack_map_entries(
596        inlined_inst,
597        call_stack_map
598            .iter()
599            .flat_map(|entries| entries.iter().cloned()),
600    );
601
602    // Append the callee's stack map to this call. These entries
603    // refer to callee entities and therefore do require
604    // translation into the caller's index space.
605    func.dfg.append_user_stack_map_entries(
606        inlined_inst,
607        callee
608            .dfg
609            .user_stack_map_entries(callee_inst)
610            .iter()
611            .flat_map(|entries| entries.iter())
612            .map(|entry| ir::UserStackMapEntry {
613                ty: entry.ty,
614                slot: entity_map.inlined_stack_slot(entry.slot),
615                offset: entry.offset,
616            }),
617    );
618}
619
620/// Create or update the exception tables for any inlined call instructions:
621/// when inlining at a `try_call` site, we must forward our exceptional edges
622/// into each inlined call instruction.
623fn fixup_inlined_call_exception_tables(
624    allocs: &mut InliningAllocs,
625    func: &mut ir::Function,
626    call_exception_table: ir::ExceptionTable,
627) {
628    // Split a block at a `call[_indirect]` instruction, detach the
629    // instruction's results, and alias them to the new block's parameters.
630    let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
631        debug_assert!(func.dfg.insts[inst].opcode().is_call());
632        debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());
633
634        // Split the block.
635        let next_inst = func
636            .layout
637            .next_inst(inst)
638            .expect("inst is not a terminator, should have a successor");
639        let new_block = func.dfg.blocks.add();
640        func.layout.split_block(new_block, next_inst);
641
642        // `try_call[_indirect]` instructions do not define values themselves;
643        // the normal-return block has parameters for the results. So remove
644        // this instruction's results, create an associated block parameter for
645        // each of them, and alias them to the new block parameter.
646        let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
647        func.dfg.detach_inst_results(inst);
648        for old_result in old_results {
649            let ty = func.dfg.value_type(old_result);
650            let new_block_param = func.dfg.append_block_param(new_block, ty);
651            func.dfg.change_to_alias(old_result, new_block_param);
652        }
653
654        new_block
655    };
656
657    // Clone the caller's exception table, updating it for use in the current
658    // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
659    let clone_exception_table_for_this_call = |func: &mut ir::Function,
660                                               signature: ir::SigRef,
661                                               new_block: ir::Block|
662     -> ir::ExceptionTable {
663        let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
664            .deep_clone(&mut func.stencil.dfg.value_lists);
665
666        *exception.signature_mut() = signature;
667
668        let returns_len = func.dfg.signatures[signature].returns.len();
669        let returns_len = u32::try_from(returns_len).unwrap();
670
671        *exception.normal_return_mut() = ir::BlockCall::new(
672            new_block,
673            (0..returns_len).map(|i| ir::BlockArg::TryCallRet(i)),
674            &mut func.dfg.value_lists,
675        );
676
677        func.dfg.exception_tables.push(exception)
678    };
679
680    for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
681        debug_assert!(func.dfg.insts[inst].opcode().is_call());
682        debug_assert!(!func.dfg.insts[inst].opcode().is_return());
683        match func.dfg.insts[inst] {
684            //     current_block:
685            //         preds...
686            //         rets... = call f(args...)
687            //         succs...
688            //
689            // becomes
690            //
691            //     current_block:
692            //         preds...
693            //         try_call f(args...), new_block(rets...), [call_exception_table...]
694            //     new_block(rets...):
695            //         succs...
696            ir::InstructionData::Call {
697                opcode: ir::Opcode::Call,
698                args,
699                func_ref,
700            } => {
701                let new_block = split_block_for_new_try_call(func, inst);
702                let signature = func.dfg.ext_funcs[func_ref].signature;
703                let exception = clone_exception_table_for_this_call(func, signature, new_block);
704                func.dfg.insts[inst] = ir::InstructionData::TryCall {
705                    opcode: ir::Opcode::TryCall,
706                    args,
707                    func_ref,
708                    exception,
709                };
710            }
711
712            //     current_block:
713            //         preds...
714            //         rets... = call_indirect sig, val(args...)
715            //         succs...
716            //
717            // becomes
718            //
719            //     current_block:
720            //         preds...
721            //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
722            //     new_block(rets...):
723            //         succs...
724            ir::InstructionData::CallIndirect {
725                opcode: ir::Opcode::CallIndirect,
726                args,
727                sig_ref,
728            } => {
729                let new_block = split_block_for_new_try_call(func, inst);
730                let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
731                func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
732                    opcode: ir::Opcode::TryCallIndirect,
733                    args,
734                    exception,
735                };
736            }
737
738            // For `try_call[_indirect]` instructions, we just need to merge the
739            // exception tables.
740            ir::InstructionData::TryCall {
741                opcode: ir::Opcode::TryCall,
742                exception,
743                ..
744            }
745            | ir::InstructionData::TryCallIndirect {
746                opcode: ir::Opcode::TryCallIndirect,
747                exception,
748                ..
749            } => {
750                // Construct a new exception table that consists of
751                // the inlined instruction's exception table match
752                // sequence, with the inlining site's exception table
753                // appended. This will ensure that the first-match
754                // semantics emulates the original behavior of
755                // matching in the inner frame first.
756                let sig = func.dfg.exception_tables[exception].signature();
757                let normal_return = *func.dfg.exception_tables[exception].normal_return();
758                let exception_data = ExceptionTableData::new(
759                    sig,
760                    normal_return,
761                    func.dfg.exception_tables[exception]
762                        .items()
763                        .chain(func.dfg.exception_tables[call_exception_table].items()),
764                )
765                .deep_clone(&mut func.dfg.value_lists);
766
767                func.dfg.exception_tables[exception] = exception_data;
768            }
769
770            otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
771        }
772    }
773}
774
775/// After having created an inlined version of a callee instruction that returns
776/// in the caller, we need to fix it up so that it doesn't actually return
777/// (since we are already in the caller's frame) and instead just jumps to the
778/// control-flow join point.
779fn fixup_inst_that_returns(
780    allocs: &mut InliningAllocs,
781    func: &mut ir::Function,
782    callee: &ir::Function,
783    entity_map: &EntityMap,
784    call_opcode: ir::Opcode,
785    inlined_inst: ir::Inst,
786    callee_inst: ir::Inst,
787    return_block: ir::Block,
788    call_stack_map: Option<&[ir::UserStackMapEntry]>,
789) {
790    debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
791    match func.dfg.insts[inlined_inst] {
792        //     return rets...
793        //
794        // becomes
795        //
796        //     jump return_block(rets...)
797        ir::InstructionData::MultiAry {
798            opcode: ir::Opcode::Return,
799            args,
800        } => {
801            let rets = SmallBlockArgVec::from_iter(
802                args.as_slice(&func.dfg.value_lists)
803                    .iter()
804                    .copied()
805                    .map(|v| v.into()),
806            );
807            func.replace(inlined_inst).jump(return_block, &rets);
808        }
809
810        //     return_call f(args...)
811        //
812        // becomes
813        //
814        //     rets... = call f(args...)
815        //     jump return_block(rets...)
816        ir::InstructionData::Call {
817            opcode: ir::Opcode::ReturnCall,
818            args,
819            func_ref,
820        } => {
821            func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
822                opcode: ir::Opcode::Call,
823                args,
824                func_ref,
825            };
826            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
827
828            append_stack_map_entries(
829                func,
830                callee,
831                &entity_map,
832                call_stack_map,
833                inlined_inst,
834                callee_inst,
835            );
836
837            let rets = SmallBlockArgVec::from_iter(
838                func.dfg
839                    .inst_results(inlined_inst)
840                    .iter()
841                    .copied()
842                    .map(|v| v.into()),
843            );
844            let mut cursor = FuncCursor::new(func);
845            cursor.goto_after_inst(inlined_inst);
846            cursor.ins().jump(return_block, &rets);
847
848            if call_opcode == ir::Opcode::TryCall {
849                allocs
850                    .calls_needing_exception_table_fixup
851                    .push(inlined_inst);
852            }
853        }
854
855        //     return_call_indirect val(args...)
856        //
857        // becomes
858        //
859        //     rets... = call_indirect val(args...)
860        //     jump return_block(rets...)
861        ir::InstructionData::CallIndirect {
862            opcode: ir::Opcode::ReturnCallIndirect,
863            args,
864            sig_ref,
865        } => {
866            func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
867                opcode: ir::Opcode::CallIndirect,
868                args,
869                sig_ref,
870            };
871            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);
872
873            append_stack_map_entries(
874                func,
875                callee,
876                &entity_map,
877                call_stack_map,
878                inlined_inst,
879                callee_inst,
880            );
881
882            let rets = SmallBlockArgVec::from_iter(
883                func.dfg
884                    .inst_results(inlined_inst)
885                    .iter()
886                    .copied()
887                    .map(|v| v.into()),
888            );
889            let mut cursor = FuncCursor::new(func);
890            cursor.goto_after_inst(inlined_inst);
891            cursor.ins().jump(return_block, &rets);
892
893            if call_opcode == ir::Opcode::TryCall {
894                allocs
895                    .calls_needing_exception_table_fixup
896                    .push(inlined_inst);
897            }
898        }
899
900        inst_data => unreachable!(
901            "should have handled all `is_return() == true` instructions above; \
902             got {inst_data:?}"
903        ),
904    }
905}
906
907/// An `InstructionMapper` implementation that remaps a callee instruction's
908/// entity references to their new indices in the caller function.
909struct InliningInstRemapper<'a> {
910    allocs: &'a InliningAllocs,
911    func: &'a mut ir::Function,
912    callee: &'a ir::Function,
913    entity_map: &'a EntityMap,
914    error: Option<crate::result::CodegenError>,
915}
916
917impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
918    fn map_value(&mut self, value: ir::Value) -> ir::Value {
919        self.allocs.get_inlined_value(self.callee, value).expect(
920            "defs come before uses; we should have already inlined all values \
921             used by an instruction",
922        )
923    }
924
925    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
926        let mut inlined_list = ir::ValueList::new();
927        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
928            let inlined_val = self.map_value(*callee_val);
929            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
930        }
931        inlined_list
932    }
933
934    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
935        self.entity_map.inlined_global_value(global_value)
936    }
937
938    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
939        let inlined_default =
940            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
941        let inlined_table = self.callee.dfg.jump_tables[jump_table]
942            .as_slice()
943            .iter()
944            .map(|callee_block_call| self.map_block_call(*callee_block_call))
945            .collect::<SmallBlockCallVec>();
946        self.func
947            .dfg
948            .jump_tables
949            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
950    }
951
952    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
953        let exception_table = &self.callee.dfg.exception_tables[exception_table];
954        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
955        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
956        let inlined_table = exception_table
957            .items()
958            .map(|item| match item {
959                ExceptionTableItem::Tag(tag, block_call) => {
960                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
961                }
962                ExceptionTableItem::Default(block_call) => {
963                    ExceptionTableItem::Default(self.map_block_call(block_call))
964                }
965                ExceptionTableItem::Context(value) => {
966                    ExceptionTableItem::Context(self.map_value(value))
967                }
968            })
969            .collect::<SmallVec<[_; 8]>>();
970        self.func
971            .dfg
972            .exception_tables
973            .push(ir::ExceptionTableData::new(
974                inlined_sig_ref,
975                inlined_normal_return,
976                inlined_table,
977            ))
978    }
979
980    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
981        let callee_block = block_call.block(&self.callee.dfg.value_lists);
982        let inlined_block = self.entity_map.inlined_block(callee_block);
983        let args = block_call
984            .args(&self.callee.dfg.value_lists)
985            .map(|arg| match arg {
986                ir::BlockArg::Value(value) => self.map_value(value).into(),
987                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
988            })
989            .collect::<SmallBlockArgVec>();
990        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
991    }
992
993    fn map_block(&mut self, block: ir::Block) -> ir::Block {
994        self.entity_map.inlined_block(block)
995    }
996
997    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
998        self.entity_map.inlined_func_ref(func_ref)
999    }
1000
1001    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
1002        self.entity_map.inlined_sig_ref(sig_ref)
1003    }
1004
1005    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
1006        self.entity_map.inlined_stack_slot(stack_slot)
1007    }
1008
1009    fn map_dynamic_stack_slot(
1010        &mut self,
1011        dynamic_stack_slot: ir::DynamicStackSlot,
1012    ) -> ir::DynamicStackSlot {
1013        self.entity_map
1014            .inlined_dynamic_stack_slot(dynamic_stack_slot)
1015    }
1016
1017    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
1018        self.allocs
1019            .constants
1020            .get(constant)
1021            .and_then(|o| o.expand())
1022            .expect("should have inlined all callee constants")
1023    }
1024
1025    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
1026        self.entity_map.inlined_immediate(immediate)
1027    }
1028    fn map_mem_flags(&mut self, flags: ir::MemFlags) -> ir::MemFlags {
1029        let mut flags_data = self.callee.dfg.mem_flags[flags];
1030        // Remap the alias region entity from callee to caller.
1031        if let Some(callee_region) = flags_data.alias_region() {
1032            let region_data = self.callee.dfg.alias_regions[callee_region].clone();
1033            let caller_region = self.func.dfg.alias_regions.insert(region_data);
1034            flags_data.set_alias_region(Some(caller_region));
1035        }
1036        match self.func.dfg.mem_flags.insert(flags_data) {
1037            Ok(flags) => flags,
1038            Err(_) => {
1039                self.error = Some(crate::result::CodegenError::ImplLimitExceeded);
1040                self.func
1041                    .dfg
1042                    .mem_flags
1043                    .insert(ir::MemFlagsData::trusted())
1044                    .unwrap()
1045            }
1046        }
1047    }
1048}
1049
1050/// Inline the callee's layout into the caller's layout.
1051///
1052/// Returns the last inlined block in the layout.
1053fn inline_block_layout(
1054    func: &mut ir::Function,
1055    call_block: ir::Block,
1056    callee: &ir::Function,
1057    entity_map: &EntityMap,
1058) -> ir::Block {
1059    debug_assert!(func.layout.is_block_inserted(call_block));
1060
1061    // Iterate over callee blocks in layout order, inserting their associated
1062    // inlined block into the caller's layout.
1063    let mut prev_inlined_block = call_block;
1064    let mut next_callee_block = callee.layout.entry_block();
1065    while let Some(callee_block) = next_callee_block {
1066        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1067
1068        let inlined_block = entity_map.inlined_block(callee_block);
1069        func.layout
1070            .insert_block_after(inlined_block, prev_inlined_block);
1071
1072        prev_inlined_block = inlined_block;
1073        next_callee_block = callee.layout.next_block(callee_block);
1074    }
1075
1076    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
1077    prev_inlined_block
1078}
1079
1080/// Split the call instruction's block just after the call instruction to create
1081/// the point where control-flow joins after the inlined callee "returns".
1082///
1083/// Note that tail calls do not return to the caller and therefore do not have a
1084/// control-flow join point.
1085fn split_off_return_block(
1086    func: &mut ir::Function,
1087    call_inst: ir::Inst,
1088    opcode: ir::Opcode,
1089    callee: &ir::Function,
1090) -> Option<ir::Block> {
1091    // When the `call_inst` is not a block terminator, we need to split the
1092    // block.
1093    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
1094        let return_block = func.dfg.blocks.add();
1095        func.layout.split_block(return_block, next_inst);
1096
1097        // Add block parameters for each return value and alias the call
1098        // instruction's results to them.
1099        let old_results =
1100            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
1101        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
1102        func.dfg.detach_inst_results(call_inst);
1103        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
1104            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
1105            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
1106            func.dfg.change_to_alias(old_val, ret_param);
1107        }
1108
1109        return_block
1110    });
1111
1112    // When the `call_inst` is a block terminator, then it is either a
1113    // `return_call` or a `try_call`:
1114    //
1115    // * For `return_call`s, we don't have a control-flow join point, because
1116    //   the caller permanently transfers control to the callee.
1117    //
1118    // * For `try_call`s, we probably already have a block for the control-flow
1119    //   join point, but it isn't guaranteed: the `try_call` might ignore the
1120    //   call's returns and not forward them to the normal-return block or it
1121    //   might also pass additional arguments. We can only reuse the existing
1122    //   normal-return block when the `try_call` forwards exactly our callee's
1123    //   returns to that block (and therefore that block's parameter types also
1124    //   exactly match the callee's return types). Otherwise, we must create a new
1125    //   return block that forwards to the existing normal-return
1126    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
1127    //   calls to forward any raised exceptions to the caller's exception table,
1128    //   as necessary.)
1129    //
1130    //   Finally, note that reusing the normal-return's target block is just an
1131    //   optimization to emit a simpler CFG when we can, and is not
1132    //   fundamentally required for correctness. We could always insert a
1133    //   temporary block as our control-flow join point that then forwards to
1134    //   the normal-return's target block. However, at the time of writing,
1135    //   Cranelift doesn't currently do any jump-threading or branch
1136    //   simplification in the mid-end, and removing unnecessary blocks in this
1137    //   way can help some subsequent mid-end optimizations. If, in the future,
1138    //   we gain support for jump-threading optimizations in the mid-end, we can
1139    //   come back and simplify the below code a bit to always generate the
1140    //   temporary block, and then rely on the subsequent optimizations to clean
1141    //   everything up.
1142    debug_assert_eq!(
1143        return_block.is_none(),
1144        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
1145    );
1146    return_block.or_else(|| match func.dfg.insts[call_inst] {
1147        ir::InstructionData::TryCall {
1148            opcode: ir::Opcode::TryCall,
1149            args: _,
1150            func_ref: _,
1151            exception,
1152        } => {
1153            let normal_return = func.dfg.exception_tables[exception].normal_return();
1154            let normal_return_block = normal_return.block(&func.dfg.value_lists);
1155
1156            // Check to see if we can reuse the existing normal-return block.
1157            {
1158                let normal_return_args = normal_return.args(&func.dfg.value_lists);
1159                if normal_return_args.len() == callee.signature.returns.len()
1160                    && normal_return_args.enumerate().all(|(i, arg)| {
1161                        let i = u32::try_from(i).unwrap();
1162                        arg == ir::BlockArg::TryCallRet(i)
1163                    })
1164                {
1165                    return Some(normal_return_block);
1166                }
1167            }
1168
1169            // Okay, we cannot reuse the normal-return block. Create a new block
1170            // that has the expected block parameter types and have it jump to
1171            // the normal-return block.
1172            let return_block = func.dfg.blocks.add();
1173            func.layout.insert_block(return_block, normal_return_block);
1174
1175            let return_block_params = callee
1176                .signature
1177                .returns
1178                .iter()
1179                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
1180                .collect::<SmallValueVec>();
1181
1182            let normal_return_args = func.dfg.exception_tables[exception]
1183                .normal_return()
1184                .args(&func.dfg.value_lists)
1185                .collect::<SmallBlockArgVec>();
1186            let jump_args = normal_return_args
1187                .into_iter()
1188                .map(|arg| match arg {
1189                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
1190                    ir::BlockArg::TryCallRet(i) => {
1191                        let i = usize::try_from(i).unwrap();
1192                        ir::BlockArg::Value(return_block_params[i])
1193                    }
1194                    ir::BlockArg::TryCallExn(_) => {
1195                        unreachable!("normal-return edges cannot use exceptional results")
1196                    }
1197                })
1198                .collect::<SmallBlockArgVec>();
1199
1200            let mut cursor = FuncCursor::new(func);
1201            cursor.goto_first_insertion_point(return_block);
1202            cursor.ins().jump(normal_return_block, &jump_args);
1203
1204            Some(return_block)
1205        }
1206        _ => None,
1207    })
1208}
1209
1210/// Replace the caller's call instruction with a jump to the caller's inlined
1211/// copy of the callee's entry block.
1212///
1213/// Also associates the callee's parameters with the caller's arguments in our
1214/// value map.
1215///
1216/// Returns the caller's stack map entries, if any.
1217fn replace_call_with_jump(
1218    allocs: &mut InliningAllocs,
1219    func: &mut ir::Function,
1220    call_inst: ir::Inst,
1221    callee: &ir::Function,
1222    entity_map: &EntityMap,
1223) -> Option<ir::UserStackMapEntryVec> {
1224    trace!("Replacing `call` with `jump`");
1225    trace!(
1226        "  --> call instruction: {call_inst:?}: {}",
1227        func.dfg.display_inst(call_inst)
1228    );
1229
1230    let callee_entry_block = callee
1231        .layout
1232        .entry_block()
1233        .expect("callee function should have an entry block");
1234    let callee_param_values = callee.dfg.block_params(callee_entry_block);
1235    let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
1236    debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
1237    debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
1238    for (abi, (callee_param_value, caller_arg_value)) in callee
1239        .signature
1240        .params
1241        .iter()
1242        .zip(callee_param_values.into_iter().zip(caller_arg_values))
1243    {
1244        debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
1245        debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
1246        allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
1247    }
1248
1249    // Replace the caller's call instruction with a jump to the caller's inlined
1250    // copy of the callee's entry block.
1251    //
1252    // Note that the call block dominates the inlined entry block (and also all
1253    // other inlined blocks) so we can reference the arguments directly, and do
1254    // not need to add block parameters to the inlined entry block.
1255    let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
1256    func.replace(call_inst).jump(inlined_entry_block, &[]);
1257    trace!(
1258        "  --> replaced with jump instruction: {call_inst:?}: {}",
1259        func.dfg.display_inst(call_inst)
1260    );
1261
1262    let stack_map_entries = func.dfg.take_user_stack_map_entries(call_inst);
1263    stack_map_entries
1264}
1265
1266/// Keeps track of mapping callee entities to their associated inlined caller
1267/// entities.
1268#[derive(Default)]
1269struct EntityMap {
1270    // Rather than doing an implicit, demand-based, DCE'ing translation of
1271    // entities, which would require maps from each callee entity to its
1272    // associated caller entity, we copy all entities into the caller, remember
1273    // each entity's initial offset, and then mapping from the callee to the
1274    // inlined caller entity is just adding that initial offset to the callee's
1275    // index. This should be both faster and simpler than the alternative. Most
1276    // of these sets are relatively small, and they rarely have too much dead
1277    // code in practice, so this is a good trade off.
1278    //
1279    // Note that there are a few kinds of entities that are excluded from the
1280    // `EntityMap`, and for which we do actually take the demand-based approach:
1281    // values and value lists being the notable ones.
1282    block_offset: Option<u32>,
1283    global_value_offset: Option<u32>,
1284    sig_ref_offset: Option<u32>,
1285    func_ref_offset: Option<u32>,
1286    stack_slot_offset: Option<u32>,
1287    dynamic_type_offset: Option<u32>,
1288    dynamic_stack_slot_offset: Option<u32>,
1289    immediate_offset: Option<u32>,
1290}
1291
1292impl EntityMap {
1293    fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
1294        let offset = self
1295            .block_offset
1296            .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
1297        ir::Block::from_u32(offset + callee_block.as_u32())
1298    }
1299
1300    fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
1301        let start = self.block_offset.expect(
1302            "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
1303        );
1304
1305        let end = func.dfg.blocks.len();
1306        let end = u32::try_from(end).unwrap();
1307
1308        (start..end).map(|i| ir::Block::from_u32(i))
1309    }
1310
1311    fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
1312        let offset = self
1313            .global_value_offset
1314            .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
1315        ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
1316    }
1317
1318    fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
1319        let offset = self.sig_ref_offset.expect(
1320            "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
1321        );
1322        ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
1323    }
1324
1325    fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
1326        let offset = self.func_ref_offset.expect(
1327            "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
1328        );
1329        ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
1330    }
1331
1332    fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
1333        let offset = self.stack_slot_offset.expect(
1334            "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
1335        );
1336        ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
1337    }
1338
1339    fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
1340        let offset = self.dynamic_type_offset.expect(
1341            "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
1342        );
1343        ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
1344    }
1345
1346    fn inlined_dynamic_stack_slot(
1347        &self,
1348        callee_dynamic_stack_slot: ir::DynamicStackSlot,
1349    ) -> ir::DynamicStackSlot {
1350        let offset = self.dynamic_stack_slot_offset.expect(
1351            "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
1352        );
1353        ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
1354    }
1355
1356    fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
1357        let offset = self.immediate_offset.expect(
1358            "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
1359        );
1360        ir::Immediate::from_u32(offset + callee_immediate.as_u32())
1361    }
1362}
1363
1364/// Translate all of the callee's various entities into the caller, producing an
1365/// `EntityMap` that can be used to translate callee entity references into
1366/// inlined caller entity references.
1367fn create_entities(
1368    allocs: &mut InliningAllocs,
1369    func: &mut ir::Function,
1370    callee: &ir::Function,
1371) -> CodegenResult<EntityMap> {
1372    let mut entity_map = EntityMap::default();
1373
1374    entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1375    entity_map.global_value_offset = Some(create_global_values(func, callee)?);
1376    entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1377    create_user_external_name_refs(allocs, func, callee);
1378    entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1379    entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1380    entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1381    entity_map.dynamic_stack_slot_offset =
1382        Some(create_dynamic_stack_slots(func, callee, &entity_map));
1383    entity_map.immediate_offset = Some(create_immediates(func, callee));
1384
1385    // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1386    // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1387    // now, at the same time as the rest of our entities.
1388    create_constants(allocs, func, callee);
1389
1390    Ok(entity_map)
1391}
1392
1393/// Create inlined blocks in the caller for every block in the callee.
1394fn create_blocks(
1395    allocs: &mut InliningAllocs,
1396    func: &mut ir::Function,
1397    callee: &ir::Function,
1398) -> u32 {
1399    let offset = func.dfg.blocks.len();
1400    let offset = u32::try_from(offset).unwrap();
1401
1402    func.dfg.blocks.reserve(callee.dfg.blocks.len());
1403    for callee_block in callee.dfg.blocks.iter() {
1404        let caller_block = func.dfg.blocks.add();
1405        trace!("Callee {callee_block:?} = inlined {caller_block:?}");
1406
1407        if callee.layout.is_cold(callee_block) {
1408            func.layout.set_cold(caller_block);
1409        }
1410
1411        // Note: the entry block does not need parameters because the only
1412        // predecessor is the call block and we associate the callee's
1413        // parameters with the caller's arguments directly.
1414        if callee.layout.entry_block() != Some(callee_block) {
1415            for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
1416                let ty = callee.dfg.value_type(*callee_param);
1417                let caller_param = func.dfg.append_block_param(caller_block, ty);
1418
1419                allocs.set_inlined_value(callee, *callee_param, caller_param);
1420            }
1421        }
1422    }
1423
1424    offset
1425}
1426
1427/// Copy and translate global values from the callee into the caller.
1428fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> CodegenResult<u32> {
1429    let gv_offset = func.global_values.len();
1430    let gv_offset = u32::try_from(gv_offset).unwrap();
1431
1432    func.global_values.reserve(callee.global_values.len());
1433    for gv in callee.global_values.values() {
1434        // Re-insert callee mem flags into the caller's DFG before constructing
1435        // the global value data, to avoid borrow conflicts.
1436        let remapped_flags = match gv {
1437            ir::GlobalValueData::Load { flags, .. } => {
1438                let mut flags_data = callee.dfg.mem_flags[*flags];
1439                // Remap alias region entity from callee to caller.
1440                if let Some(callee_region) = flags_data.alias_region() {
1441                    let region_data = callee.dfg.alias_regions[callee_region].clone();
1442                    let caller_region = func.dfg.alias_regions.insert(region_data);
1443                    flags_data.set_alias_region(Some(caller_region));
1444                }
1445                Some(
1446                    func.dfg
1447                        .mem_flags
1448                        .insert(flags_data)
1449                        .map_err(|_| crate::result::CodegenError::ImplLimitExceeded)?,
1450                )
1451            }
1452            _ => None,
1453        };
1454        func.global_values.push(match gv {
1455            // These kinds of global values reference other global values, so we
1456            // need to fixup that reference.
1457            ir::GlobalValueData::Load {
1458                base,
1459                offset,
1460                global_type,
1461                flags: _,
1462            } => ir::GlobalValueData::Load {
1463                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1464                offset: *offset,
1465                global_type: *global_type,
1466                flags: remapped_flags.unwrap(),
1467            },
1468            ir::GlobalValueData::IAddImm {
1469                base,
1470                offset,
1471                global_type,
1472            } => ir::GlobalValueData::IAddImm {
1473                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
1474                offset: *offset,
1475                global_type: *global_type,
1476            },
1477
1478            // These kinds of global values do not reference other global
1479            // values, so we can just clone them.
1480            ir::GlobalValueData::VMContext
1481            | ir::GlobalValueData::Symbol { .. }
1482            | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
1483        });
1484    }
1485
1486    Ok(gv_offset)
1487}
1488
1489/// Copy `ir::SigRef`s from the callee into the caller.
1490fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1491    let offset = func.dfg.signatures.len();
1492    let offset = u32::try_from(offset).unwrap();
1493
1494    func.dfg.signatures.reserve(callee.dfg.signatures.len());
1495    for sig in callee.dfg.signatures.values() {
1496        func.dfg.signatures.push(sig.clone());
1497    }
1498
1499    offset
1500}
1501
1502fn create_user_external_name_refs(
1503    allocs: &mut InliningAllocs,
1504    func: &mut ir::Function,
1505    callee: &ir::Function,
1506) {
1507    for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
1508        let caller_named_func_ref = func.declare_imported_user_function(name.clone());
1509        allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
1510    }
1511}
1512
1513/// Translate `ir::FuncRef`s from the callee into the caller.
1514fn create_func_refs(
1515    allocs: &InliningAllocs,
1516    func: &mut ir::Function,
1517    callee: &ir::Function,
1518    entity_map: &EntityMap,
1519) -> u32 {
1520    let offset = func.dfg.ext_funcs.len();
1521    let offset = u32::try_from(offset).unwrap();
1522
1523    func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
1524    for ir::ExtFuncData {
1525        name,
1526        signature,
1527        colocated,
1528        patchable,
1529    } in callee.dfg.ext_funcs.values()
1530    {
1531        func.dfg.ext_funcs.push(ir::ExtFuncData {
1532            name: match name {
1533                ir::ExternalName::User(name_ref) => {
1534                    ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
1535                        "should have translated all `ir::UserExternalNameRef`s before translating \
1536                         `ir::FuncRef`s",
1537                    ))
1538                }
1539                ir::ExternalName::TestCase(_)
1540                | ir::ExternalName::LibCall(_)
1541                | ir::ExternalName::KnownSymbol(_) => name.clone(),
1542            },
1543            signature: entity_map.inlined_sig_ref(*signature),
1544            colocated: *colocated,
1545            patchable: *patchable,
1546        });
1547    }
1548
1549    offset
1550}
1551
1552/// Copy stack slots from the callee into the caller.
1553fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1554    let offset = func.sized_stack_slots.len();
1555    let offset = u32::try_from(offset).unwrap();
1556
1557    func.sized_stack_slots
1558        .reserve(callee.sized_stack_slots.len());
1559    for slot in callee.sized_stack_slots.values() {
1560        func.sized_stack_slots.push(slot.clone());
1561    }
1562
1563    offset
1564}
1565
1566/// Copy dynamic types from the callee into the caller.
1567fn create_dynamic_types(
1568    func: &mut ir::Function,
1569    callee: &ir::Function,
1570    entity_map: &EntityMap,
1571) -> u32 {
1572    let offset = func.dynamic_stack_slots.len();
1573    let offset = u32::try_from(offset).unwrap();
1574
1575    func.dfg
1576        .dynamic_types
1577        .reserve(callee.dfg.dynamic_types.len());
1578    for ir::DynamicTypeData {
1579        base_vector_ty,
1580        dynamic_scale,
1581    } in callee.dfg.dynamic_types.values()
1582    {
1583        func.dfg.dynamic_types.push(ir::DynamicTypeData {
1584            base_vector_ty: *base_vector_ty,
1585            dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
1586        });
1587    }
1588
1589    offset
1590}
1591
1592/// Copy dynamic stack slots from the callee into the caller.
1593fn create_dynamic_stack_slots(
1594    func: &mut ir::Function,
1595    callee: &ir::Function,
1596    entity_map: &EntityMap,
1597) -> u32 {
1598    let offset = func.dynamic_stack_slots.len();
1599    let offset = u32::try_from(offset).unwrap();
1600
1601    func.dynamic_stack_slots
1602        .reserve(callee.dynamic_stack_slots.len());
1603    for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
1604        func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
1605            kind: *kind,
1606            dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
1607        });
1608    }
1609
1610    offset
1611}
1612
1613/// Copy immediates from the callee into the caller.
1614fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
1615    let offset = func.dfg.immediates.len();
1616    let offset = u32::try_from(offset).unwrap();
1617
1618    func.dfg.immediates.reserve(callee.dfg.immediates.len());
1619    for imm in callee.dfg.immediates.values() {
1620        func.dfg.immediates.push(imm.clone());
1621    }
1622
1623    offset
1624}
1625
1626/// Copy constants from the callee into the caller.
1627fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
1628    for (callee_constant, data) in callee.dfg.constants.iter() {
1629        let inlined_constant = func.dfg.constants.insert(data.clone());
1630        allocs.constants[*callee_constant] = Some(inlined_constant).into();
1631    }
1632}