cranelift_codegen/isa/pulley_shared/
abi.rs

1//! Implementation of a standard Pulley ABI.
2
3use super::{inst::*, PulleyFlags, PulleyTargetKind};
4use crate::isa::pulley_shared::{PointerWidth, PulleyBackend};
5use crate::{
6    ir::{self, types::*, MemFlags, Signature},
7    isa,
8    machinst::*,
9    settings, CodegenResult,
10};
11use alloc::{boxed::Box, vec::Vec};
12use core::marker::PhantomData;
13use cranelift_bitset::ScalarBitSet;
14use regalloc2::{MachineEnv, PReg, PRegSet};
15use smallvec::{smallvec, SmallVec};
16use std::borrow::ToOwned;
17use std::sync::OnceLock;
18
/// Support for the Pulley ABI from the callee side (within a function body).
pub(crate) type PulleyCallee<P> = Callee<PulleyMachineDeps<P>>;
21
/// Support for the Pulley ABI from the caller side (at a callsite).
pub(crate) type PulleyABICallSite<P> = CallSite<PulleyMachineDeps<P>>;
24
/// Pulley-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct PulleyMachineDeps<P>
where
    P: PulleyTargetKind,
{
    // Ties the (never-instantiated) struct to the 32- vs 64-bit target kind.
    _phantom: PhantomData<P>,
}
33
impl<P> ABIMachineSpec for PulleyMachineDeps<P>
where
    P: PulleyTargetKind,
{
    type I = InstAndKind<P>;
    type F = PulleyFlags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    /// Returns the size of a machine word in bits, derived from the target's
    /// pointer width (32 or 64).
    fn word_bits() -> u32 {
        P::pointer_width().bits().into()
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    /// Assigns a register or stack location to each of `params`, pushing the
    /// results into `args`. Returns the total stack space consumed by
    /// stack-allocated values plus, if `add_ret_area_ptr` was set, the index
    /// of the synthesized return-area-pointer argument.
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        // NB: make sure this method stays in sync with
        // `cranelift_pulley::interp::Vm::call`.
        //
        // In general we use the first half of all register banks as argument
        // passing registers because, well, why not for now. Currently the only
        // exception is x15 which is reserved as a single caller-saved register
        // not used for arguments. This is used in `ReturnCallIndirect` to hold
        // the location of where we're jumping to.

        // Last register index (inclusive) usable for argument passing in each
        // bank; see the note about x15 above for why `x_end` is 14.
        let x_end = 14;
        let f_end = 15;
        let v_end = 15;

        let mut next_x_reg = 0;
        let mut next_f_reg = 0;
        let mut next_v_reg = 0;
        let mut next_stack: u32 = 0;

        // The return-area pointer, if requested, consumes the first integer
        // argument register.
        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_x_reg += 1;
            Some(ABIArg::reg(
                x_reg(next_x_reg - 1).to_real_reg().unwrap(),
                I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        for param in params {
            // Find the regclass(es) of the register(s) used to store a value of
            // this type.
            let (rcs, reg_tys) = Self::I::rc_for_type(param.value_type)?;

            let mut slots = ABIArgSlotVec::new();
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
                // Grab the next free register of the matching class, if any
                // remain.
                let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
                    let x = Some(x_reg(next_x_reg));
                    next_x_reg += 1;
                    x
                } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
                    let f = Some(f_reg(next_f_reg));
                    next_f_reg += 1;
                    f
                } else if (next_v_reg <= v_end) && *rc == RegClass::Vector {
                    let v = Some(v_reg(next_v_reg));
                    next_v_reg += 1;
                    v
                } else {
                    None
                };

                if let Some(reg) = next_reg {
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    // Ran out of registers: this value lives on the stack.
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    // Compute size and 16-byte stack alignment happens
                    // separately after all args.
                    let size = reg_ty.bits() / 8;
                    let size = std::cmp::max(size, 8);

                    // Align.
                    debug_assert!(size.is_power_of_two());
                    next_stack = align_to(next_stack, size);

                    slots.push(ABIArgSlot::Stack {
                        offset: i64::from(next_stack),
                        ty: *reg_ty,
                        extension: param.extension,
                    });

                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Append the return-area pointer last, as a non-formal argument, and
        // record its index for the caller.
        let pos = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        next_stack = align_to(next_stack, Self::stack_align(call_conv));

        Ok((next_stack, pos))
    }

    /// Generates a load of a `ty`-typed value from stack location `mem` into
    /// `into_reg`.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        let mut flags = MemFlags::trusted();
        // Stack loads/stores of vectors always use little-endianess to avoid
        // implementing a byte-swap of vectors on big-endian platforms.
        if ty.is_vector() {
            flags.set_endianness(ir::Endianness::Little);
        }
        Inst::gen_load(into_reg, mem.into(), ty, flags).into()
    }

    /// Generates a store of the `ty`-typed value in `from_reg` to stack
    /// location `mem`.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let mut flags = MemFlags::trusted();
        // Stack loads/stores of vectors always use little-endianess to avoid
        // implementing a byte-swap of vectors on big-endian platforms.
        if ty.is_vector() {
            flags.set_endianness(ir::Endianness::Little);
        }
        Inst::gen_store(mem.into(), from_reg, ty, flags).into()
    }

    /// Generates a register-to-register move of a `ty`-typed value.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Self::I::gen_move(to_reg, from_reg, ty)
    }

    /// Generates a sign- or zero-extension of the integer in `src` from
    /// `from_bits` to `to_bits`, writing the result into `dst`.
    ///
    /// Panics if the `(signed, from_bits)` combination has no matching Pulley
    /// extension instruction (only 8/16/32-bit sources are supported).
    fn gen_extend(
        dst: Writable<Reg>,
        src: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        assert!(from_bits < to_bits);
        let src = XReg::new(src).unwrap();
        let dst = dst.try_into().unwrap();
        match (signed, from_bits) {
            (true, 8) => RawInst::Sext8 { dst, src }.into(),
            (true, 16) => RawInst::Sext16 { dst, src }.into(),
            (true, 32) => RawInst::Sext32 { dst, src }.into(),
            (false, 8) => RawInst::Zext8 { dst, src }.into(),
            (false, 16) => RawInst::Zext16 { dst, src }.into(),
            (false, 32) => RawInst::Zext32 { dst, src }.into(),
            _ => unimplemented!("extend {from_bits} to {to_bits} as signed? {signed}"),
        }
    }

    /// Pulley applies exactly the extension the signature specifies; no
    /// call-convention-specific widening is imposed.
    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    /// Generates the pseudo-instruction defining this function's incoming
    /// arguments for register allocation.
    fn gen_args(args: Vec<ArgPair>) -> Self::I {
        Inst::Args { args }.into()
    }

    /// Generates the pseudo-instruction using this function's return values.
    fn gen_rets(rets: Vec<RetPair>) -> Self::I {
        Inst::Rets { rets }.into()
    }

    /// Returns the register used to hold a computed stack limit.
    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    /// Generates `into_reg = from_reg + imm` via a 32-bit constant-load plus
    /// 32-bit add, using `into_reg` as the temporary for the constant.
    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let dst = into_reg.try_into().unwrap();
        let imm = imm as i32;
        smallvec![
            RawInst::Xconst32 { dst, imm }.into(),
            RawInst::Xadd32 {
                dst,
                src1: from_reg.try_into().unwrap(),
                src2: dst.to_reg(),
            }
            .into()
        ]
    }

    /// Never called for Pulley: see `gen_probestack` for why stack limit
    /// checks are unnecessary here.
    fn gen_stack_lower_bound_trap(_limit_reg: Reg) -> SmallInstVec<Self::I> {
        unimplemented!("pulley shouldn't need stack bound checks")
    }

    /// Generates an instruction computing the address of stack location `mem`
    /// into `dst`.
    fn gen_get_stack_addr(mem: StackAMode, dst: Writable<Reg>) -> Self::I {
        let dst = dst.to_reg();
        let dst = XReg::new(dst).unwrap();
        let dst = WritableXReg::from_reg(dst);
        let mem = mem.into();
        Inst::LoadAddr { dst, mem }.into()
    }

    /// Generates a `ty`-typed load from `base + offset` into `into_reg`.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        let base = XReg::try_from(base).unwrap();
        let mem = Amode::RegOffset { base, offset };
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into()
    }

    /// Generates a `ty`-typed store of `from_reg` to `base + offset`.
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let base = XReg::try_from(base).unwrap();
        let mem = Amode::RegOffset { base, offset };
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()).into()
    }

    /// Adjusts the stack pointer by `amount` bytes: negative amounts allocate
    /// stack (`StackAlloc32`), positive amounts free it (`StackFree32`), and
    /// zero emits nothing.
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        if amount == 0 {
            return smallvec![];
        }

        let inst = if amount < 0 {
            let amount = amount.checked_neg().unwrap();
            // A negated negative `i32` is always a valid `u32`.
            if let Ok(amt) = u32::try_from(amount) {
                RawInst::StackAlloc32 { amt }
            } else {
                unreachable!()
            }
        } else {
            // A positive `i32` is always a valid `u32`.
            if let Ok(amt) = u32::try_from(amount) {
                RawInst::StackFree32 { amt }
            } else {
                unreachable!()
            }
        };
        smallvec![inst.into()]
    }

    /// Generates the entire prologue for the function.
    ///
    /// Note that this is different from other backends where it's not spread
    /// out among a few individual functions. That's because the goal here is to
    /// generate a single macro-instruction for the entire prologue in the most
    /// common cases and we don't want to spread the logic over multiple
    /// functions.
    ///
    /// The general machinst methods are split to accommodate stack checks and
    /// things like stack probes, all of which are empty on Pulley because
    /// Pulley has its own stack check mechanism.
    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &PulleyFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let mut insts = SmallVec::new();

        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP by the amount of additional incoming argument space
            // we need
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
        }

        let style = frame_layout.pulley_frame_style();

        match &style {
            FrameStyle::None => {}
            FrameStyle::PulleyBasicSetup { frame_size } => {
                // Pulley saves fp/lr; Cranelift allocates the frame itself.
                insts.push(RawInst::PushFrame.into());
                insts.extend(Self::gen_sp_reg_adjust(
                    -i32::try_from(*frame_size).unwrap(),
                ));
            }
            FrameStyle::PulleySetupAndSaveClobbers {
                frame_size,
                saved_by_pulley,
            } => insts.push(
                // One macro-instruction saves fp/lr, allocates the frame, and
                // saves the pulley-managed clobbers.
                RawInst::PushFrameSave {
                    amt: *frame_size,
                    regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
                }
                .into(),
            ),
            FrameStyle::Manual { frame_size } => insts.extend(Self::gen_sp_reg_adjust(
                -i32::try_from(*frame_size).unwrap(),
            )),
        }

        // Spill any clobbers that Cranelift (not Pulley) is responsible for.
        for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
            insts.push(
                Inst::gen_store(Amode::SpOffset { offset }, reg, ty, MemFlags::trusted()).into(),
            );
        }

        insts
    }

    /// Reverse of `gen_prologue_frame_setup`.
    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &PulleyFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let mut insts = SmallVec::new();

        let style = frame_layout.pulley_frame_style();

        // Restore clobbered registers that are manually managed in Cranelift.
        for (offset, ty, reg) in frame_layout.manually_managed_clobbers(&style) {
            insts.push(
                Inst::gen_load(
                    Writable::from_reg(reg),
                    Amode::SpOffset { offset },
                    ty,
                    MemFlags::trusted(),
                )
                .into(),
            );
        }

        // Perform the inverse of `gen_prologue_frame_setup`.
        match &style {
            FrameStyle::None => {}
            FrameStyle::PulleyBasicSetup { frame_size } => {
                insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()));
                insts.push(RawInst::PopFrame.into());
            }
            FrameStyle::PulleySetupAndSaveClobbers {
                frame_size,
                saved_by_pulley,
            } => insts.push(
                RawInst::PopFrameRestore {
                    amt: *frame_size,
                    regs: pulley_interpreter::UpperRegSet::from_bitset(*saved_by_pulley),
                }
                .into(),
            ),
            FrameStyle::Manual { frame_size } => {
                insts.extend(Self::gen_sp_reg_adjust(i32::try_from(*frame_size).unwrap()))
            }
        }

        insts
    }

    /// Generates the final `ret`, first freeing any stack space that was
    /// allocated for tail-call arguments.
    fn gen_return(
        _call_conv: isa::CallConv,
        _isa_flags: &PulleyFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let mut insts = SmallVec::new();

        // Handle final stack adjustments for the tail-call ABI.
        if frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(
                frame_layout.tail_args_size.try_into().unwrap(),
            ));
        }
        insts.push(RawInst::Ret {}.into());

        insts
    }

    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _frame_size: u32) {
        // Pulley doesn't implement stack probes since all stack pointer
        // decrements are checked already.
    }

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        // Note that this is intentionally empty because everything necessary
        // was already done in `gen_prologue_frame_setup`.
        SmallVec::new()
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        // Intentionally empty as restores happen for Pulley in `gen_return`.
        SmallVec::new()
    }

    /// Generates the instruction(s) for a call to `dest`, choosing between
    /// pulley-to-pulley direct calls, host calls, and indirect calls.
    fn gen_call(
        dest: &CallDest,
        _tmp: Writable<Reg>,
        mut info: CallInfo<()>,
    ) -> SmallVec<[Self::I; 2]> {
        match dest {
            // "near" calls are pulley->pulley calls so they use a normal "call"
            // opcode
            CallDest::ExtName(name, RelocDistance::Near) => {
                // The first four integer arguments to a call can be handled via
                // special pulley call instructions. Assert here that
                // `info.uses` is sorted in order and then take out x0-x3 if
                // they're present and move them from `info.uses` to
                // `info.dest.args` to be handled differently during register
                // allocation.
                let mut args = SmallVec::new();
                info.uses.sort_by_key(|arg| arg.preg);
                info.uses.retain(|arg| {
                    if arg.preg != x0() && arg.preg != x1() && arg.preg != x2() && arg.preg != x3()
                    {
                        return true;
                    }
                    args.push(XReg::new(arg.vreg).unwrap());
                    false
                });
                smallvec![Inst::Call {
                    info: Box::new(info.map(|()| PulleyCall {
                        name: name.clone(),
                        args,
                    }))
                }
                .into()]
            }
            // "far" calls are pulley->host calls so they use a different opcode
            // which is lowered with a special relocation in the backend.
            CallDest::ExtName(name, RelocDistance::Far) => {
                smallvec![Inst::IndirectCallHost {
                    info: Box::new(info.map(|()| name.clone()))
                }
                .into()]
            }
            // Indirect calls are all assumed to be pulley->pulley calls
            CallDest::Reg(reg) => {
                smallvec![Inst::IndirectCall {
                    info: Box::new(info.map(|()| XReg::new(*reg).unwrap()))
                }
                .into()]
            }
        }
    }

    /// NOTE(review): not yet implemented for Pulley; panics via `todo!` if
    /// ever reached.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        _call_conv: isa::CallConv,
        _dst: Reg,
        _src: Reg,
        _size: usize,
        _alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        todo!()
    }

    /// Returns how many pointer-sized spill slots are needed for one value of
    /// register class `rc`.
    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        _target_vector_bytes: u32,
        _isa_flags: &PulleyFlags,
    ) -> u32 {
        // Spill slots are the size of a "word" or a pointer, but Pulley
        // registers are 8-byte for integers/floats regardless of pointer size.
        // Calculate the number of slots necessary to store 8 bytes.
        let slots_for_8bytes = match P::pointer_width() {
            PointerWidth::PointerWidth32 => 2,
            PointerWidth::PointerWidth64 => 1,
        };
        match rc {
            // Int/float registers are 8-bytes
            RegClass::Int | RegClass::Float => slots_for_8bytes,
            // Vector registers are 16 bytes
            RegClass::Vector => 2 * slots_for_8bytes,
        }
    }

    /// Returns the register-allocator environment, built once and cached for
    /// the process lifetime.
    fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
        MACHINE_ENV.get_or_init(create_reg_environment)
    }

    /// Returns the caller-saved register set; Pulley uses the same set for
    /// every calling convention.
    fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
        DEFAULT_CLOBBERS
    }

    /// Computes the final frame layout: which callee-saves were actually
    /// clobbered, their total size, and whether an fp/lr setup area is needed.
    fn compute_frame_layout(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        // Only registers in the callee-saved set actually need saving.
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into()))
            .collect();

        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || !is_leaf
            // The function arguments that are passed on the stack are addressed
            // relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            P::pointer_width().bytes() * 2 // FP, LR
        } else {
            0
        };

        FrameLayout {
            incoming_args_size,
            tail_args_size,
            setup_area_size: setup_area_size.into(),
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }

    fn gen_inline_probestack(
        _insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        _frame_size: u32,
        _guard_size: u32,
    ) {
        // Pulley doesn't need inline probestacks because it always checks stack
        // decrements.
    }
}
596
/// Different styles of management of fp/lr and clobbered registers.
///
/// This helps decide, depending on Cranelift settings and frame layout, what
/// macro instruction is used to setup the pulley frame.
enum FrameStyle {
    /// No management is happening, fp/lr aren't saved by Pulley or Cranelift.
    /// No stack is being allocated either.
    None,

    /// Pulley saves the fp/lr combo and then stack adjustments/clobbers are
    /// handled manually.
    PulleyBasicSetup {
        /// The size of the stack frame, in bytes, that Cranelift allocates
        /// after Pulley's `PushFrame`.
        frame_size: u32,
    },

    /// Pulley is managing the fp/lr combo, the stack size, and clobbered
    /// X-class registers.
    ///
    /// Note that `saved_by_pulley` is not the exhaustive set of clobbered
    /// registers. It's only those that are part of the `PushFrameSave`
    /// instruction.
    PulleySetupAndSaveClobbers {
        /// The size of the frame, including clobbers, that's being allocated.
        frame_size: u16,
        /// Registers that pulley is saving/restoring.
        saved_by_pulley: ScalarBitSet<u16>,
    },

    /// Cranelift is manually managing everything, both clobbers and stack
    /// increments/decrements.
    ///
    /// Note that fp/lr are not saved in this mode.
    Manual {
        /// The size of the stack being allocated.
        frame_size: u32,
    },
}
632
/// Pulley-specific helpers when dealing with ABI code.
impl FrameLayout {
    /// Whether or not this frame saves fp/lr.
    fn setup_frame(&self) -> bool {
        self.setup_area_size > 0
    }

    /// Returns the stack size allocated by this function, excluding incoming
    /// tail args or the optional "setup area" of fp/lr.
    fn stack_size(&self) -> u32 {
        self.clobber_size + self.fixed_frame_storage_size + self.outgoing_args_size
    }

    /// Returns the style of frame being used for this function.
    ///
    /// See `FrameStyle` for more information.
    fn pulley_frame_style(&self) -> FrameStyle {
        let saved_by_pulley = self.clobbered_xregs_saved_by_pulley();
        match (
            self.stack_size(),
            self.setup_frame(),
            saved_by_pulley.is_empty(),
        ) {
            // No stack allocated, not saving fp/lr, no clobbers, nothing to do
            (0, false, true) => FrameStyle::None,

            // No stack allocated, saving fp/lr, no clobbers, so this is
            // pulley-managed via push/pop_frame.
            (0, true, true) => FrameStyle::PulleyBasicSetup { frame_size: 0 },

            // Some stack is being allocated and pulley is managing fp/lr. Let
            // pulley manage clobbered registers as well, regardless if they're
            // present or not.
            //
            // If the stack is too large (doesn't fit in the `u16` carried by
            // `PulleySetupAndSaveClobbers`) then `PulleyBasicSetup` is used
            // otherwise we'll be pushing `PushFrameSave` and `PopFrameRestore`.
            (frame_size, true, _) => match frame_size.try_into() {
                Ok(frame_size) => FrameStyle::PulleySetupAndSaveClobbers {
                    frame_size,
                    saved_by_pulley,
                },
                Err(_) => FrameStyle::PulleyBasicSetup { frame_size },
            },

            // Some stack is being allocated, but pulley isn't managing fp/lr,
            // so we're manually doing everything.
            (frame_size, false, true) => FrameStyle::Manual { frame_size },

            // If there's no frame setup and there's clobbered registers this
            // technically should have already hit a case above, so panic here.
            (_, false, false) => unreachable!(),
        }
    }

    /// Returns the set of clobbered registers that Pulley is managing via its
    /// macro instructions rather than the generated code.
    ///
    /// The returned bitset indexes x-registers relative to x16 (hw_enc - 16),
    /// since only the upper half of the integer bank is callee-saved.
    fn clobbered_xregs_saved_by_pulley(&self) -> ScalarBitSet<u16> {
        let mut clobbered: ScalarBitSet<u16> = ScalarBitSet::new();
        // Pulley only manages clobbers if it's also managing fp/lr.
        if !self.setup_frame() {
            return clobbered;
        }
        let mut found_manual_clobber = false;
        for reg in self.clobbered_callee_saves.iter() {
            let r_reg = reg.to_reg();
            // Pulley can only manage clobbers of integer registers at this
            // time, float registers are managed manually.
            //
            // Also assert that all pulley-managed clobbers come first,
            // otherwise the loop below in `manually_managed_clobbers` is
            // incorrect.
            if r_reg.class() == RegClass::Int {
                assert!(!found_manual_clobber);
                if let Some(offset) = r_reg.hw_enc().checked_sub(16) {
                    clobbered.insert(offset);
                }
            } else {
                found_manual_clobber = true;
            }
        }
        clobbered
    }

    /// Returns an iterator over the clobbers that Cranelift is managing, not
    /// Pulley.
    ///
    /// If this frame has clobbers then they're either saved by Pulley with
    /// `FrameStyle::PulleySetupAndSaveClobbers`. Cranelift might need to manage
    /// these registers depending on Cranelift settings. Cranelift also always
    /// manages floating-point registers.
    ///
    /// Yields `(sp_offset, type, reg)` tuples; offsets are assigned downward
    /// from `stack_size()` in 8-byte steps, skipping (but still reserving
    /// space for) registers that Pulley itself saves.
    fn manually_managed_clobbers<'a>(
        &'a self,
        style: &'a FrameStyle,
    ) -> impl Iterator<Item = (i32, Type, Reg)> + 'a {
        let mut offset = self.stack_size();
        self.clobbered_callee_saves.iter().filter_map(move |reg| {
            // Allocate space for this clobber no matter what. If pulley is
            // managing this then we're just accounting for the pulley-saved
            // registers as well. Note that all pulley-managed registers come
            // first in the list here.
            offset -= 8;
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => {
                    // If this register is saved by pulley, skip this clobber.
                    if let FrameStyle::PulleySetupAndSaveClobbers {
                        saved_by_pulley, ..
                    } = style
                    {
                        if let Some(reg) = r_reg.hw_enc().checked_sub(16) {
                            if saved_by_pulley.contains(reg) {
                                return None;
                            }
                        }
                    }
                    I64
                }
                RegClass::Float => F64,
                RegClass::Vector => unreachable!("no vector registers are callee-save"),
            };
            let offset = i32::try_from(offset).unwrap();
            Some((offset, ty, Reg::from(reg.to_reg())))
        })
    }
}
758
impl<P> PulleyABICallSite<P>
where
    P: PulleyTargetKind,
{
    /// Emits a tail call to this callsite's destination with `args`, lowering
    /// to either `ReturnCall` (direct) or `ReturnIndirectCall` (via register).
    ///
    /// Panics (`unimplemented!`) for a return-call of a host ("far") function.
    pub fn emit_return_call(
        mut self,
        ctx: &mut Lower<InstAndKind<P>>,
        args: isle::ValueSlice,
        _backend: &PulleyBackend<P>,
    ) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

        // Record the callee's stack-argument needs before emitting the
        // argument moves below, which address that space.
        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);

        let dest = self.dest().clone();
        let uses = self.take_uses();

        match dest {
            CallDest::ExtName(name, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo {
                    dest: name,
                    uses,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCall { info }.into());
            }
            CallDest::ExtName(_name, RelocDistance::Far) => {
                unimplemented!("return-call of a host function")
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo {
                    dest: XReg::new(callee).unwrap(),
                    uses,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnIndirectCall { info }.into());
            }
        }
    }
}
805
/// The registers a callee must preserve: the upper halves of the integer
/// (x16-x31) and float (f16-f31) banks. No vector registers are callee-saved.
const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
    // Integer registers.
    .with(px_reg(16))
    .with(px_reg(17))
    .with(px_reg(18))
    .with(px_reg(19))
    .with(px_reg(20))
    .with(px_reg(21))
    .with(px_reg(22))
    .with(px_reg(23))
    .with(px_reg(24))
    .with(px_reg(25))
    .with(px_reg(26))
    .with(px_reg(27))
    .with(px_reg(28))
    .with(px_reg(29))
    .with(px_reg(30))
    .with(px_reg(31))
    // Float registers.
    .with(pf_reg(16))
    .with(pf_reg(17))
    .with(pf_reg(18))
    .with(pf_reg(19))
    .with(pf_reg(20))
    .with(pf_reg(21))
    .with(pf_reg(22))
    .with(pf_reg(23))
    .with(pf_reg(24))
    .with(pf_reg(25))
    .with(pf_reg(26))
    .with(pf_reg(27))
    .with(pf_reg(28))
    .with(pf_reg(29))
    .with(pf_reg(30))
    .with(pf_reg(31))
    // Note: no vector registers are callee-saved.
;
843
844fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
845    let mut clobbered_size = 0;
846    for reg in clobbers {
847        match reg.to_reg().class() {
848            RegClass::Int => {
849                clobbered_size += 8;
850            }
851            RegClass::Float => {
852                clobbered_size += 8;
853            }
854            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
855        }
856    }
857    align_to(clobbered_size, 16)
858}
859
/// The registers a call may clobber (i.e. not preserve): the lower halves of
/// the integer (x0-x15) and float (f0-f15) banks plus the entire vector bank.
/// Complementary to `DEFAULT_CALLEE_SAVES`.
const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
    // Integer registers: the first 16 get clobbered.
    .with(px_reg(0))
    .with(px_reg(1))
    .with(px_reg(2))
    .with(px_reg(3))
    .with(px_reg(4))
    .with(px_reg(5))
    .with(px_reg(6))
    .with(px_reg(7))
    .with(px_reg(8))
    .with(px_reg(9))
    .with(px_reg(10))
    .with(px_reg(11))
    .with(px_reg(12))
    .with(px_reg(13))
    .with(px_reg(14))
    .with(px_reg(15))
    // Float registers: the first 16 get clobbered.
    .with(pf_reg(0))
    .with(pf_reg(1))
    .with(pf_reg(2))
    .with(pf_reg(3))
    .with(pf_reg(4))
    .with(pf_reg(5))
    .with(pf_reg(6))
    .with(pf_reg(7))
    .with(pf_reg(8))
    .with(pf_reg(9))
    .with(pf_reg(10))
    .with(pf_reg(11))
    .with(pf_reg(12))
    .with(pf_reg(13))
    .with(pf_reg(14))
    .with(pf_reg(15))
    // All vector registers get clobbered.
    .with(pv_reg(0))
    .with(pv_reg(1))
    .with(pv_reg(2))
    .with(pv_reg(3))
    .with(pv_reg(4))
    .with(pv_reg(5))
    .with(pv_reg(6))
    .with(pv_reg(7))
    .with(pv_reg(8))
    .with(pv_reg(9))
    .with(pv_reg(10))
    .with(pv_reg(11))
    .with(pv_reg(12))
    .with(pv_reg(13))
    .with(pv_reg(14))
    .with(pv_reg(15))
    .with(pv_reg(16))
    .with(pv_reg(17))
    .with(pv_reg(18))
    .with(pv_reg(19))
    .with(pv_reg(20))
    .with(pv_reg(21))
    .with(pv_reg(22))
    .with(pv_reg(23))
    .with(pv_reg(24))
    .with(pv_reg(25))
    .with(pv_reg(26))
    .with(pv_reg(27))
    .with(pv_reg(28))
    .with(pv_reg(29))
    .with(pv_reg(30))
    .with(pv_reg(31));
928
929fn create_reg_environment() -> MachineEnv {
930    // Prefer caller-saved registers over callee-saved registers, because that
931    // way we don't need to emit code to save and restore them if we don't
932    // mutate them.
933
934    let preferred_regs_by_class: [Vec<PReg>; 3] = {
935        let x_registers: Vec<PReg> = (0..16).map(|x| px_reg(x)).collect();
936        let f_registers: Vec<PReg> = (0..16).map(|x| pf_reg(x)).collect();
937        let v_registers: Vec<PReg> = (0..32).map(|x| pv_reg(x)).collect();
938        [x_registers, f_registers, v_registers]
939    };
940
941    let non_preferred_regs_by_class: [Vec<PReg>; 3] = {
942        let x_registers: Vec<PReg> = (16..XReg::SPECIAL_START)
943            .map(|x| px_reg(x.into()))
944            .collect();
945        let f_registers: Vec<PReg> = (16..32).map(|x| pf_reg(x)).collect();
946        let v_registers: Vec<PReg> = vec![];
947        [x_registers, f_registers, v_registers]
948    };
949
950    MachineEnv {
951        preferred_regs_by_class,
952        non_preferred_regs_by_class,
953        fixed_stack_slots: vec![],
954        scratch_by_class: [None, None, None],
955    }
956}