cranelift_codegen/isa/pulley_shared/inst/
emit.rs

1//! Pulley binary code emission.
2
3use super::*;
4use crate::ir::{self, Endianness};
5use crate::isa;
6use crate::isa::pulley_shared::abi::PulleyMachineDeps;
7use crate::isa::pulley_shared::PointerWidth;
8use core::marker::PhantomData;
9use cranelift_control::ControlPlane;
10use pulley_interpreter::encode as enc;
11use pulley_interpreter::regs::BinaryOperands;
12
13pub struct EmitInfo {
14    call_conv: isa::CallConv,
15    shared_flags: settings::Flags,
16    isa_flags: crate::isa::pulley_shared::settings::Flags,
17}
18
19impl EmitInfo {
20    pub(crate) fn new(
21        call_conv: isa::CallConv,
22        shared_flags: settings::Flags,
23        isa_flags: crate::isa::pulley_shared::settings::Flags,
24    ) -> Self {
25        Self {
26            call_conv,
27            shared_flags,
28            isa_flags,
29        }
30    }
31
32    fn endianness(&self, flags: MemFlags) -> Endianness {
33        flags.endianness(self.isa_flags.endianness())
34    }
35}
36
37/// State carried between emissions of a sequence of instructions.
38#[derive(Default, Clone, Debug)]
39pub struct EmitState<P>
40where
41    P: PulleyTargetKind,
42{
43    _phantom: PhantomData<P>,
44    ctrl_plane: ControlPlane,
45    user_stack_map: Option<ir::UserStackMap>,
46    frame_layout: FrameLayout,
47}
48
49impl<P> EmitState<P>
50where
51    P: PulleyTargetKind,
52{
53    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
54        self.user_stack_map.take()
55    }
56}
57
58impl<P> MachInstEmitState<InstAndKind<P>> for EmitState<P>
59where
60    P: PulleyTargetKind,
61{
62    fn new(abi: &Callee<PulleyMachineDeps<P>>, ctrl_plane: ControlPlane) -> Self {
63        EmitState {
64            _phantom: PhantomData,
65            ctrl_plane,
66            user_stack_map: None,
67            frame_layout: abi.frame_layout().clone(),
68        }
69    }
70
71    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
72        self.user_stack_map = user_stack_map;
73    }
74
75    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
76        &mut self.ctrl_plane
77    }
78
79    fn take_ctrl_plane(self) -> ControlPlane {
80        self.ctrl_plane
81    }
82
83    fn frame_layout(&self) -> &FrameLayout {
84        &self.frame_layout
85    }
86}
87
88impl<P> MachInstEmit for InstAndKind<P>
89where
90    P: PulleyTargetKind,
91{
92    type State = EmitState<P>;
93    type Info = EmitInfo;
94
95    fn emit(&self, sink: &mut MachBuffer<Self>, emit_info: &Self::Info, state: &mut Self::State) {
96        // N.B.: we *must* not exceed the "worst-case size" used to compute
97        // where to insert islands, except when islands are explicitly triggered
98        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
99        // to allow disabling the check for `JTSequence`, which is always
100        // emitted following an `EmitIsland`.
101        let mut start = sink.cur_offset();
102        pulley_emit(self, sink, emit_info, state, &mut start);
103
104        let end = sink.cur_offset();
105        assert!(
106            (end - start) <= InstAndKind::<P>::worst_case_size(),
107            "encoded inst {self:?} longer than worst-case size: length: {}, Inst::worst_case_size() = {}",
108            end - start,
109            InstAndKind::<P>::worst_case_size()
110        );
111    }
112
113    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
114        self.print_with_state(state)
115    }
116}
117
118fn pulley_emit<P>(
119    inst: &Inst,
120    sink: &mut MachBuffer<InstAndKind<P>>,
121    emit_info: &EmitInfo,
122    state: &mut EmitState<P>,
123    start_offset: &mut u32,
124) where
125    P: PulleyTargetKind,
126{
127    match inst {
128        // Pseduo-instructions that don't actually encode to anything.
129        Inst::Args { .. } | Inst::Rets { .. } | Inst::DummyUse { .. } => {}
130
131        Inst::TrapIf { cond, code } => {
132            let trap = sink.defer_trap(*code);
133            let not_trap = sink.get_label();
134
135            <InstAndKind<P>>::from(Inst::BrIf {
136                cond: cond.clone(),
137                taken: trap,
138                not_taken: not_trap,
139            })
140            .emit(sink, emit_info, state);
141            sink.bind_label(not_trap, &mut state.ctrl_plane);
142        }
143
144        Inst::Nop => todo!(),
145
146        Inst::GetSpecial { dst, reg } => enc::xmov(sink, dst, reg),
147
148        Inst::LoadExtName { .. } => todo!(),
149
150        Inst::Call { info } => {
151            let offset = sink.cur_offset();
152
153            // If arguments happen to already be in the right register for the
154            // ABI then remove them from this list. Otherwise emit the
155            // appropriate `Call` instruction depending on how many arguments we
156            // have that aren't already in their correct register according to
157            // ABI conventions.
158            let mut args = &info.dest.args[..];
159            while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) {
160                args = &args[..args.len() - 1];
161            }
162            match args {
163                [] => enc::call(sink, 0),
164                [x0] => enc::call1(sink, x0, 0),
165                [x0, x1] => enc::call2(sink, x0, x1, 0),
166                [x0, x1, x2] => enc::call3(sink, x0, x1, x2, 0),
167                [x0, x1, x2, x3] => enc::call4(sink, x0, x1, x2, x3, 0),
168                _ => unreachable!(),
169            }
170            let end = sink.cur_offset();
171            sink.add_reloc_at_offset(
172                end - 4,
173                // TODO: is it actually okay to reuse this reloc here?
174                Reloc::X86CallPCRel4,
175                &info.dest.name,
176                // This addend adjusts for the difference between the start of
177                // the instruction and the beginning of the immediate offset
178                // field which is always the final 4 bytes of the instruction.
179                -i64::from(end - offset - 4),
180            );
181            if let Some(s) = state.take_stack_map() {
182                let offset = sink.cur_offset();
183                sink.push_user_stack_map(state, offset, s);
184            }
185            sink.add_call_site();
186
187            let adjust = -i32::try_from(info.callee_pop_size).unwrap();
188            for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {
189                <InstAndKind<P>>::from(i).emit(sink, emit_info, state);
190            }
191        }
192
193        Inst::IndirectCall { info } => {
194            enc::call_indirect(sink, info.dest);
195
196            if let Some(s) = state.take_stack_map() {
197                let offset = sink.cur_offset();
198                sink.push_user_stack_map(state, offset, s);
199            }
200
201            sink.add_call_site();
202
203            let adjust = -i32::try_from(info.callee_pop_size).unwrap();
204            for i in PulleyMachineDeps::<P>::gen_sp_reg_adjust(adjust) {
205                <InstAndKind<P>>::from(i).emit(sink, emit_info, state);
206            }
207        }
208
209        Inst::ReturnCall { info } => {
210            emit_return_call_common_sequence(sink, emit_info, state, &info);
211
212            // Emit an unconditional jump which is quite similar to `Inst::Call`
213            // except that a `jump` opcode is used instead of a `call` opcode.
214            sink.put1(pulley_interpreter::Opcode::Jump as u8);
215            sink.add_reloc(Reloc::X86CallPCRel4, &info.dest, -1);
216            sink.put4(0);
217
218            // Islands were manually handled in
219            // `emit_return_call_common_sequence`.
220            *start_offset = sink.cur_offset();
221        }
222
223        Inst::ReturnIndirectCall { info } => {
224            emit_return_call_common_sequence(sink, emit_info, state, &info);
225            enc::xjump(sink, info.dest);
226
227            // Islands were manually handled in
228            // `emit_return_call_common_sequence`.
229            *start_offset = sink.cur_offset();
230        }
231
232        Inst::IndirectCallHost { info } => {
233            // Emit a relocation to fill in the actual immediate argument here
234            // in `call_indirect_host`.
235            sink.add_reloc(Reloc::PulleyCallIndirectHost, &info.dest, 0);
236            enc::call_indirect_host(sink, 0_u8);
237
238            if let Some(s) = state.take_stack_map() {
239                let offset = sink.cur_offset();
240                sink.push_user_stack_map(state, offset, s);
241            }
242            sink.add_call_site();
243
244            // If a callee pop is happening here that means that something has
245            // messed up, these are expected to be "very simple" signatures.
246            assert!(info.callee_pop_size == 0);
247        }
248
249        Inst::Jump { label } => {
250            sink.use_label_at_offset(*start_offset + 1, *label, LabelUse::Jump(1));
251            sink.add_uncond_branch(*start_offset, *start_offset + 5, *label);
252            enc::jump(sink, 0x00000000);
253        }
254
255        Inst::BrIf {
256            cond,
257            taken,
258            not_taken,
259        } => {
260            // Encode the inverted form of the branch. Branches always have
261            // their trailing 4 bytes as the relative offset which is what we're
262            // going to target here within the `MachBuffer`.
263            let mut inverted = SmallVec::<[u8; 16]>::new();
264            cond.invert().encode(&mut inverted);
265            let len = inverted.len() as u32;
266            debug_assert!(len > 4);
267
268            // Use the `taken` label 4 bytes before the end of the instruction
269            // we're about to emit as that's the base of `PcRelOffset`. Note
270            // that the `Jump` here factors in the offset from the start of the
271            // instruction to the start of the relative offset, hence `len - 4`
272            // as the factor to adjust by.
273            let taken_end = *start_offset + len;
274            sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::Jump(len - 4));
275            sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);
276            cond.encode(sink);
277            debug_assert_eq!(sink.cur_offset(), taken_end);
278
279            // For the not-taken branch use an unconditional jump to the
280            // relevant label, and we know that the jump instruction is 5 bytes
281            // long where the final 4 bytes are the offset to jump by.
282            let not_taken_start = taken_end + 1;
283            let not_taken_end = not_taken_start + 4;
284            sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::Jump(1));
285            sink.add_uncond_branch(taken_end, not_taken_end, *not_taken);
286            enc::jump(sink, 0x00000000);
287            assert_eq!(sink.cur_offset(), not_taken_end);
288        }
289
290        Inst::LoadAddr { dst, mem } => {
291            let base = mem.get_base_register();
292            let offset = mem.get_offset_with_state(state);
293
294            if let Some(base) = base {
295                if offset == 0 {
296                    enc::xmov(sink, dst, base);
297                } else {
298                    if let Ok(offset) = i8::try_from(offset) {
299                        enc::xconst8(sink, dst, offset);
300                    } else if let Ok(offset) = i16::try_from(offset) {
301                        enc::xconst16(sink, dst, offset);
302                    } else {
303                        enc::xconst32(sink, dst, offset);
304                    }
305
306                    match P::pointer_width() {
307                        PointerWidth::PointerWidth32 => {
308                            enc::xadd32(sink, BinaryOperands::new(dst, base, dst))
309                        }
310                        PointerWidth::PointerWidth64 => {
311                            enc::xadd64(sink, BinaryOperands::new(dst, base, dst))
312                        }
313                    }
314                }
315            } else {
316                unreachable!("all pulley amodes have a base register right now")
317            }
318        }
319
320        Inst::XLoad {
321            dst,
322            mem,
323            ty,
324            flags,
325        } => {
326            use Endianness as E;
327            assert!(flags.trap_code().is_none());
328            let addr = AddrO32::Base {
329                addr: mem.get_base_register().unwrap(),
330                offset: mem.get_offset_with_state(state),
331            };
332            let endian = emit_info.endianness(*flags);
333            match *ty {
334                I8 => enc::xload8_u32_o32(sink, dst, addr),
335                I16 => match endian {
336                    E::Little => enc::xload16le_s32_o32(sink, dst, addr),
337                    E::Big => enc::xload16be_s32_o32(sink, dst, addr),
338                },
339                I32 => match endian {
340                    E::Little => enc::xload32le_o32(sink, dst, addr),
341                    E::Big => enc::xload32be_o32(sink, dst, addr),
342                },
343                I64 => match endian {
344                    E::Little => enc::xload64le_o32(sink, dst, addr),
345                    E::Big => enc::xload64be_o32(sink, dst, addr),
346                },
347                _ => unimplemented!("xload ty={ty:?}"),
348            }
349        }
350
351        Inst::FLoad {
352            dst,
353            mem,
354            ty,
355            flags,
356        } => {
357            use Endianness as E;
358            assert!(flags.trap_code().is_none());
359            let addr = AddrO32::Base {
360                addr: mem.get_base_register().unwrap(),
361                offset: mem.get_offset_with_state(state),
362            };
363            let endian = emit_info.endianness(*flags);
364            match *ty {
365                F32 => match endian {
366                    E::Little => enc::fload32le_o32(sink, dst, addr),
367                    E::Big => enc::fload32be_o32(sink, dst, addr),
368                },
369                F64 => match endian {
370                    E::Little => enc::fload64le_o32(sink, dst, addr),
371                    E::Big => enc::fload64be_o32(sink, dst, addr),
372                },
373                _ => unimplemented!("fload ty={ty:?}"),
374            }
375        }
376
377        Inst::VLoad {
378            dst,
379            mem,
380            ty,
381            flags,
382        } => {
383            assert!(flags.trap_code().is_none());
384            let addr = AddrO32::Base {
385                addr: mem.get_base_register().unwrap(),
386                offset: mem.get_offset_with_state(state),
387            };
388            let endian = emit_info.endianness(*flags);
389            assert_eq!(endian, Endianness::Little);
390            assert_eq!(ty.bytes(), 16);
391            enc::vload128le_o32(sink, dst, addr);
392        }
393
394        Inst::XStore {
395            mem,
396            src,
397            ty,
398            flags,
399        } => {
400            use Endianness as E;
401            assert!(flags.trap_code().is_none());
402            let addr = AddrO32::Base {
403                addr: mem.get_base_register().unwrap(),
404                offset: mem.get_offset_with_state(state),
405            };
406            let endian = emit_info.endianness(*flags);
407            match *ty {
408                I8 => enc::xstore8_o32(sink, addr, src),
409                I16 => match endian {
410                    E::Little => enc::xstore16le_o32(sink, addr, src),
411                    E::Big => enc::xstore16be_o32(sink, addr, src),
412                },
413                I32 => match endian {
414                    E::Little => enc::xstore32le_o32(sink, addr, src),
415                    E::Big => enc::xstore32be_o32(sink, addr, src),
416                },
417                I64 => match endian {
418                    E::Little => enc::xstore64le_o32(sink, addr, src),
419                    E::Big => enc::xstore64be_o32(sink, addr, src),
420                },
421                _ => unimplemented!("xstore ty={ty:?}"),
422            }
423        }
424
425        Inst::FStore {
426            mem,
427            src,
428            ty,
429            flags,
430        } => {
431            use Endianness as E;
432            assert!(flags.trap_code().is_none());
433            let addr = AddrO32::Base {
434                addr: mem.get_base_register().unwrap(),
435                offset: mem.get_offset_with_state(state),
436            };
437            let endian = emit_info.endianness(*flags);
438            match *ty {
439                F32 => match endian {
440                    E::Little => enc::fstore32le_o32(sink, addr, src),
441                    E::Big => enc::fstore32be_o32(sink, addr, src),
442                },
443                F64 => match endian {
444                    E::Little => enc::fstore64le_o32(sink, addr, src),
445                    E::Big => enc::fstore64be_o32(sink, addr, src),
446                },
447                _ => unimplemented!("fstore ty={ty:?}"),
448            }
449        }
450
451        Inst::VStore {
452            mem,
453            src,
454            ty,
455            flags,
456        } => {
457            assert!(flags.trap_code().is_none());
458            let addr = AddrO32::Base {
459                addr: mem.get_base_register().unwrap(),
460                offset: mem.get_offset_with_state(state),
461            };
462            let endian = emit_info.endianness(*flags);
463            assert_eq!(endian, Endianness::Little);
464            assert_eq!(ty.bytes(), 16);
465            enc::vstore128le_o32(sink, addr, src);
466        }
467
468        Inst::BrTable {
469            idx,
470            default,
471            targets,
472        } => {
473            // Encode the `br_table32` instruction directly which expects the
474            // next `amt` 4-byte integers to all be relative offsets. Each
475            // offset is the pc-relative offset of the branch destination.
476            //
477            // Pulley clamps the branch targets to the `amt` specified so the
478            // final branch target is the default jump target.
479            //
480            // Note that this instruction may have many branch targets so it
481            // manually checks to see if an island is needed. If so we emit a
482            // jump around the island before the `br_table32` itself gets
483            // emitted.
484            let amt = u32::try_from(targets.len() + 1).expect("too many branch targets");
485            let br_table_size = amt * 4 + 6;
486            if sink.island_needed(br_table_size) {
487                let label = sink.get_label();
488                <InstAndKind<P>>::from(Inst::Jump { label }).emit(sink, emit_info, state);
489                sink.emit_island(br_table_size, &mut state.ctrl_plane);
490                sink.bind_label(label, &mut state.ctrl_plane);
491            }
492            enc::br_table32(sink, *idx, amt);
493            for target in targets.iter() {
494                let offset = sink.cur_offset();
495                sink.use_label_at_offset(offset, *target, LabelUse::Jump(0));
496                sink.put4(0);
497            }
498            let offset = sink.cur_offset();
499            sink.use_label_at_offset(offset, *default, LabelUse::Jump(0));
500            sink.put4(0);
501
502            // We manually handled `emit_island` above when dealing with
503            // `island_needed` so update the starting offset to the current
504            // offset so this instruction doesn't accidentally trigger
505            // the assertion that we're always under worst-case-size.
506            *start_offset = sink.cur_offset();
507        }
508
509        Inst::Raw { raw } => {
510            match raw {
511                RawInst::PushFrame
512                | RawInst::StackAlloc32 { .. }
513                | RawInst::PushFrameSave { .. } => {
514                    sink.add_trap(ir::TrapCode::STACK_OVERFLOW);
515                }
516                _ => {}
517            }
518            super::generated::emit(raw, sink)
519        }
520    }
521}
522
523fn emit_return_call_common_sequence<T, P>(
524    sink: &mut MachBuffer<InstAndKind<P>>,
525    emit_info: &EmitInfo,
526    state: &mut EmitState<P>,
527    info: &ReturnCallInfo<T>,
528) where
529    P: PulleyTargetKind,
530{
531    // The return call sequence can potentially emit a lot of instructions, so
532    // lets emit an island here if we need it.
533    //
534    // It is difficult to calculate exactly how many instructions are going to
535    // be emitted, so we calculate it by emitting it into a disposable buffer,
536    // and then checking how many instructions were actually emitted.
537    let mut buffer = MachBuffer::new();
538    let mut fake_emit_state = state.clone();
539
540    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);
541
542    // Finalize the buffer and get the number of bytes emitted.
543    let buffer = buffer.finish(&Default::default(), &mut Default::default());
544    let length = buffer.data().len() as u32;
545
546    // And now emit the island inline with this instruction.
547    if sink.island_needed(length) {
548        let jump_around_label = sink.get_label();
549        <InstAndKind<P>>::gen_jump(jump_around_label).emit(sink, emit_info, state);
550        sink.emit_island(length + 4, &mut state.ctrl_plane);
551        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
552    }
553
554    // Now that we're done, emit the *actual* return sequence.
555    return_call_emit_impl(sink, emit_info, state, info);
556}
557
558/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence].
559fn return_call_emit_impl<T, P>(
560    sink: &mut MachBuffer<InstAndKind<P>>,
561    emit_info: &EmitInfo,
562    state: &mut EmitState<P>,
563    info: &ReturnCallInfo<T>,
564) where
565    P: PulleyTargetKind,
566{
567    let epilogue = <PulleyMachineDeps<P>>::gen_epilogue_frame_restore(
568        emit_info.call_conv,
569        &emit_info.shared_flags,
570        &emit_info.isa_flags,
571        &state.frame_layout,
572    );
573
574    for inst in epilogue {
575        inst.emit(sink, emit_info, state);
576    }
577
578    // Now that `sp` is restored to what it was on function entry it may need to
579    // be adjusted if the stack arguments of our own function differ from the
580    // stack arguments of the callee. Perform any necessary adjustment here.
581    //
582    // Note that this means that there's a brief window where stack arguments
583    // might be below `sp` in the case that the callee has more stack arguments
584    // than ourselves. That's in theory ok though as we're inventing the pulley
585    // ABI and nothing like async signals are happening that we have to worry
586    // about.
587    let incoming_args_diff =
588        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
589
590    if incoming_args_diff != 0 {
591        let amt = i32::try_from(incoming_args_diff).unwrap();
592        for inst in PulleyMachineDeps::<P>::gen_sp_reg_adjust(amt) {
593            <InstAndKind<P>>::from(inst).emit(sink, emit_info, state);
594        }
595    }
596}