cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;

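/// Flag state needed at emission time: the shared compilation flags plus the
/// RISC-V specific ISA flags.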
pub struct EmitInfo {
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

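/// Returns the 5-bit hardware encoding of the given register, for use in a
/// register field of an instruction.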
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

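/// Returns the 3-bit register encoding used by compressed instructions, which
/// can only address registers x8..x15 (mapped to 0..7).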
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

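/// The vector-unit state tracked at the current emission point: either unknown
/// (e.g. at the start of a new block) or a known, specific `VState`.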
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector State
    /// Controls the current state of the vector unit at the emission point.
    vstate: EmitVState,

    /// The frame layout of the function being emitted.
    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.vstate = EmitVState::Unknown;
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Load an integer mask into `rd`: the low `ty.bits()` bits are set and
    /// all higher bits are clear (e.g. 0xffff for I16).
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }

    /// Invert all bits: `rd = !rs`, implemented as `xori rd, rs, -1`.
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtName { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate, rather it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this instruction
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First try to emit this as a compressed instruction.
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal, uncompressed instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table and return call from these checks since they emit
        // their own islands, and thus are allowed to exceed the worst case size.
        if !matches!(
            self,
            Inst::BrTable { .. } | Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. }
        ) {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in a compressed (2-byte) form; returns
    /// `None` if no compressed encoding applies.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

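        // Each arm below corresponds to one compressed encoding; the guards
        // check the operand and immediate constraints that encoding imposes.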
        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 and we get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // these are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }
            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`, they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        Some(())
    }

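    /// Emit this instruction without attempting a compressed encoding.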
    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction.
                // It is not long enough to require an `emit_island` check.
                // If the data were very long, that would be a bug: RawData is
                // typically used to emit data that other code addresses by its
                // position in the code stream, and we might also exceed
                // `Inst::worst_case_size`.
                // For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, ty, imm } => {
                sink.put4(encode_fli(ty, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

                // Load into rd
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
                    // register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // Imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction, instead we emit the equivalent
                // `jalr x0, x1, 0` that jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
                let is_u8 = || from_bits == 8 && !signed;
                if is_u8() {
                    // Special case: zero-extending a u8 is a single `andi`.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }

            &Inst::Call { ref info } => {
                sink.add_call_site();
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                // The callee pops `callee_pop_size` bytes off the stack;
                // adjust SP back down after the call.
                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                sink.add_call_site();

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site();
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, &info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
                // `jal` opcode with rd = x0 and a zero offset; the label use
                // above patches in the real offset once it is known.
                sink.put4(0b1101111);
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

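            // Register-to-register moves. The encoding used depends on the
            // register class: `addi` for integers, `fsgnj` for floats, and
            // `vmv` for vectors.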
            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MovFromPReg { rd, rm } => {
                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
            }

1248            &Inst::BrTable {
1249                index,
1250                tmp1,
1251                tmp2,
1252                ref targets,
1253            } => {
1254                let ext_index = writable_spilltmp_reg();
1255
1256                let label_compute_target = sink.get_label();
1257
1258                // The default target is passed in as the 0th element of `targets`
1259                // separate it here for clarity.
1260                let default_target = targets[0];
1261                let targets = &targets[1..];
1262
1263                // We are going to potentially emit a large amount of instructions, so ensure that we emit an island
1264                // now if we need one.
1265                //
1266                // The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.
1267                // Check if we need to emit a jump table here to support that jump.
1268                let inst_count = 12 + (targets.len() * 2);
1269                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1270                if sink.island_needed(distance) {
1271                    let jump_around_label = sink.get_label();
1272                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1273                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1274                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1275                }
1276
1277                // We emit a bounds check on the index, if the index is larger than the number of
1278                // jump table entries, we jump to the default block.  Otherwise we compute a jump
1279                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1280                // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
1281                //
1282                // Build the following sequence:
1283                //
1284                // extend_index:
1285                //     zext.w  ext_index, index
1286                // bounds_check:
1287                //     li      tmp, n_labels
1288                //     bltu    ext_index, tmp, compute_target
1289                // jump_to_default_block:
1290                //     auipc   pc, 0
1291                //     jalr    zero, pc, default_block
1292                // compute_target:
1293                //     auipc   pc, 0
1294                //     slli    tmp, ext_index, 3
1295                //     add     pc, pc, tmp
1296                //     jalr    zero, pc, 0x10
1297                // jump_table:
1298                //     ; This repeats for each entry in the jumptable
1299                //     auipc   pc, 0
1300                //     jalr    zero, pc, block_target
1301
1302                // Extend the index to 64 bits.
1303                //
1304                // This prevents us branching on the top 32 bits of the index, which
1305                // are undefined.
1306                Inst::Extend {
1307                    rd: ext_index,
1308                    rn: index,
1309                    signed: false,
1310                    from_bits: 32,
1311                    to_bits: 64,
1312                }
1313                .emit(sink, emit_info, state);
1314
1315                // Bounds check.
1316                //
1317                // Check if the index passed in is larger than the number of jumptable
1318                // entries that we have. If it is, we fallthrough to a jump into the
1319                // default block.
1320                Inst::load_constant_u32(tmp2, targets.len() as u64)
1321                    .iter()
1322                    .for_each(|i| i.emit(sink, emit_info, state));
1323                Inst::CondBr {
1324                    taken: CondBrTarget::Label(label_compute_target),
1325                    not_taken: CondBrTarget::Fallthrough,
1326                    kind: IntegerCompare {
1327                        kind: IntCC::UnsignedLessThan,
1328                        rs1: ext_index.to_reg(),
1329                        rs2: tmp2.to_reg(),
1330                    },
1331                }
1332                .emit(sink, emit_info, state);
1333
1334                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1335                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1336                    .iter()
1337                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1338
1339                // Compute the jump table offset.
1340                // We need to emit a PC relative offset,
1341                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1342
1343                // Get the current PC.
1344                Inst::Auipc {
1345                    rd: tmp1,
1346                    imm: Imm20::ZERO,
1347                }
1348                .emit_uncompressed(sink, emit_info, state, start_off);
1349
1350                // These instructions must be emitted as uncompressed since we
1351                // are manually computing the offset from the PC.
1352
1353                // Multiply the index by 8, since that is the size in
1354                // bytes of each jump table entry
1355                Inst::AluRRImm12 {
1356                    alu_op: AluOPRRI::Slli,
1357                    rd: tmp2,
1358                    rs: ext_index.to_reg(),
1359                    imm12: Imm12::from_i16(3),
1360                }
1361                .emit_uncompressed(sink, emit_info, state, start_off);
1362
1363                // Calculate the base of the jump, PC + the offset from above.
1364                Inst::AluRRR {
1365                    alu_op: AluOPRRR::Add,
1366                    rd: tmp1,
1367                    rs1: tmp1.to_reg(),
1368                    rs2: tmp2.to_reg(),
1369                }
1370                .emit_uncompressed(sink, emit_info, state, start_off);
1371
1372                // Jump into the jump table at the computed entry.
1373                // We add a 16-byte offset here because the jump table starts
1374                // 4 instructions after the AUIPC that was used to get the PC.
1375                Inst::Jalr {
1376                    rd: writable_zero_reg(),
1377                    base: tmp1.to_reg(),
1378                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1379                }
1380                .emit_uncompressed(sink, emit_info, state, start_off);
1381
1382                // Emit the jump table.
1383                //
1384                // Each entry is an auipc + jalr to the target block. We also start
1385                // with an island if necessary.
1386
1387                // Emit the jumps back to back
1388                for target in targets.iter() {
1389                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1390
1391                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1392                        .iter()
1393                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1394                }
1395
1396                // We've just emitted an island that is safe up to *here*.
1397                // Mark it as such so that we don't needlessly emit additional islands.
1398                *start_off = sink.cur_offset();
1399            }
1400
1401            &Inst::Atomic {
1402                op,
1403                rd,
1404                addr,
1405                src,
1406                amo,
1407            } => {
1408                // TODO: get flags from original CLIF atomic instruction
1409                let flags = MemFlags::new();
1410                if let Some(trap_code) = flags.trap_code() {
1411                    sink.add_trap(trap_code);
1412                }
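                // AMO (R-type) encoding, mirroring the shifts below:
                //   funct5|aq|rl [31:25] | rs2 (src) [24:20] | rs1 (addr) [19:15] |
                //   funct3 (width) [14:12] | rd [11:7] | opcode 0101111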
1413                let x = op.op_code()
1414                    | reg_to_gpr_num(rd.to_reg()) << 7
1415                    | op.funct3() << 12
1416                    | reg_to_gpr_num(addr) << 15
1417                    | reg_to_gpr_num(src) << 20
1418                    | op.funct7(amo) << 25;
1419
1420                sink.put4(x);
1421            }
1422            &Inst::Fence { pred, succ } => {
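                // FENCE encoding: fm [31:28] | pred [27:24] | succ [23:20] |
                // rs1=x0 | funct3=000 | rd=x0 | opcode=0001111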
1423                let x = 0b0001111
1424                    | 0b00000 << 7
1425                    | 0b000 << 12
1426                    | 0b00000 << 15
1427                    | (succ as u32) << 20
1428                    | (pred as u32) << 24;
1429
1430                sink.put4(x);
1431            }
1432            &Inst::Auipc { rd, imm } => {
1433                sink.put4(enc_auipc(rd, imm));
1434            }
1435
1436            &Inst::LoadAddr { rd, mem } => {
1437                let base = mem.get_base_register();
1438                let offset = mem.get_offset_with_state(state);
1439                let offset_imm12 = Imm12::maybe_from_i64(offset);
1440
1441                match (mem, base, offset_imm12) {
1442                    (_, Some(rs), Some(imm12)) => {
1443                        Inst::AluRRImm12 {
1444                            alu_op: AluOPRRI::Addi,
1445                            rd,
1446                            rs,
1447                            imm12,
1448                        }
1449                        .emit(sink, emit_info, state);
1450                    }
1451                    (_, Some(rs), None) => {
1452                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1453                        insts.push(Inst::AluRRR {
1454                            alu_op: AluOPRRR::Add,
1455                            rd,
1456                            rs1: rd.to_reg(),
1457                            rs2: rs,
1458                        });
1459                        insts
1460                            .into_iter()
1461                            .for_each(|inst| inst.emit(sink, emit_info, state));
1462                    }
1463                    (AMode::Const(addr), None, _) => {
1464                        // Get an address label for the constant and recurse.
1465                        let label = sink.get_label_for_constant(addr);
1466                        Inst::LoadAddr {
1467                            rd,
1468                            mem: AMode::Label(label),
1469                        }
1470                        .emit(sink, emit_info, state);
1471                    }
1472                    (AMode::Label(label), None, _) => {
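                        // Materialize a PC-relative address:
                        //   auipc rd, %pcrel_hi(label)
                        //   addi  rd, rd, %pcrel_lo(label)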
1473                        // Get the current PC.
1474                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1475                        let inst = Inst::Auipc {
1476                            rd,
1477                            imm: Imm20::ZERO,
1478                        };
1479                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1480
1481                        // Emit an add to the address with a relocation.
1482                        // This later gets patched up with the correct offset.
1483                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1484                        Inst::AluRRImm12 {
1485                            alu_op: AluOPRRI::Addi,
1486                            rd,
1487                            rs: rd.to_reg(),
1488                            imm12: Imm12::ZERO,
1489                        }
1490                        .emit_uncompressed(sink, emit_info, state, start_off);
1491                    }
1492                    (amode, _, _) => {
1493                        unimplemented!("LoadAddr: {:?}", amode);
1494                    }
1495                }
1496            }
1497
1498            &Inst::Select {
1499                ref dst,
1500                condition,
1501                ref x,
1502                ref y,
1503            } => {
1504                // The general form for this select is the following:
1505                //
1506                //     mv rd, x
1507                //     b{cond} rcond, label_end
1508                //     mv rd, y
1509                // label_end:
1510                //     ... etc
1511                //
1512                // This is built on the assumption that moves are cheap, but branches and jumps
1513                // are not. So with this format we always avoid one jump instruction at the expense
1514                // of an unconditional move.
1515                //
1516                // We also perform another optimization here. If the destination register is the same
1517                // as one of the input registers, we can avoid emitting the first unconditional move
1518                // and emit just the branch and the second move.
1519                //
1520                // To make sure that this happens as often as possible, we also try to invert the
1521                // condition, so that if either of the input registers is the same as the destination
1522                // we avoid that move.
1523
1524                let label_end = sink.get_label();
1525
1526                let xregs = x.regs();
1527                let yregs = y.regs();
1528                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1529                let condregs = condition.regs();
1530
1531                // We are going to write to the destination register before evaluating
1532                // the condition, so we need to make sure that the destination register
1533                // is not one of the condition registers.
1534                //
1535                // This should never happen, since hopefully the regalloc constraints
1536                // for this register are set up correctly.
1537                debug_assert_ne!(dstregs, condregs);
1538
1539                // Check if we can invert the condition and avoid moving the y registers into
1540                // the destination. This allows us to only emit the branch and one of the moves.
1541                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1542                    (yregs, xregs, condition.inverse())
1543                } else {
1544                    (xregs, yregs, condition)
1545                };
1546
1547                // Unconditionally move one of the values to the destination register.
1548                //
1549                // These moves may not end up being emitted if the source and
1550                // destination registers are the same. That logic is built into
1551                // the emit function for `Inst::Mov`.
1552                for i in gen_moves(dst.regs(), uncond_move) {
1553                    i.emit(sink, emit_info, state);
1554                }
1555
1556                // If the condition passes we skip over the conditional move
1557                Inst::CondBr {
1558                    taken: CondBrTarget::Label(label_end),
1559                    not_taken: CondBrTarget::Fallthrough,
1560                    kind: condition,
1561                }
1562                .emit(sink, emit_info, state);
1563
1564                // Move the conditional value to the destination register.
1565                for i in gen_moves(dst.regs(), cond_move) {
1566                    i.emit(sink, emit_info, state);
1567                }
1568
1569                sink.bind_label(label_end, &mut state.ctrl_plane);
1570            }
1571            &Inst::Jalr { rd, base, offset } => {
1572                sink.put4(enc_jalr(rd, base, offset));
1573            }
1574            &Inst::EBreak => {
1575                sink.put4(0x00100073);
1576            }
1577            &Inst::AtomicCas {
1578                offset,
1579                t0,
1580                dst,
1581                e,
1582                addr,
1583                v,
1584                ty,
1585            } => {
1586                //     # addr holds address of memory location
1587                //     # e holds expected value
1588                //     # v holds desired value
1589                //     # dst holds return value
1590                // cas:
1591                //     lr.w dst, (addr)       # Load original value.
1592                //     bne dst, e, fail       # Doesn't match, so fail.
1593                //     sc.w t0, v, (addr)     # Try to update.
1594                //     bnez t0, cas           # If the store failed, retry.
1595                // fail:
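                //
                // For 8- and 16-bit types there is no sub-word LR/SC, so the loop
                // operates on the containing aligned word: `extract` pulls the lane
                // out of the loaded word for the comparison, and `merge` writes the
                // desired value back into a freshly reloaded word before attempting
                // the store-conditional.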
1596                let fail_label = sink.get_label();
1597                let cas_label = sink.get_label();
1598                sink.bind_label(cas_label, &mut state.ctrl_plane);
1599                Inst::Atomic {
1600                    op: AtomicOP::load_op(ty),
1601                    rd: dst,
1602                    addr,
1603                    src: zero_reg(),
1604                    amo: AMO::SeqCst,
1605                }
1606                .emit(sink, emit_info, state);
1607                if ty.bits() < 32 {
1608                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1609                        .iter()
1610                        .for_each(|i| i.emit(sink, emit_info, state));
1611                } else if ty.bits() == 32 {
1612                    Inst::Extend {
1613                        rd: dst,
1614                        rn: dst.to_reg(),
1615                        signed: false,
1616                        from_bits: 32,
1617                        to_bits: 64,
1618                    }
1619                    .emit(sink, emit_info, state);
1620                }
1621                Inst::CondBr {
1622                    taken: CondBrTarget::Label(fail_label),
1623                    not_taken: CondBrTarget::Fallthrough,
1624                    kind: IntegerCompare {
1625                        kind: IntCC::NotEqual,
1626                        rs1: e,
1627                        rs2: dst.to_reg(),
1628                    },
1629                }
1630                .emit(sink, emit_info, state);
1631                let store_value = if ty.bits() < 32 {
1632                    // Reload the containing aligned word into t0.
1633                    Inst::Atomic {
1634                        op: AtomicOP::load_op(ty),
1635                        rd: t0,
1636                        addr,
1637                        src: zero_reg(),
1638                        amo: AMO::SeqCst,
1639                    }
1640                    .emit(sink, emit_info, state);
1641                    // Merge the desired value into the rest of the word.
1642                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1643                        .iter()
1644                        .for_each(|i| i.emit(sink, emit_info, state));
1645                    t0.to_reg()
1646                } else {
1647                    v
1648                };
1649                Inst::Atomic {
1650                    op: AtomicOP::store_op(ty),
1651                    rd: t0,
1652                    addr,
1653                    src: store_value,
1654                    amo: AMO::SeqCst,
1655                }
1656                .emit(sink, emit_info, state);
1657                // Check whether our value was actually stored (SC succeeded).
1658                Inst::CondBr {
1659                    taken: CondBrTarget::Label(cas_label),
1660                    not_taken: CondBrTarget::Fallthrough,
1661                    kind: IntegerCompare {
1662                        kind: IntCC::NotEqual,
1663                        rs1: t0.to_reg(),
1664                        rs2: zero_reg(),
1665                    },
1666                }
1667                .emit(sink, emit_info, state);
1668                sink.bind_label(fail_label, &mut state.ctrl_plane);
1669            }
1670            &Inst::AtomicRmwLoop {
1671                offset,
1672                op,
1673                dst,
1674                ty,
1675                p,
1676                x,
1677                t0,
1678            } => {
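                // General LR/SC retry loop:
                //
                // retry:
                //   lr.{w,d} dst, (p)              # Load the old value.
                //   <compute store_value>          # Op-specific; sub-word types
                //                                  # use extract/merge on the word.
                //   sc.{w,d} t0, store_value, (p)  # Try to store.
                //   bnez t0, retry                 # Retry if the SC failed.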
1679                let retry = sink.get_label();
1680                sink.bind_label(retry, &mut state.ctrl_plane);
1681                // load old value.
1682                Inst::Atomic {
1683                    op: AtomicOP::load_op(ty),
1684                    rd: dst,
1685                    addr: p,
1686                    src: zero_reg(),
1687                    amo: AMO::SeqCst,
1688                }
1689                .emit(sink, emit_info, state);
1690                // Compute the value to store back, based on the RMW op.
1691
1692                let store_value: Reg = match op {
1693                    crate::ir::AtomicRmwOp::Add
1694                    | crate::ir::AtomicRmwOp::Sub
1695                    | crate::ir::AtomicRmwOp::And
1696                    | crate::ir::AtomicRmwOp::Or
1697                    | crate::ir::AtomicRmwOp::Xor => {
1698                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1699                            .iter()
1700                            .for_each(|i| i.emit(sink, emit_info, state));
1701                        Inst::AluRRR {
1702                            alu_op: match op {
1703                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1704                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1705                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1706                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1707                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1708                                _ => unreachable!(),
1709                            },
1710                            rd: t0,
1711                            rs1: dst.to_reg(),
1712                            rs2: x,
1713                        }
1714                        .emit(sink, emit_info, state);
1715                        Inst::Atomic {
1716                            op: AtomicOP::load_op(ty),
1717                            rd: writable_spilltmp_reg2(),
1718                            addr: p,
1719                            src: zero_reg(),
1720                            amo: AMO::SeqCst,
1721                        }
1722                        .emit(sink, emit_info, state);
1723                        AtomicOP::merge(
1724                            writable_spilltmp_reg2(),
1725                            writable_spilltmp_reg(),
1726                            offset,
1727                            t0.to_reg(),
1728                            ty,
1729                        )
1730                        .iter()
1731                        .for_each(|i| i.emit(sink, emit_info, state));
1732                        spilltmp_reg2()
1733                    }
1734                    crate::ir::AtomicRmwOp::Nand => {
1735                        if ty.bits() < 32 {
1736                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1737                                .iter()
1738                                .for_each(|i| i.emit(sink, emit_info, state));
1739                        }
1740                        Inst::AluRRR {
1741                            alu_op: AluOPRRR::And,
1742                            rd: t0,
1743                            rs1: x,
1744                            rs2: dst.to_reg(),
1745                        }
1746                        .emit(sink, emit_info, state);
1747                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1748                        if ty.bits() < 32 {
1749                            Inst::Atomic {
1750                                op: AtomicOP::load_op(ty),
1751                                rd: writable_spilltmp_reg2(),
1752                                addr: p,
1753                                src: zero_reg(),
1754                                amo: AMO::SeqCst,
1755                            }
1756                            .emit(sink, emit_info, state);
1757                            AtomicOP::merge(
1758                                writable_spilltmp_reg2(),
1759                                writable_spilltmp_reg(),
1760                                offset,
1761                                t0.to_reg(),
1762                                ty,
1763                            )
1764                            .iter()
1765                            .for_each(|i| i.emit(sink, emit_info, state));
1766                            spilltmp_reg2()
1767                        } else {
1768                            t0.to_reg()
1769                        }
1770                    }
1771
1772                    crate::ir::AtomicRmwOp::Umin
1773                    | crate::ir::AtomicRmwOp::Umax
1774                    | crate::ir::AtomicRmwOp::Smin
1775                    | crate::ir::AtomicRmwOp::Smax => {
1776                        let label_select_dst = sink.get_label();
1777                        let label_select_done = sink.get_label();
1778                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1779                        {
1780                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1781                        } else {
1782                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1783                        }
1784                        .iter()
1785                        .for_each(|i| i.emit(sink, emit_info, state));
1786
1787                        Inst::CondBr {
1788                            taken: CondBrTarget::Label(label_select_dst),
1789                            not_taken: CondBrTarget::Fallthrough,
1790                            kind: IntegerCompare {
1791                                kind: match op {
1792                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1793                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1794                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1795                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1796                                    _ => unreachable!(),
1797                                },
1798                                rs1: dst.to_reg(),
1799                                rs2: x,
1800                            },
1801                        }
1802                        .emit(sink, emit_info, state);
1803                        // here we select x.
1804                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1805                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1806                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1807                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1808                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1809                        Inst::Atomic {
1810                            op: AtomicOP::load_op(ty),
1811                            rd: writable_spilltmp_reg2(),
1812                            addr: p,
1813                            src: zero_reg(),
1814                            amo: AMO::SeqCst,
1815                        }
1816                        .emit(sink, emit_info, state);
1817                        AtomicOP::merge(
1818                            writable_spilltmp_reg2(),
1819                            writable_spilltmp_reg(),
1820                            offset,
1821                            t0.to_reg(),
1822                            ty,
1823                        )
1824                        .iter()
1825                        .for_each(|i| i.emit(sink, emit_info, state));
1826                        spilltmp_reg2()
1827                    }
1828                    crate::ir::AtomicRmwOp::Xchg => {
1829                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1830                            .iter()
1831                            .for_each(|i| i.emit(sink, emit_info, state));
1832                        Inst::Atomic {
1833                            op: AtomicOP::load_op(ty),
1834                            rd: writable_spilltmp_reg2(),
1835                            addr: p,
1836                            src: zero_reg(),
1837                            amo: AMO::SeqCst,
1838                        }
1839                        .emit(sink, emit_info, state);
1840                        AtomicOP::merge(
1841                            writable_spilltmp_reg2(),
1842                            writable_spilltmp_reg(),
1843                            offset,
1844                            x,
1845                            ty,
1846                        )
1847                        .iter()
1848                        .for_each(|i| i.emit(sink, emit_info, state));
1849                        spilltmp_reg2()
1850                    }
1851                };
1852
1853                Inst::Atomic {
1854                    op: AtomicOP::store_op(ty),
1855                    rd: t0,
1856                    addr: p,
1857                    src: store_value,
1858                    amo: AMO::SeqCst,
1859                }
1860                .emit(sink, emit_info, state);
1861
1862                // If the store failed, retry.
1863                Inst::CondBr {
1864                    taken: CondBrTarget::Label(retry),
1865                    not_taken: CondBrTarget::Fallthrough,
1866                    kind: IntegerCompare {
1867                        kind: IntCC::NotEqual,
1868                        rs1: t0.to_reg(),
1869                        rs2: zero_reg(),
1870                    },
1871                }
1872                .emit(sink, emit_info, state);
1873            }
1874
1875            &Inst::LoadExtName {
1876                rd,
1877                ref name,
1878                offset,
1879            } => {
1880                if emit_info.shared_flag.is_pic() {
1881                    // Load a PC-relative address into a register.
1882                    // RISC-V does this slightly differently from other arches. We emit a relocation
1883                    // with a label, instead of the symbol itself.
1884                    //
1885                    // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
1886                    //
1887                    // Emit the following code:
1888                    // label:
1889                    //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
1890                    //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
1891
1892                    // Create the label that is going to be published to the final binary object.
1893                    let auipc_label = sink.get_label();
1894                    sink.bind_label(auipc_label, &mut state.ctrl_plane);
1895
1896                    // Get the current PC.
1897                    sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
1898                    Inst::Auipc {
1899                        rd: rd,
1900                        imm: Imm20::from_i32(0),
1901                    }
1902                    .emit_uncompressed(sink, emit_info, state, start_off);
1903
1904                    // The `ld` here points to the `auipc` label instead of directly to the symbol.
1905                    sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
1906                    Inst::Load {
1907                        rd,
1908                        op: LoadOP::Ld,
1909                        flags: MemFlags::trusted(),
1910                        from: AMode::RegOffset(rd.to_reg(), 0),
1911                    }
1912                    .emit_uncompressed(sink, emit_info, state, start_off);
1913                } else {
1914                    // In the non-PIC sequence we relocate the absolute address into
1915                    // a preallocated space, load it into a register, and jump over it.
1916                    //
1917                    // Emit the following code:
1918                    //   ld rd, label_data
1919                    //   j label_end
1920                    // label_data:
1921                    //   <8 byte space>           # ABS8
1922                    // label_end:
1923
1924                    let label_data = sink.get_label();
1925                    let label_end = sink.get_label();
1926
1927                    // Load the value from a label
1928                    Inst::Load {
1929                        rd,
1930                        op: LoadOP::Ld,
1931                        flags: MemFlags::trusted(),
1932                        from: AMode::Label(label_data),
1933                    }
1934                    .emit(sink, emit_info, state);
1935
1936                    // Jump over the data
1937                    Inst::gen_jump(label_end).emit(sink, emit_info, state);
1938
1939                    sink.bind_label(label_data, &mut state.ctrl_plane);
1940                    sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
1941                    sink.put8(0);
1942
1943                    sink.bind_label(label_end, &mut state.ctrl_plane);
1944                }
1945            }
1946
1947            &Inst::ElfTlsGetAddr { rd, ref name } => {
1948                // RISC-V's TLS GD model is slightly different from other arches.
1949                //
1950                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
1951                // of the address relative to the GOT entry. This relocation points to
1952                // the symbol as usual.
1953                //
1954                // However, when loading the bottom 12 bits of the address, we need to
1955                // use a label that points to the previous AUIPC instruction.
1956                //
1957                // label:
1958                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
1959                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
1960                //
1961                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
1962
1963                // Create the label that is going to be published to the final binary object.
1964                let auipc_label = sink.get_label();
1965                sink.bind_label(auipc_label, &mut state.ctrl_plane);
1966
1967                // Get the current PC.
1968                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
1969                Inst::Auipc {
1970                    rd: rd,
1971                    imm: Imm20::from_i32(0),
1972                }
1973                .emit_uncompressed(sink, emit_info, state, start_off);
1974
1975                // The `addi` here points to the `auipc` label instead of directly to the symbol.
1976                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
1977                Inst::AluRRImm12 {
1978                    alu_op: AluOPRRI::Addi,
1979                    rd: rd,
1980                    rs: rd.to_reg(),
1981                    imm12: Imm12::from_i16(0),
1982                }
1983                .emit_uncompressed(sink, emit_info, state, start_off);
1984
1985                Inst::Call {
1986                    info: Box::new(CallInfo::empty(
1987                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
1988                        CallConv::SystemV,
1989                    )),
1990                }
1991                .emit_uncompressed(sink, emit_info, state, start_off);
1992            }
1993
1994            &Inst::TrapIf {
1995                rs1,
1996                rs2,
1997                cc,
1998                trap_code,
1999            } => {
2000                let label_end = sink.get_label();
2001                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2002
2003                // Jump over the trap if the condition is false.
2004                Inst::CondBr {
2005                    taken: CondBrTarget::Label(label_end),
2006                    not_taken: CondBrTarget::Fallthrough,
2007                    kind: cond.inverse(),
2008                }
2009                .emit(sink, emit_info, state);
2010                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2011
2012                sink.bind_label(label_end, &mut state.ctrl_plane);
2013            }
2014            &Inst::Udf { trap_code } => {
2015                sink.add_trap(trap_code);
2016                sink.put_data(Inst::TRAP_OPCODE);
2017            }
2018            &Inst::AtomicLoad { rd, ty, p } => {
2019                // emit the fence.
2020                Inst::Fence {
2021                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2022                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2023                }
2024                .emit(sink, emit_info, state);
2025                // load.
2026                Inst::Load {
2027                    rd: rd,
2028                    op: LoadOP::from_type(ty),
2029                    flags: MemFlags::new(),
2030                    from: AMode::RegOffset(p, 0),
2031                }
2032                .emit(sink, emit_info, state);
2033                Inst::Fence {
2034                    pred: Inst::FENCE_REQ_R,
2035                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2036                }
2037                .emit(sink, emit_info, state);
2038            }
2039            &Inst::AtomicStore { src, ty, p } => {
2040                Inst::Fence {
2041                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2042                    succ: Inst::FENCE_REQ_W,
2043                }
2044                .emit(sink, emit_info, state);
2045                Inst::Store {
2046                    to: AMode::RegOffset(p, 0),
2047                    op: StoreOP::from_type(ty),
2048                    flags: MemFlags::new(),
2049                    src,
2050                }
2051                .emit(sink, emit_info, state);
2052            }
2053
2054            &Inst::Popcnt {
2055                sum,
2056                tmp,
2057                step,
2058                rs,
2059                ty,
2060            } => {
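                // Naive bit-scanning popcount: a one-bit mask (tmp) starts at the
                // type's top bit and shifts right each iteration, while step counts
                // down from ty.bits(); sum is incremented whenever rs & tmp != 0.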
2061                // Initialize sum to 0.
2062                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2063                // Load the loop counter with the bit width of the type.
2064                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2065                    .emit(sink, emit_info, state);
2066                // Build the scanning mask: load 1, then shift it up to the top bit.
2067                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2068                Inst::AluRRImm12 {
2069                    alu_op: AluOPRRI::Slli,
2070                    rd: tmp,
2071                    rs: tmp.to_reg(),
2072                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2073                }
2074                .emit(sink, emit_info, state);
2075                let label_done = sink.get_label();
2076                let label_loop = sink.get_label();
2077                sink.bind_label(label_loop, &mut state.ctrl_plane);
2078                Inst::CondBr {
2079                    taken: CondBrTarget::Label(label_done),
2080                    not_taken: CondBrTarget::Fallthrough,
2081                    kind: IntegerCompare {
2082                        kind: IntCC::SignedLessThanOrEqual,
2083                        rs1: step.to_reg(),
2084                        rs2: zero_reg(),
2085                    },
2086                }
2087                .emit(sink, emit_info, state);
2088                // test and add sum.
2089                {
2090                    Inst::AluRRR {
2091                        alu_op: AluOPRRR::And,
2092                        rd: writable_spilltmp_reg2(),
2093                        rs1: tmp.to_reg(),
2094                        rs2: rs,
2095                    }
2096                    .emit(sink, emit_info, state);
2097                    let label_over = sink.get_label();
2098                    Inst::CondBr {
2099                        taken: CondBrTarget::Label(label_over),
2100                        not_taken: CondBrTarget::Fallthrough,
2101                        kind: IntegerCompare {
2102                            kind: IntCC::Equal,
2103                            rs1: zero_reg(),
2104                            rs2: spilltmp_reg2(),
2105                        },
2106                    }
2107                    .emit(sink, emit_info, state);
2108                    Inst::AluRRImm12 {
2109                        alu_op: AluOPRRI::Addi,
2110                        rd: sum,
2111                        rs: sum.to_reg(),
2112                        imm12: Imm12::ONE,
2113                    }
2114                    .emit(sink, emit_info, state);
2115                    sink.bind_label(label_over, &mut state.ctrl_plane);
2116                }
2117                // set step and tmp.
2118                {
2119                    Inst::AluRRImm12 {
2120                        alu_op: AluOPRRI::Addi,
2121                        rd: step,
2122                        rs: step.to_reg(),
2123                        imm12: Imm12::from_i16(-1),
2124                    }
2125                    .emit(sink, emit_info, state);
2126                    Inst::AluRRImm12 {
2127                        alu_op: AluOPRRI::Srli,
2128                        rd: tmp,
2129                        rs: tmp.to_reg(),
2130                        imm12: Imm12::ONE,
2131                    }
2132                    .emit(sink, emit_info, state);
2133                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2134                }
2135                sink.bind_label(label_done, &mut state.ctrl_plane);
2136            }
2137            &Inst::Cltz {
2138                sum,
2139                tmp,
2140                step,
2141                rs,
2142                leading,
2143                ty,
2144            } => {
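                // Count leading or trailing zeros by scanning a one-bit mask across
                // the value: from the MSB shifting right (leading) or from the LSB
                // shifting left (trailing), incrementing sum until the first set bit.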
2145                // Initialize sum to 0.
2146                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2147                // Load the loop counter with the bit width of the type.
2148                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2149                    .emit(sink, emit_info, state);
2150                // Build the scanning mask: load 1 (shifted to the MSB below if leading).
2151                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2152                if leading {
2153                    Inst::AluRRImm12 {
2154                        alu_op: AluOPRRI::Slli,
2155                        rd: tmp,
2156                        rs: tmp.to_reg(),
2157                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2158                    }
2159                    .emit(sink, emit_info, state);
2160                }
2161                let label_done = sink.get_label();
2162                let label_loop = sink.get_label();
2163                sink.bind_label(label_loop, &mut state.ctrl_plane);
2164                Inst::CondBr {
2165                    taken: CondBrTarget::Label(label_done),
2166                    not_taken: CondBrTarget::Fallthrough,
2167                    kind: IntegerCompare {
2168                        kind: IntCC::SignedLessThanOrEqual,
2169                        rs1: step.to_reg(),
2170                        rs2: zero_reg(),
2171                    },
2172                }
2173                .emit(sink, emit_info, state);
2174                // test and add sum.
2175                {
2176                    Inst::AluRRR {
2177                        alu_op: AluOPRRR::And,
2178                        rd: writable_spilltmp_reg2(),
2179                        rs1: tmp.to_reg(),
2180                        rs2: rs,
2181                    }
2182                    .emit(sink, emit_info, state);
2183                    Inst::CondBr {
2184                        taken: CondBrTarget::Label(label_done),
2185                        not_taken: CondBrTarget::Fallthrough,
2186                        kind: IntegerCompare {
2187                            kind: IntCC::NotEqual,
2188                            rs1: zero_reg(),
2189                            rs2: spilltmp_reg2(),
2190                        },
2191                    }
2192                    .emit(sink, emit_info, state);
2193                    Inst::AluRRImm12 {
2194                        alu_op: AluOPRRI::Addi,
2195                        rd: sum,
2196                        rs: sum.to_reg(),
2197                        imm12: Imm12::ONE,
2198                    }
2199                    .emit(sink, emit_info, state);
2200                }
2201                // set step and tmp.
2202                {
2203                    Inst::AluRRImm12 {
2204                        alu_op: AluOPRRI::Addi,
2205                        rd: step,
2206                        rs: step.to_reg(),
2207                        imm12: Imm12::from_i16(-1),
2208                    }
2209                    .emit(sink, emit_info, state);
2210                    Inst::AluRRImm12 {
2211                        alu_op: if leading {
2212                            AluOPRRI::Srli
2213                        } else {
2214                            AluOPRRI::Slli
2215                        },
2216                        rd: tmp,
2217                        rs: tmp.to_reg(),
2218                        imm12: Imm12::ONE,
2219                    }
2220                    .emit(sink, emit_info, state);
2221                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2222                }
2223                sink.bind_label(label_done, &mut state.ctrl_plane);
2224            }
2225            &Inst::Brev8 {
2226                rs,
2227                ty,
2228                step,
2229                tmp,
2230                tmp2,
2231                rd,
2232            } => {
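                // Reverse the bits within each byte. tmp scans the source bits from
                // the MSB downwards, while tmp2 tracks the mirrored destination bit:
                // it moves up within a byte and drops by 15 bit positions (to the
                // LSB of the next lower byte) at each byte boundary.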
2233                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2234                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2235                    .emit(sink, emit_info, state);
2236                // Build the source scanning mask: load 1, then shift to the MSB.
2237                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2238                Inst::AluRRImm12 {
2239                    alu_op: AluOPRRI::Slli,
2240                    rd: tmp,
2241                    rs: tmp.to_reg(),
2242                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2243                }
2244                .emit(sink, emit_info, state);
2245                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2246                Inst::AluRRImm12 {
2247                    alu_op: AluOPRRI::Slli,
2248                    rd: tmp2,
2249                    rs: tmp2.to_reg(),
2250                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2251                }
2252                .emit(sink, emit_info, state);
2253
2254                let label_done = sink.get_label();
2255                let label_loop = sink.get_label();
2256                sink.bind_label(label_loop, &mut state.ctrl_plane);
2257                Inst::CondBr {
2258                    taken: CondBrTarget::Label(label_done),
2259                    not_taken: CondBrTarget::Fallthrough,
2260                    kind: IntegerCompare {
2261                        kind: IntCC::SignedLessThanOrEqual,
2262                        rs1: step.to_reg(),
2263                        rs2: zero_reg(),
2264                    },
2265                }
2266                .emit(sink, emit_info, state);
2267                // test and set bit.
2268                {
2269                    Inst::AluRRR {
2270                        alu_op: AluOPRRR::And,
2271                        rd: writable_spilltmp_reg2(),
2272                        rs1: tmp.to_reg(),
2273                        rs2: rs,
2274                    }
2275                    .emit(sink, emit_info, state);
2276                    let label_over = sink.get_label();
2277                    Inst::CondBr {
2278                        taken: CondBrTarget::Label(label_over),
2279                        not_taken: CondBrTarget::Fallthrough,
2280                        kind: IntegerCompare {
2281                            kind: IntCC::Equal,
2282                            rs1: zero_reg(),
2283                            rs2: spilltmp_reg2(),
2284                        },
2285                    }
2286                    .emit(sink, emit_info, state);
2287                    Inst::AluRRR {
2288                        alu_op: AluOPRRR::Or,
2289                        rd: rd,
2290                        rs1: rd.to_reg(),
2291                        rs2: tmp2.to_reg(),
2292                    }
2293                    .emit(sink, emit_info, state);
2294                    sink.bind_label(label_over, &mut state.ctrl_plane);
2295                }
2296                // set step and tmp.
2297                {
2298                    Inst::AluRRImm12 {
2299                        alu_op: AluOPRRI::Addi,
2300                        rd: step,
2301                        rs: step.to_reg(),
2302                        imm12: Imm12::from_i16(-1),
2303                    }
2304                    .emit(sink, emit_info, state);
2305                    Inst::AluRRImm12 {
2306                        alu_op: AluOPRRI::Srli,
2307                        rd: tmp,
2308                        rs: tmp.to_reg(),
2309                        imm12: Imm12::ONE,
2310                    }
2311                    .emit(sink, emit_info, state);
2312                    {
2313                        // Reset tmp2:
2314                        // if (step % 8 == 0) then tmp2 = tmp2 >> 15
2315                        // if (step % 8 != 0) then tmp2 = tmp2 << 1
2316                        let label_over = sink.get_label();
2317                        let label_sll_1 = sink.get_label();
2318                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2319                            .emit(sink, emit_info, state);
2320                        Inst::AluRRR {
2321                            alu_op: AluOPRRR::Rem,
2322                            rd: writable_spilltmp_reg2(),
2323                            rs1: step.to_reg(),
2324                            rs2: spilltmp_reg2(),
2325                        }
2326                        .emit(sink, emit_info, state);
2327                        Inst::CondBr {
2328                            taken: CondBrTarget::Label(label_sll_1),
2329                            not_taken: CondBrTarget::Fallthrough,
2330                            kind: IntegerCompare {
2331                                kind: IntCC::NotEqual,
2332                                rs1: spilltmp_reg2(),
2333                                rs2: zero_reg(),
2334                            },
2335                        }
2336                        .emit(sink, emit_info, state);
2337                        Inst::AluRRImm12 {
2338                            alu_op: AluOPRRI::Srli,
2339                            rd: tmp2,
2340                            rs: tmp2.to_reg(),
2341                            imm12: Imm12::from_i16(15),
2342                        }
2343                        .emit(sink, emit_info, state);
2344                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2345                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2346                        Inst::AluRRImm12 {
2347                            alu_op: AluOPRRI::Slli,
2348                            rd: tmp2,
2349                            rs: tmp2.to_reg(),
2350                            imm12: Imm12::ONE,
2351                        }
2352                        .emit(sink, emit_info, state);
2353                        sink.bind_label(label_over, &mut state.ctrl_plane);
2354                    }
2355                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2356                }
2357                sink.bind_label(label_done, &mut state.ctrl_plane);
2358            }
2359            &Inst::StackProbeLoop {
2360                guard_size,
2361                probe_count,
2362                tmp: guard_size_tmp,
2363            } => {
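                // Probe the stack by writing a zero byte at guard_size intervals,
                // starting at SP - guard_size * probe_count and stepping back
                // towards SP:
                //
                //   step = guard_size * probe_count
                //   while step > guard_size:
                //       *(SP - step) = 0
                //       step -= guard_size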
2364                let step = writable_spilltmp_reg();
2365                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2366                    .iter()
2367                    .for_each(|i| i.emit(sink, emit_info, state));
2368                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2369                    .iter()
2370                    .for_each(|i| i.emit(sink, emit_info, state));
2371
2372                let loop_start = sink.get_label();
2373                let label_done = sink.get_label();
2374                sink.bind_label(loop_start, &mut state.ctrl_plane);
2375                Inst::CondBr {
2376                    taken: CondBrTarget::Label(label_done),
2377                    not_taken: CondBrTarget::Fallthrough,
2378                    kind: IntegerCompare {
2379                        kind: IntCC::UnsignedLessThanOrEqual,
2380                        rs1: step.to_reg(),
2381                        rs2: guard_size_tmp.to_reg(),
2382                    },
2383                }
2384                .emit(sink, emit_info, state);
2385                // compute address.
2386                Inst::AluRRR {
2387                    alu_op: AluOPRRR::Sub,
2388                    rd: writable_spilltmp_reg2(),
2389                    rs1: stack_reg(),
2390                    rs2: step.to_reg(),
2391                }
2392                .emit(sink, emit_info, state);
2393                Inst::Store {
2394                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2395                    op: StoreOP::Sb,
2396                    flags: MemFlags::new(),
2397                    src: zero_reg(),
2398                }
2399                .emit(sink, emit_info, state);
2400                // Decrement step by one guard page.
2401                Inst::AluRRR {
2402                    alu_op: AluOPRRR::Sub,
2403                    rd: step,
2404                    rs1: step.to_reg(),
2405                    rs2: guard_size_tmp.to_reg(),
2406                }
2407                .emit(sink, emit_info, state);
2408                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2409                sink.bind_label(label_done, &mut state.ctrl_plane);
2410            }
2411            &Inst::VecAluRRRImm5 {
2412                op,
2413                vd,
2414                vd_src,
2415                imm,
2416                vs2,
2417                ref mask,
2418                ..
2419            } => {
2420                debug_assert_eq!(vd.to_reg(), vd_src);
2421
2422                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2423            }
2424            &Inst::VecAluRRRR {
2425                op,
2426                vd,
2427                vd_src,
2428                vs1,
2429                vs2,
2430                ref mask,
2431                ..
2432            } => {
2433                debug_assert_eq!(vd.to_reg(), vd_src);
2434
2435                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2436            }
2437            &Inst::VecAluRRR {
2438                op,
2439                vd,
2440                vs1,
2441                vs2,
2442                ref mask,
2443                ..
2444            } => {
2445                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2446            }
2447            &Inst::VecAluRRImm5 {
2448                op,
2449                vd,
2450                imm,
2451                vs2,
2452                ref mask,
2453                ..
2454            } => {
2455                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2456            }
2457            &Inst::VecAluRR {
2458                op,
2459                vd,
2460                vs,
2461                ref mask,
2462                ..
2463            } => {
2464                sink.put4(encode_valu_rr(op, vd, vs, *mask));
2465            }
2466            &Inst::VecAluRImm5 {
2467                op,
2468                vd,
2469                imm,
2470                ref mask,
2471                ..
2472            } => {
2473                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2474            }
2475            &Inst::VecSetState { rd, ref vstate } => {
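                // Encode the vector configuration instruction (static AVL form);
                // 0x57 is the OP-V major opcode.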
2476                sink.put4(encode_vcfg_imm(
2477                    0x57,
2478                    rd.to_reg(),
2479                    vstate.avl.unwrap_static(),
2480                    &vstate.vtype,
2481                ));
2482
2483                // Update the current vector emit state.
2484                state.vstate = EmitVState::Known(*vstate);
2485            }
2486
2487            &Inst::VecLoad {
2488                eew,
2489                to,
2490                ref from,
2491                ref mask,
2492                flags,
2493                ..
2494            } => {
2495                // Vector loads don't support immediate offsets, so we may need to compute the address in a register.
2496                let addr = match from {
2497                    VecAMode::UnitStride { base } => {
2498                        let base_reg = base.get_base_register();
2499                        let offset = base.get_offset_with_state(state);
2500
2501                        // Reg+0 Offset can be directly encoded
2502                        if let (Some(base_reg), 0) = (base_reg, offset) {
2503                            base_reg
2504                        } else {
2505                            // Otherwise compute the address into a temporary register and load from it.
2506                            let tmp = writable_spilltmp_reg();
2507                            Inst::LoadAddr {
2508                                rd: tmp,
2509                                mem: *base,
2510                            }
2511                            .emit(sink, emit_info, state);
2512                            tmp.to_reg()
2513                        }
2514                    }
2515                };
2516
2517                if let Some(trap_code) = flags.trap_code() {
2518                    // Register the offset at which the actual load instruction starts.
2519                    sink.add_trap(trap_code);
2520                }
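                // 0x07 is the LOAD-FP major opcode, which the vector extension
                // reuses for vector loads.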
2521
2522                sink.put4(encode_vmem_load(
2523                    0x07,
2524                    to.to_reg(),
2525                    eew,
2526                    addr,
2527                    from.lumop(),
2528                    *mask,
2529                    from.mop(),
2530                    from.nf(),
2531                ));
2532            }
2533
2534            &Inst::VecStore {
2535                eew,
2536                ref to,
2537                from,
2538                ref mask,
2539                flags,
2540                ..
2541            } => {
2542                // Vector stores don't support immediate offsets, so we may need to compute the address in a register.
2543                let addr = match to {
2544                    VecAMode::UnitStride { base } => {
2545                        let base_reg = base.get_base_register();
2546                        let offset = base.get_offset_with_state(state);
2547
2548                        // Reg+0 Offset can be directly encoded
2549                        if let (Some(base_reg), 0) = (base_reg, offset) {
2550                            base_reg
2551                        } else {
2552                            // Otherwise compute the address into a temporary register and store through it.
2553                            let tmp = writable_spilltmp_reg();
2554                            Inst::LoadAddr {
2555                                rd: tmp,
2556                                mem: *base,
2557                            }
2558                            .emit(sink, emit_info, state);
2559                            tmp.to_reg()
2560                        }
2561                    }
2562                };
2563
2564                if let Some(trap_code) = flags.trap_code() {
2565                    // Register the offset at which the actual store instruction starts.
2566                    sink.add_trap(trap_code);
2567                }
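                // 0x27 is the STORE-FP major opcode, which the vector extension
                // reuses for vector stores.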
2568
2569                sink.put4(encode_vmem_store(
2570                    0x27,
2571                    from,
2572                    eew,
2573                    addr,
2574                    to.sumop(),
2575                    *mask,
2576                    to.mop(),
2577                    to.nf(),
2578                ));
2579            }
2580        };
2581    }
2582}
2583
2584fn emit_return_call_common_sequence<T>(
2585    sink: &mut MachBuffer<Inst>,
2586    emit_info: &EmitInfo,
2587    state: &mut EmitState,
2588    info: &ReturnCallInfo<T>,
2589) {
2590    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!),
2591    // so let's emit an island here if we need it.
2592    //
2593    // It is difficult to calculate exactly how many bytes are going to be emitted, so
2594    // we measure it by emitting the sequence into a disposable buffer and then checking
2595    // how many bytes were actually emitted.
2596    let mut buffer = MachBuffer::new();
2597    let mut fake_emit_state = state.clone();
2598
2599    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);
2600
2601    // Finalize the buffer and get the number of bytes emitted.
2602    let buffer = buffer.finish(&Default::default(), &mut Default::default());
2603    let length = buffer.data().len() as u32;
2604
2605    // And now emit the island inline with this instruction.
2606    if sink.island_needed(length) {
2607        let jump_around_label = sink.get_label();
2608        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2609        sink.emit_island(length + 4, &mut state.ctrl_plane);
2610        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2611    }
2612
2613    // Now that we're done, emit the *actual* return sequence.
2614    return_call_emit_impl(sink, emit_info, state, info);
2615}
2616
2617/// This should not be called directly; instead, prefer to call [emit_return_call_common_sequence].
2618fn return_call_emit_impl<T>(
2619    sink: &mut MachBuffer<Inst>,
2620    emit_info: &EmitInfo,
2621    state: &mut EmitState,
2622    info: &ReturnCallInfo<T>,
2623) {
2624    let sp_to_fp_offset = {
2625        let frame_layout = state.frame_layout();
2626        i64::from(
2627            frame_layout.clobber_size
2628                + frame_layout.fixed_frame_storage_size
2629                + frame_layout.outgoing_args_size,
2630        )
2631    };
2632
2633    let mut clobber_offset = sp_to_fp_offset - 8;
2634    for reg in state.frame_layout().clobbered_callee_saves.clone() {
2635        let rreg = reg.to_reg();
2636        let ty = match rreg.class() {
2637            RegClass::Int => I64,
2638            RegClass::Float => F64,
2639            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
2640        };
2641
2642        Inst::gen_load(
2643            reg.map(Reg::from),
2644            AMode::SPOffset(clobber_offset),
2645            ty,
2646            MemFlags::trusted(),
2647        )
2648        .emit(sink, emit_info, state);
2649
2650        clobber_offset -= 8;
2651    }
2652
2653    // Restore the link register and frame pointer
2654    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
2655    if setup_area_size > 0 {
2656        Inst::gen_load(
2657            writable_link_reg(),
2658            AMode::SPOffset(sp_to_fp_offset + 8),
2659            I64,
2660            MemFlags::trusted(),
2661        )
2662        .emit(sink, emit_info, state);
2663
2664        Inst::gen_load(
2665            writable_fp_reg(),
2666            AMode::SPOffset(sp_to_fp_offset),
2667            I64,
2668            MemFlags::trusted(),
2669        )
2670        .emit(sink, emit_info, state);
2671    }
2672
2673    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
2674    // is expecting.
2675    let incoming_args_diff =
2676        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
2677
2678    // Increment SP all at once
2679    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
2680    if sp_increment > 0 {
2681        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
2682            inst.emit(sink, emit_info, state);
2683        }
2684    }
2685}