cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;

pub struct EmitInfo {
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

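/// Returns the 5-bit hardware encoding of a general-purpose register.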
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

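/// Returns the 3-bit register number used by compressed instruction formats,
/// which can only address registers x8..x15 (encoded as 0..7).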
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

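/// The state of the vector unit as known at the current emission point.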
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector state.
    ///
    /// Tracks the state of the vector unit at the current emission point.
    vstate: EmitVState,

    /// The frame layout of the function currently being emitted.
    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clobber_vstate(&mut self) {
        self.vstate = EmitVState::Unknown;
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.clobber_vstate();
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Load an integer mask into `rd`: all ones in the low `ty.bits()` bits
    /// (e.g. 0xff for I8, all 64 bits set for I64).
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }

    /// Invert all bits of `rs` into `rd` (bitwise NOT), via `xori rd, rs, -1`.
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns `Some(VState)` if this instruction expects a specific vector
    /// state to be in effect before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtName { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate; rather, it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
            Inst::EmitIsland { .. } => None,
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this instruction.
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First, try to emit this as a compressed instruction.
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal (uncompressed) instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table, call, return_call and try_call from
        // these checks since they emit their own islands, and thus
        // are allowed to exceed the worst case size.
        let emits_own_island = match self {
            Inst::BrTable { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::EmitIsland { .. } => true,
            _ => false,
        };
        if !emits_own_island {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in a compressed form; returns `None`
    /// if it cannot be compressed.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 and get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // these are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
                state.clobber_vstate();
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
                state.clobber_vstate();
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }
            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
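            //
            // For example, `addi sp, sp, -64` is compressible here: -64 is a
            // non-zero multiple of 16, and -64/16 = -4 fits in a signed Imm6.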
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
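            //
            // For example, `addi a0, sp, 16` is compressible: 16 is a non-zero
            // multiple of 4, and 16/4 = 4 fits in the 8-bit zero-extended
            // immediate field checked below.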
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
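            //
            // For example, `lui a0, 1` (which sets bit 12 of a0) compresses to
            // `c.lui a0, 1`, since the immediate 1 fits in a signed Imm6 and
            // a0 is neither x0 nor x2.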
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`; they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        return Some(());
    }

    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction, so
                // there is no need to check whether we should `emit_island`.
                // If the data were very long, that would be a bug: RawData is
                // typically used to load some data that relies on a known
                // position in the code stream, and we could then exceed
                // `Inst::worst_case_size`.
                // For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, width, imm } => {
                sink.put4(encode_fli(width, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

                // Load into rd.
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool.
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data.
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op: LoadOP::Flh,
                from,
                flags,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // flh is unavailable, so use an integer load instead.
                Inst::Load {
                    rd: writable_spilltmp_reg(),
                    op: LoadOP::Lh,
                    flags,
                    from,
                }
                .emit(sink, emit_info, state);
                // NaN-box the `f16` before loading it into the floating-point
                // register with a 32-bit `fmv`.
                Inst::Lui {
                    rd: writable_spilltmp_reg2(),
                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd: writable_spilltmp_reg(),
                    rs1: spilltmp_reg(),
                    rs2: spilltmp_reg2(),
                }
                .emit(sink, emit_info, state);
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvFmtX,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd,
                    rs: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
                    // register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // The imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store {
                op: StoreOP::Fsh,
                src,
                flags,
                to,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // fsh is unavailable, so use an integer store instead.
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvXFmt,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd: writable_spilltmp_reg(),
                    rs: src,
                }
                .emit(sink, emit_info, state);
                Inst::Store {
                    to,
                    op: StoreOP::Sh,
                    flags,
                    src: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction; instead we emit the
                // equivalent `jalr x0, x1, 0`, which jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
                let is_u8 = || from_bits == 8 && signed == false;
                if is_u8() {
                    // Special case for u8: mask off the low byte directly.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }

            &Inst::Call { ref info } => {
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site(&[]);
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, &info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
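                // 0b1101111 is the raw JAL opcode with rd = x0 and a zero
                // offset; the label use registered above patches the real
                // 20-bit offset in later.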
                sink.put4(0b1101111);
                state.clobber_vstate();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MovFromPReg { rd, rm } => {
                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
            }
1367
1368            &Inst::BrTable {
1369                index,
1370                tmp1,
1371                tmp2,
1372                ref targets,
1373            } => {
1374                let ext_index = writable_spilltmp_reg();
1375
1376                let label_compute_target = sink.get_label();
1377
1378                // The default target is passed in as the 0th element of `targets`
1379                // separate it here for clarity.
1380                let default_target = targets[0];
1381                let targets = &targets[1..];
1382
1383                // We are potentially going to emit a large number of instructions, so
1384                // emit an island now if we need one.
1385                //
1386                // The worst-case PC calculation takes 12 instructions, and each entry in
1387                // the jump table takes another 2, so reserve enough range for all of them.
1388                let inst_count = 12 + (targets.len() * 2);
1389                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1390                if sink.island_needed(distance) {
1391                    let jump_around_label = sink.get_label();
1392                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1393                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1394                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1395                }
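                // For example, a 100-entry table gives inst_count = 12 + 100 * 2 = 212,
                // i.e. distance = 212 * 4 = 848 bytes of required branch range.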
1396
1397                // We emit a bounds check on the index: if the index is larger than the number of
1398                // jump table entries, we jump to the default block. Otherwise we compute a jump
1399                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1400                // that offset. Each jump table entry is a regular auipc+jalr, which we emit sequentially.
1401                //
1402                // Build the following sequence:
1403                //
1404                // extend_index:
1405                //     zext.w  ext_index, index
1406                // bounds_check:
1407                //     li      tmp, n_labels
1408                //     bltu    ext_index, tmp, compute_target
1409                // jump_to_default_block:
1410                //     auipc   pc, 0
1411                //     jalr    zero, pc, default_block
1412                // compute_target:
1413                //     auipc   pc, 0
1414                //     slli    tmp, ext_index, 3
1415                //     add     pc, pc, tmp
1416                //     jalr    zero, pc, 0x10
1417                // jump_table:
1418                //     ; This repeats for each entry in the jumptable
1419                //     auipc   pc, 0
1420                //     jalr    zero, pc, block_target
1421
1422                // Extend the index to 64 bits.
1423                //
1424                // This prevents us from branching on the top 32 bits of the index,
1425                // which are undefined.
1426                Inst::Extend {
1427                    rd: ext_index,
1428                    rn: index,
1429                    signed: false,
1430                    from_bits: 32,
1431                    to_bits: 64,
1432                }
1433                .emit(sink, emit_info, state);
1434
1435                // Bounds check.
1436                //
1437                // Check if the index passed in is larger than the number of jump table
1438                // entries that we have. If it is, we fall through to a jump into the
1439                // default block.
1440                Inst::load_constant_u32(tmp2, targets.len() as u64)
1441                    .iter()
1442                    .for_each(|i| i.emit(sink, emit_info, state));
1443                Inst::CondBr {
1444                    taken: CondBrTarget::Label(label_compute_target),
1445                    not_taken: CondBrTarget::Fallthrough,
1446                    kind: IntegerCompare {
1447                        kind: IntCC::UnsignedLessThan,
1448                        rs1: ext_index.to_reg(),
1449                        rs2: tmp2.to_reg(),
1450                    },
1451                }
1452                .emit(sink, emit_info, state);
1453
1454                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1455                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1456                    .iter()
1457                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1458
1459                // Compute the jump table offset.
1460                // We need to emit a PC-relative offset for the jump target.
1461                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1462
1463                // Get the current PC.
1464                Inst::Auipc {
1465                    rd: tmp1,
1466                    imm: Imm20::ZERO,
1467                }
1468                .emit_uncompressed(sink, emit_info, state, start_off);
1469
1470                // These instructions must be emitted as uncompressed since we
1471                // are manually computing the offset from the PC.
1472
1473                // Multiply the index by 8 (shift left by 3), since that is the size
1474                // in bytes of each jump table entry.
1475                Inst::AluRRImm12 {
1476                    alu_op: AluOPRRI::Slli,
1477                    rd: tmp2,
1478                    rs: ext_index.to_reg(),
1479                    imm12: Imm12::from_i16(3),
1480                }
1481                .emit_uncompressed(sink, emit_info, state, start_off);
1482
1483                // Calculate the base of the jump, PC + the offset from above.
1484                Inst::AluRRR {
1485                    alu_op: AluOPRRR::Add,
1486                    rd: tmp1,
1487                    rs1: tmp1.to_reg(),
1488                    rs2: tmp2.to_reg(),
1489                }
1490                .emit_uncompressed(sink, emit_info, state, start_off);
1491
1492                // Jump to the computed entry in the jump table.
1493                // We add a 16-byte offset here because 4 instructions (16 bytes) have
1494                // been emitted since the AUIPC that was used to get the PC.
1495                Inst::Jalr {
1496                    rd: writable_zero_reg(),
1497                    base: tmp1.to_reg(),
1498                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1499                }
1500                .emit_uncompressed(sink, emit_info, state, start_off);
1501
1502                // Emit the jump table.
1503                //
1504                // Each entry is an auipc + jalr to the target block; any island that
1505                // was needed has already been emitted above.
1506
1507                // Emit the jumps back to back.
1508                for target in targets.iter() {
1509                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1510
1511                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1512                        .iter()
1513                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1514                }
1515
1516                // We've just emitted an island that is safe up to *here*.
1517                // Mark it as such so that we don't needlessly emit additional islands.
1518                *start_off = sink.cur_offset();
1519            }
1520
1521            &Inst::Atomic {
1522                op,
1523                rd,
1524                addr,
1525                src,
1526                amo,
1527            } => {
1528                // TODO: get flags from original CLIF atomic instruction
1529                let flags = MemFlags::new();
1530                if let Some(trap_code) = flags.trap_code() {
1531                    sink.add_trap(trap_code);
1532                }
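                // The expression below assembles a standard R-type AMO encoding, from
                // LSB to MSB: opcode[6:0] | rd[11:7] | funct3[14:12] | rs1[19:15]
                // (address) | rs2[24:20] (source) | funct7[31:25], where funct7
                // carries the funct5 plus the aq/rl ordering bits.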
1533                let x = op.op_code()
1534                    | reg_to_gpr_num(rd.to_reg()) << 7
1535                    | op.funct3() << 12
1536                    | reg_to_gpr_num(addr) << 15
1537                    | reg_to_gpr_num(src) << 20
1538                    | op.funct7(amo) << 25;
1539
1540                sink.put4(x);
1541            }
1542            &Inst::Fence { pred, succ } => {
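                // FENCE is encoded with opcode 0b0001111, rd = rs1 = x0, funct3 = 0,
                // the successor IORW set in bits 23:20, and the predecessor set in
                // bits 27:24 (fm = 0, i.e. a normal fence).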
1543                let x = 0b0001111
1544                    | 0b00000 << 7
1545                    | 0b000 << 12
1546                    | 0b00000 << 15
1547                    | (succ as u32) << 20
1548                    | (pred as u32) << 24;
1549
1550                sink.put4(x);
1551            }
1552            &Inst::Auipc { rd, imm } => {
1553                sink.put4(enc_auipc(rd, imm));
1554            }
1555
1556            &Inst::LoadAddr { rd, mem } => {
1557                let base = mem.get_base_register();
1558                let offset = mem.get_offset_with_state(state);
1559                let offset_imm12 = Imm12::maybe_from_i64(offset);
1560
1561                match (mem, base, offset_imm12) {
1562                    (_, Some(rs), Some(imm12)) => {
1563                        Inst::AluRRImm12 {
1564                            alu_op: AluOPRRI::Addi,
1565                            rd,
1566                            rs,
1567                            imm12,
1568                        }
1569                        .emit(sink, emit_info, state);
1570                    }
1571                    (_, Some(rs), None) => {
1572                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1573                        insts.push(Inst::AluRRR {
1574                            alu_op: AluOPRRR::Add,
1575                            rd,
1576                            rs1: rd.to_reg(),
1577                            rs2: rs,
1578                        });
1579                        insts
1580                            .into_iter()
1581                            .for_each(|inst| inst.emit(sink, emit_info, state));
1582                    }
1583                    (AMode::Const(addr), None, _) => {
1584                        // Get an address label for the constant and recurse.
1585                        let label = sink.get_label_for_constant(addr);
1586                        Inst::LoadAddr {
1587                            rd,
1588                            mem: AMode::Label(label),
1589                        }
1590                        .emit(sink, emit_info, state);
1591                    }
1592                    (AMode::Label(label), None, _) => {
1593                        // Get the current PC.
1594                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1595                        let inst = Inst::Auipc {
1596                            rd,
1597                            imm: Imm20::ZERO,
1598                        };
1599                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1600
1601                        // Emit an add to the address with a relocation.
1602                        // This later gets patched up with the correct offset.
1603                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1604                        Inst::AluRRImm12 {
1605                            alu_op: AluOPRRI::Addi,
1606                            rd,
1607                            rs: rd.to_reg(),
1608                            imm12: Imm12::ZERO,
1609                        }
1610                        .emit_uncompressed(sink, emit_info, state, start_off);
1611                    }
1612                    (amode, _, _) => {
1613                        unimplemented!("LoadAddr: {:?}", amode);
1614                    }
1615                }
1616            }
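            // For example, `LoadAddr` of a base register plus an offset that fits in
            // an i12 folds into a single `addi` above; larger offsets materialize the
            // constant first and then `add` it to the base.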
1617
1618            &Inst::Select {
1619                ref dst,
1620                condition,
1621                ref x,
1622                ref y,
1623            } => {
1624                // The general form for this select is the following:
1625                //
1626                //     mv rd, x
1627                //     b{cond} rcond, label_end
1628                //     mv rd, y
1629                // label_end:
1630                //     ... etc
1631                //
1632                // This is built on the assumption that moves are cheap, but branches and jumps
1633                // are not. So with this format we always avoid one jump instruction at the expense
1634                // of an unconditional move.
1635                //
1636                // We also perform another optimization here. If the destination register is the same
1637                // as one of the input registers, we can avoid emitting the first unconditional move
1638                // and emit just the branch and the second move.
1639                //
1640                // To make sure that this happens as often as possible, we also try to invert the
1641                // condition, so that if either of the input registers is the same as the destination
1642                // we avoid that move.
1643
1644                let label_end = sink.get_label();
1645
1646                let xregs = x.regs();
1647                let yregs = y.regs();
1648                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1649                let condregs = condition.regs();
1650
1651                // We are going to write to the destination register before evaluating
1652                // the condition, so we need to make sure that the destination register
1653                // is not one of the condition registers.
1654                //
1655                // This should never happen; the regalloc constraints for this
1656                // instruction should guarantee it, but we assert it anyway.
1657                debug_assert_ne!(dstregs, condregs);
1658
1659                // Check if we can invert the condition and avoid moving the y registers into
1660                // the destination. This allows us to only emit the branch and one of the moves.
1661                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1662                    (yregs, xregs, condition.inverse())
1663                } else {
1664                    (xregs, yregs, condition)
1665                };
1666
1667                // Unconditionally move one of the values to the destination register.
1668                //
1669                // These moves may not end up being emitted if the source and
1670                // destination registers are the same. That logic is built into
1671                // the emit function for `Inst::Mov`.
1672                for i in gen_moves(dst.regs(), uncond_move) {
1673                    i.emit(sink, emit_info, state);
1674                }
1675
1676                // If the condition passes we skip over the conditional move
1677                Inst::CondBr {
1678                    taken: CondBrTarget::Label(label_end),
1679                    not_taken: CondBrTarget::Fallthrough,
1680                    kind: condition,
1681                }
1682                .emit(sink, emit_info, state);
1683
1684                // Move the conditional value to the destination register.
1685                for i in gen_moves(dst.regs(), cond_move) {
1686                    i.emit(sink, emit_info, state);
1687                }
1688
1689                sink.bind_label(label_end, &mut state.ctrl_plane);
1690            }
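            // For example, when dst == y the condition is inverted, the unconditional
            // move collapses to nothing, and the whole select becomes a single branch
            // plus one conditional move of x.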
1691            &Inst::Jalr { rd, base, offset } => {
1692                sink.put4(enc_jalr(rd, base, offset));
1693                state.clobber_vstate();
1694            }
1695            &Inst::EBreak => {
1696                sink.put4(0x00100073);
1697            }
1698            &Inst::AtomicCas {
1699                offset,
1700                t0,
1701                dst,
1702                e,
1703                addr,
1704                v,
1705                ty,
1706            } => {
1707                //     # addr holds address of memory location
1708                //     # e holds expected value
1709                //     # v holds desired value
1710                //     # dst holds return value
1711                // cas:
1712                //     lr.w dst, (addr)       # Load original value.
1713                //     bne dst, e, fail       # Doesn’t match, so fail.
1714                //     sc.w t0, v, (addr)     # Try to update.
1715                //     bnez t0, cas           # If the store failed, retry.
1716                // fail:
1717                let fail_label = sink.get_label();
1718                let cas_label = sink.get_label();
1719                sink.bind_label(cas_label, &mut state.ctrl_plane);
1720                Inst::Atomic {
1721                    op: AtomicOP::load_op(ty),
1722                    rd: dst,
1723                    addr,
1724                    src: zero_reg(),
1725                    amo: AMO::SeqCst,
1726                }
1727                .emit(sink, emit_info, state);
1728                if ty.bits() < 32 {
1729                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1730                        .iter()
1731                        .for_each(|i| i.emit(sink, emit_info, state));
1732                } else if ty.bits() == 32 {
1733                    Inst::Extend {
1734                        rd: dst,
1735                        rn: dst.to_reg(),
1736                        signed: false,
1737                        from_bits: 32,
1738                        to_bits: 64,
1739                    }
1740                    .emit(sink, emit_info, state);
1741                }
1742                Inst::CondBr {
1743                    taken: CondBrTarget::Label(fail_label),
1744                    not_taken: CondBrTarget::Fallthrough,
1745                    kind: IntegerCompare {
1746                        kind: IntCC::NotEqual,
1747                        rs1: e,
1748                        rs2: dst.to_reg(),
1749                    },
1750                }
1751                .emit(sink, emit_info, state);
1752                let store_value = if ty.bits() < 32 {
1753                    // Reload the current value into t0.
1754                    Inst::Atomic {
1755                        op: AtomicOP::load_op(ty),
1756                        rd: t0,
1757                        addr,
1758                        src: zero_reg(),
1759                        amo: AMO::SeqCst,
1760                    }
1761                    .emit(sink, emit_info, state);
1762                    // Merge the new value into its subword lane, preserving the rest of the word.
1763                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1764                        .iter()
1765                        .for_each(|i| i.emit(sink, emit_info, state));
1766                    t0.to_reg()
1767                } else {
1768                    v
1769                };
1770                Inst::Atomic {
1771                    op: AtomicOP::store_op(ty),
1772                    rd: t0,
1773                    addr,
1774                    src: store_value,
1775                    amo: AMO::SeqCst,
1776                }
1777                .emit(sink, emit_info, state);
1778                // Check whether our value was stored (sc writes zero on success).
1779                Inst::CondBr {
1780                    taken: CondBrTarget::Label(cas_label),
1781                    not_taken: CondBrTarget::Fallthrough,
1782                    kind: IntegerCompare {
1783                        kind: IntCC::NotEqual,
1784                        rs1: t0.to_reg(),
1785                        rs2: zero_reg(),
1786                    },
1787                }
1788                .emit(sink, emit_info, state);
1789                sink.bind_label(fail_label, &mut state.ctrl_plane);
1790            }
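            // `AtomicOP::load_op`/`AtomicOP::store_op` select the LR/SC pair for the
            // access width (e.g. `lr.w`/`sc.w`), matching the pseudo-code above.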
1791            &Inst::AtomicRmwLoop {
1792                offset,
1793                op,
1794                dst,
1795                ty,
1796                p,
1797                x,
1798                t0,
1799            } => {
1800                let retry = sink.get_label();
1801                sink.bind_label(retry, &mut state.ctrl_plane);
1802                // load old value.
1803                Inst::Atomic {
1804                    op: AtomicOP::load_op(ty),
1805                    rd: dst,
1806                    addr: p,
1807                    src: zero_reg(),
1808                    amo: AMO::SeqCst,
1809                }
1810                .emit(sink, emit_info, state);
1812
1813                let store_value: Reg = match op {
1814                    crate::ir::AtomicRmwOp::Add
1815                    | crate::ir::AtomicRmwOp::Sub
1816                    | crate::ir::AtomicRmwOp::And
1817                    | crate::ir::AtomicRmwOp::Or
1818                    | crate::ir::AtomicRmwOp::Xor => {
1819                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1820                            .iter()
1821                            .for_each(|i| i.emit(sink, emit_info, state));
1822                        Inst::AluRRR {
1823                            alu_op: match op {
1824                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1825                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1826                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1827                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1828                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1829                                _ => unreachable!(),
1830                            },
1831                            rd: t0,
1832                            rs1: dst.to_reg(),
1833                            rs2: x,
1834                        }
1835                        .emit(sink, emit_info, state);
1836                        Inst::Atomic {
1837                            op: AtomicOP::load_op(ty),
1838                            rd: writable_spilltmp_reg2(),
1839                            addr: p,
1840                            src: zero_reg(),
1841                            amo: AMO::SeqCst,
1842                        }
1843                        .emit(sink, emit_info, state);
1844                        AtomicOP::merge(
1845                            writable_spilltmp_reg2(),
1846                            writable_spilltmp_reg(),
1847                            offset,
1848                            t0.to_reg(),
1849                            ty,
1850                        )
1851                        .iter()
1852                        .for_each(|i| i.emit(sink, emit_info, state));
1853                        spilltmp_reg2()
1854                    }
1855                    crate::ir::AtomicRmwOp::Nand => {
1856                        if ty.bits() < 32 {
1857                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1858                                .iter()
1859                                .for_each(|i| i.emit(sink, emit_info, state));
1860                        }
1861                        Inst::AluRRR {
1862                            alu_op: AluOPRRR::And,
1863                            rd: t0,
1864                            rs1: x,
1865                            rs2: dst.to_reg(),
1866                        }
1867                        .emit(sink, emit_info, state);
1868                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1869                        if ty.bits() < 32 {
1870                            Inst::Atomic {
1871                                op: AtomicOP::load_op(ty),
1872                                rd: writable_spilltmp_reg2(),
1873                                addr: p,
1874                                src: zero_reg(),
1875                                amo: AMO::SeqCst,
1876                            }
1877                            .emit(sink, emit_info, state);
1878                            AtomicOP::merge(
1879                                writable_spilltmp_reg2(),
1880                                writable_spilltmp_reg(),
1881                                offset,
1882                                t0.to_reg(),
1883                                ty,
1884                            )
1885                            .iter()
1886                            .for_each(|i| i.emit(sink, emit_info, state));
1887                            spilltmp_reg2()
1888                        } else {
1889                            t0.to_reg()
1890                        }
1891                    }
1892
1893                    crate::ir::AtomicRmwOp::Umin
1894                    | crate::ir::AtomicRmwOp::Umax
1895                    | crate::ir::AtomicRmwOp::Smin
1896                    | crate::ir::AtomicRmwOp::Smax => {
1897                        let label_select_dst = sink.get_label();
1898                        let label_select_done = sink.get_label();
1899                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1900                        {
1901                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1902                        } else {
1903                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1904                        }
1905                        .iter()
1906                        .for_each(|i| i.emit(sink, emit_info, state));
1907
1908                        Inst::CondBr {
1909                            taken: CondBrTarget::Label(label_select_dst),
1910                            not_taken: CondBrTarget::Fallthrough,
1911                            kind: IntegerCompare {
1912                                kind: match op {
1913                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1914                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1915                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1916                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1917                                    _ => unreachable!(),
1918                                },
1919                                rs1: dst.to_reg(),
1920                                rs2: x,
1921                            },
1922                        }
1923                        .emit(sink, emit_info, state);
1924                        // here we select x.
1925                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1926                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1927                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1928                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1929                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1930                        Inst::Atomic {
1931                            op: AtomicOP::load_op(ty),
1932                            rd: writable_spilltmp_reg2(),
1933                            addr: p,
1934                            src: zero_reg(),
1935                            amo: AMO::SeqCst,
1936                        }
1937                        .emit(sink, emit_info, state);
1938                        AtomicOP::merge(
1939                            writable_spilltmp_reg2(),
1940                            writable_spilltmp_reg(),
1941                            offset,
1942                            t0.to_reg(),
1943                            ty,
1944                        )
1945                        .iter()
1946                        .for_each(|i| i.emit(sink, emit_info, state));
1947                        spilltmp_reg2()
1948                    }
1949                    crate::ir::AtomicRmwOp::Xchg => {
1950                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1951                            .iter()
1952                            .for_each(|i| i.emit(sink, emit_info, state));
1953                        Inst::Atomic {
1954                            op: AtomicOP::load_op(ty),
1955                            rd: writable_spilltmp_reg2(),
1956                            addr: p,
1957                            src: zero_reg(),
1958                            amo: AMO::SeqCst,
1959                        }
1960                        .emit(sink, emit_info, state);
1961                        AtomicOP::merge(
1962                            writable_spilltmp_reg2(),
1963                            writable_spilltmp_reg(),
1964                            offset,
1965                            x,
1966                            ty,
1967                        )
1968                        .iter()
1969                        .for_each(|i| i.emit(sink, emit_info, state));
1970                        spilltmp_reg2()
1971                    }
1972                };
1973
1974                Inst::Atomic {
1975                    op: AtomicOP::store_op(ty),
1976                    rd: t0,
1977                    addr: p,
1978                    src: store_value,
1979                    amo: AMO::SeqCst,
1980                }
1981                .emit(sink, emit_info, state);
1982
1983                // If the store failed, retry.
1984                Inst::CondBr {
1985                    taken: CondBrTarget::Label(retry),
1986                    not_taken: CondBrTarget::Fallthrough,
1987                    kind: IntegerCompare {
1988                        kind: IntCC::NotEqual,
1989                        rs1: t0.to_reg(),
1990                        rs2: zero_reg(),
1991                    },
1992                }
1993                .emit(sink, emit_info, state);
1994            }
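            // For sub-register widths the loop operates on the containing aligned
            // word: `extract` pulls the old subword value out, the new value is
            // computed, and `merge` writes it back into a freshly loaded word
            // before the store-conditional attempt.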
1995
1996            &Inst::LoadExtName {
1997                rd,
1998                ref name,
1999                offset,
2000            } => {
2001                if emit_info.shared_flag.is_pic() {
2002                    // Load a PC-relative address into a register.
2003                    // RISC-V does this slightly differently from other arches. We emit a relocation
2004                    // with a label, instead of the symbol itself.
2005                    //
2006                    // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2007                    //
2008                    // Emit the following code:
2009                    // label:
2010                    //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2011                    //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2012
2013                    // Create the label that is going to be published to the final binary object.
2014                    let auipc_label = sink.get_label();
2015                    sink.bind_label(auipc_label, &mut state.ctrl_plane);
2016
2017                    // Get the current PC.
2018                    sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2019                    Inst::Auipc {
2020                        rd,
2021                        imm: Imm20::from_i32(0),
2022                    }
2023                    .emit_uncompressed(sink, emit_info, state, start_off);
2024
2025                    // The `ld` here points to the `auipc` label instead of directly to the symbol.
2026                    sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2027                    Inst::Load {
2028                        rd,
2029                        op: LoadOP::Ld,
2030                        flags: MemFlags::trusted(),
2031                        from: AMode::RegOffset(rd.to_reg(), 0),
2032                    }
2033                    .emit_uncompressed(sink, emit_info, state, start_off);
2034                } else {
2035                    // In the non-PIC sequence we relocate the absolute address into
2036                    // a preallocated space, load it into a register, and jump over it.
2037                    //
2038                    // Emit the following code:
2039                    //   ld rd, label_data
2040                    //   j label_end
2041                    // label_data:
2042                    //   <8 byte space>           # ABS8
2043                    // label_end:
2044
2045                    let label_data = sink.get_label();
2046                    let label_end = sink.get_label();
2047
2048                    // Load the value from a label
2049                    Inst::Load {
2050                        rd,
2051                        op: LoadOP::Ld,
2052                        flags: MemFlags::trusted(),
2053                        from: AMode::Label(label_data),
2054                    }
2055                    .emit(sink, emit_info, state);
2056
2057                    // Jump over the data
2058                    Inst::gen_jump(label_end).emit(sink, emit_info, state);
2059
2060                    sink.bind_label(label_data, &mut state.ctrl_plane);
2061                    sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2062                    sink.put8(0);
2063
2064                    sink.bind_label(label_end, &mut state.ctrl_plane);
2065                }
2066            }
2067
2068            &Inst::ElfTlsGetAddr { rd, ref name } => {
2069                // RISC-V's TLS GD model is slightly different from other arches.
2070                //
2071                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2072                // of the address relative to the GOT entry. This relocation points to
2073                // the symbol as usual.
2074                //
2075                // However, when loading the bottom 12 bits of the address, we need to
2076                // use a label that points to the previous AUIPC instruction.
2077                //
2078                // label:
2079                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2080                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2081                //
2082                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2083
2084                // Create the label that is going to be published to the final binary object.
2085                let auipc_label = sink.get_label();
2086                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2087
2088                // Get the current PC.
2089                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2090                Inst::Auipc {
2091                    rd,
2092                    imm: Imm20::from_i32(0),
2093                }
2094                .emit_uncompressed(sink, emit_info, state, start_off);
2095
2096                // The `addi` here points to the `auipc` label instead of directly to the symbol.
2097                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2098                Inst::AluRRImm12 {
2099                    alu_op: AluOPRRI::Addi,
2100                    rd,
2101                    rs: rd.to_reg(),
2102                    imm12: Imm12::from_i16(0),
2103                }
2104                .emit_uncompressed(sink, emit_info, state, start_off);
2105
2106                Inst::Call {
2107                    info: Box::new(CallInfo::empty(
2108                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2109                        CallConv::SystemV,
2110                    )),
2111                }
2112                .emit_uncompressed(sink, emit_info, state, start_off);
2113            }
2114
2115            &Inst::TrapIf {
2116                rs1,
2117                rs2,
2118                cc,
2119                trap_code,
2120            } => {
2121                let label_end = sink.get_label();
2122                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2123
2124                // Jump over the trap if the condition is false.
2125                Inst::CondBr {
2126                    taken: CondBrTarget::Label(label_end),
2127                    not_taken: CondBrTarget::Fallthrough,
2128                    kind: cond.inverse(),
2129                }
2130                .emit(sink, emit_info, state);
2131                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2132
2133                sink.bind_label(label_end, &mut state.ctrl_plane);
2134            }
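            // For example, a `TrapIf` with cc == Equal emits
            // `bne rs1, rs2, end; udf; end:`, so the trap fires only when rs1 == rs2.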
2135            &Inst::Udf { trap_code } => {
2136                sink.add_trap(trap_code);
2137                sink.put_data(Inst::TRAP_OPCODE);
2138            }
2139            &Inst::AtomicLoad { rd, ty, p } => {
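                // The `fence rw,rw; load; fence r,rw` sequence below is the standard
                // fence-based mapping for a sequentially consistent load on RISC-V.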
2140                // emit the fence.
2141                Inst::Fence {
2142                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2143                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2144                }
2145                .emit(sink, emit_info, state);
2146                // load.
2147                Inst::Load {
2148                    rd,
2149                    op: LoadOP::from_type(ty),
2150                    flags: MemFlags::new(),
2151                    from: AMode::RegOffset(p, 0),
2152                }
2153                .emit(sink, emit_info, state);
2154                Inst::Fence {
2155                    pred: Inst::FENCE_REQ_R,
2156                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2157                }
2158                .emit(sink, emit_info, state);
2159            }
2160            &Inst::AtomicStore { src, ty, p } => {
2161                Inst::Fence {
2162                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2163                    succ: Inst::FENCE_REQ_W,
2164                }
2165                .emit(sink, emit_info, state);
2166                Inst::Store {
2167                    to: AMode::RegOffset(p, 0),
2168                    op: StoreOP::from_type(ty),
2169                    flags: MemFlags::new(),
2170                    src,
2171                }
2172                .emit(sink, emit_info, state);
2173            }
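            // Likewise, `fence rw,w; store` above is the standard fence-based mapping
            // for a sequentially consistent store on RISC-V.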
2174
2175            &Inst::Popcnt {
2176                sum,
2177                tmp,
2178                step,
2179                rs,
2180                ty,
2181            } => {
2182                // Initialize sum to 0.
2183                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2184                // Initialize the loop counter to the number of bits to scan.
2185                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2186                    .emit(sink, emit_info, state);
2187                // Build the probe mask: tmp = 1 << (ty.bits() - 1).
2188                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2189                Inst::AluRRImm12 {
2190                    alu_op: AluOPRRI::Slli,
2191                    rd: tmp,
2192                    rs: tmp.to_reg(),
2193                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2194                }
2195                .emit(sink, emit_info, state);
2196                let label_done = sink.get_label();
2197                let label_loop = sink.get_label();
2198                sink.bind_label(label_loop, &mut state.ctrl_plane);
2199                Inst::CondBr {
2200                    taken: CondBrTarget::Label(label_done),
2201                    not_taken: CondBrTarget::Fallthrough,
2202                    kind: IntegerCompare {
2203                        kind: IntCC::SignedLessThanOrEqual,
2204                        rs1: step.to_reg(),
2205                        rs2: zero_reg(),
2206                    },
2207                }
2208                .emit(sink, emit_info, state);
2209                // Test the current bit; if it is set, increment sum.
2210                {
2211                    Inst::AluRRR {
2212                        alu_op: AluOPRRR::And,
2213                        rd: writable_spilltmp_reg2(),
2214                        rs1: tmp.to_reg(),
2215                        rs2: rs,
2216                    }
2217                    .emit(sink, emit_info, state);
2218                    let label_over = sink.get_label();
2219                    Inst::CondBr {
2220                        taken: CondBrTarget::Label(label_over),
2221                        not_taken: CondBrTarget::Fallthrough,
2222                        kind: IntegerCompare {
2223                            kind: IntCC::Equal,
2224                            rs1: zero_reg(),
2225                            rs2: spilltmp_reg2(),
2226                        },
2227                    }
2228                    .emit(sink, emit_info, state);
2229                    Inst::AluRRImm12 {
2230                        alu_op: AluOPRRI::Addi,
2231                        rd: sum,
2232                        rs: sum.to_reg(),
2233                        imm12: Imm12::ONE,
2234                    }
2235                    .emit(sink, emit_info, state);
2236                    sink.bind_label(label_over, &mut state.ctrl_plane);
2237                }
2238                // Decrement step and shift the probe mask to the next bit.
2239                {
2240                    Inst::AluRRImm12 {
2241                        alu_op: AluOPRRI::Addi,
2242                        rd: step,
2243                        rs: step.to_reg(),
2244                        imm12: Imm12::from_i16(-1),
2245                    }
2246                    .emit(sink, emit_info, state);
2247                    Inst::AluRRImm12 {
2248                        alu_op: AluOPRRI::Srli,
2249                        rd: tmp,
2250                        rs: tmp.to_reg(),
2251                        imm12: Imm12::ONE,
2252                    }
2253                    .emit(sink, emit_info, state);
2254                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2255                }
2256                sink.bind_label(label_done, &mut state.ctrl_plane);
2257            }
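            // This is a simple one-bit-per-iteration loop: e.g. an I8 popcnt runs 8
            // iterations, probing bit 7 down to bit 0 and counting the set bits.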
2258            &Inst::Cltz {
2259                sum,
2260                tmp,
2261                step,
2262                rs,
2263                leading,
2264                ty,
2265            } => {
2266                // Initialize sum to 0.
2267                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2268                // Initialize the loop counter to the number of bits to scan.
2269                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2270                    .emit(sink, emit_info, state);
2271                // Build the probe mask; for a leading count it starts at the MSB.
2272                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2273                if leading {
2274                    Inst::AluRRImm12 {
2275                        alu_op: AluOPRRI::Slli,
2276                        rd: tmp,
2277                        rs: tmp.to_reg(),
2278                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2279                    }
2280                    .emit(sink, emit_info, state);
2281                }
2282                let label_done = sink.get_label();
2283                let label_loop = sink.get_label();
2284                sink.bind_label(label_loop, &mut state.ctrl_plane);
2285                Inst::CondBr {
2286                    taken: CondBrTarget::Label(label_done),
2287                    not_taken: CondBrTarget::Fallthrough,
2288                    kind: IntegerCompare {
2289                        kind: IntCC::SignedLessThanOrEqual,
2290                        rs1: step.to_reg(),
2291                        rs2: zero_reg(),
2292                    },
2293                }
2294                .emit(sink, emit_info, state);
2295                // Test the current bit: stop at the first set bit, otherwise count it.
2296                {
2297                    Inst::AluRRR {
2298                        alu_op: AluOPRRR::And,
2299                        rd: writable_spilltmp_reg2(),
2300                        rs1: tmp.to_reg(),
2301                        rs2: rs,
2302                    }
2303                    .emit(sink, emit_info, state);
2304                    Inst::CondBr {
2305                        taken: CondBrTarget::Label(label_done),
2306                        not_taken: CondBrTarget::Fallthrough,
2307                        kind: IntegerCompare {
2308                            kind: IntCC::NotEqual,
2309                            rs1: zero_reg(),
2310                            rs2: spilltmp_reg2(),
2311                        },
2312                    }
2313                    .emit(sink, emit_info, state);
2314                    Inst::AluRRImm12 {
2315                        alu_op: AluOPRRI::Addi,
2316                        rd: sum,
2317                        rs: sum.to_reg(),
2318                        imm12: Imm12::ONE,
2319                    }
2320                    .emit(sink, emit_info, state);
2321                }
2322                // Decrement step and shift the probe mask to the next bit.
2323                {
2324                    Inst::AluRRImm12 {
2325                        alu_op: AluOPRRI::Addi,
2326                        rd: step,
2327                        rs: step.to_reg(),
2328                        imm12: Imm12::from_i16(-1),
2329                    }
2330                    .emit(sink, emit_info, state);
2331                    Inst::AluRRImm12 {
2332                        alu_op: if leading {
2333                            AluOPRRI::Srli
2334                        } else {
2335                            AluOPRRI::Slli
2336                        },
2337                        rd: tmp,
2338                        rs: tmp.to_reg(),
2339                        imm12: Imm12::ONE,
2340                    }
2341                    .emit(sink, emit_info, state);
2342                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2343                }
2344                sink.bind_label(label_done, &mut state.ctrl_plane);
2345            }
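            // The scan direction is controlled by `leading`: clz starts the mask at
            // the MSB and shifts it right; ctz starts at the LSB and shifts it left.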
2346            &Inst::Brev8 {
2347                rs,
2348                ty,
2349                step,
2350                tmp,
2351                tmp2,
2352                rd,
2353            } => {
2354                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2355                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2356                    .emit(sink, emit_info, state);
2357                // Build the source probe mask (tmp) and the destination bit mask (tmp2).
2358                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2359                Inst::AluRRImm12 {
2360                    alu_op: AluOPRRI::Slli,
2361                    rd: tmp,
2362                    rs: tmp.to_reg(),
2363                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2364                }
2365                .emit(sink, emit_info, state);
2366                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2367                Inst::AluRRImm12 {
2368                    alu_op: AluOPRRI::Slli,
2369                    rd: tmp2,
2370                    rs: tmp2.to_reg(),
2371                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2372                }
2373                .emit(sink, emit_info, state);
2374
2375                let label_done = sink.get_label();
2376                let label_loop = sink.get_label();
2377                sink.bind_label(label_loop, &mut state.ctrl_plane);
2378                Inst::CondBr {
2379                    taken: CondBrTarget::Label(label_done),
2380                    not_taken: CondBrTarget::Fallthrough,
2381                    kind: IntegerCompare {
2382                        kind: IntCC::SignedLessThanOrEqual,
2383                        rs1: step.to_reg(),
2384                        rs2: zero_reg(),
2385                    },
2386                }
2387                .emit(sink, emit_info, state);
2388                // Test the source bit; if set, set the corresponding destination bit.
2389                {
2390                    Inst::AluRRR {
2391                        alu_op: AluOPRRR::And,
2392                        rd: writable_spilltmp_reg2(),
2393                        rs1: tmp.to_reg(),
2394                        rs2: rs,
2395                    }
2396                    .emit(sink, emit_info, state);
2397                    let label_over = sink.get_label();
2398                    Inst::CondBr {
2399                        taken: CondBrTarget::Label(label_over),
2400                        not_taken: CondBrTarget::Fallthrough,
2401                        kind: IntegerCompare {
2402                            kind: IntCC::Equal,
2403                            rs1: zero_reg(),
2404                            rs2: spilltmp_reg2(),
2405                        },
2406                    }
2407                    .emit(sink, emit_info, state);
2408                    Inst::AluRRR {
2409                        alu_op: AluOPRRR::Or,
2410                        rd,
2411                        rs1: rd.to_reg(),
2412                        rs2: tmp2.to_reg(),
2413                    }
2414                    .emit(sink, emit_info, state);
2415                    sink.bind_label(label_over, &mut state.ctrl_plane);
2416                }
2417                // Advance step and the masks for the next bit.
2418                {
2419                    Inst::AluRRImm12 {
2420                        alu_op: AluOPRRI::Addi,
2421                        rd: step,
2422                        rs: step.to_reg(),
2423                        imm12: Imm12::from_i16(-1),
2424                    }
2425                    .emit(sink, emit_info, state);
2426                    Inst::AluRRImm12 {
2427                        alu_op: AluOPRRI::Srli,
2428                        rd: tmp,
2429                        rs: tmp.to_reg(),
2430                        imm12: Imm12::ONE,
2431                    }
2432                    .emit(sink, emit_info, state);
2433                    {
2434                        // Reset tmp2:
2435                        // if (step % 8 == 0) then tmp2 = tmp2 >> 15
2436                        // if (step % 8 != 0) then tmp2 = tmp2 << 1
2437                        let label_over = sink.get_label();
2438                        let label_sll_1 = sink.get_label();
2439                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2440                            .emit(sink, emit_info, state);
2441                        Inst::AluRRR {
2442                            alu_op: AluOPRRR::Rem,
2443                            rd: writable_spilltmp_reg2(),
2444                            rs1: step.to_reg(),
2445                            rs2: spilltmp_reg2(),
2446                        }
2447                        .emit(sink, emit_info, state);
2448                        Inst::CondBr {
2449                            taken: CondBrTarget::Label(label_sll_1),
2450                            not_taken: CondBrTarget::Fallthrough,
2451                            kind: IntegerCompare {
2452                                kind: IntCC::NotEqual,
2453                                rs1: spilltmp_reg2(),
2454                                rs2: zero_reg(),
2455                            },
2456                        }
2457                        .emit(sink, emit_info, state);
2458                        Inst::AluRRImm12 {
2459                            alu_op: AluOPRRI::Srli,
2460                            rd: tmp2,
2461                            rs: tmp2.to_reg(),
2462                            imm12: Imm12::from_i16(15),
2463                        }
2464                        .emit(sink, emit_info, state);
2465                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2466                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2467                        Inst::AluRRImm12 {
2468                            alu_op: AluOPRRI::Slli,
2469                            rd: tmp2,
2470                            rs: tmp2.to_reg(),
2471                            imm12: Imm12::ONE,
2472                        }
2473                        .emit(sink, emit_info, state);
2474                        sink.bind_label(label_over, &mut state.ctrl_plane);
2475                    }
2476                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2477                }
2478                sink.bind_label(label_done, &mut state.ctrl_plane);
2479            }
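            // Net effect: the bits within each byte are reversed while the bytes
            // themselves stay in place, i.e. `brev8` semantics.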
2480            &Inst::StackProbeLoop {
2481                guard_size,
2482                probe_count,
2483                tmp: guard_size_tmp,
2484            } => {
2485                let step = writable_spilltmp_reg();
2486                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2487                    .iter()
2488                    .for_each(|i| i.emit(sink, emit_info, state));
2489                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2490                    .iter()
2491                    .for_each(|i| i.emit(sink, emit_info, state));
2492
2493                let loop_start = sink.get_label();
2494                let label_done = sink.get_label();
2495                sink.bind_label(loop_start, &mut state.ctrl_plane);
2496                Inst::CondBr {
2497                    taken: CondBrTarget::Label(label_done),
2498                    not_taken: CondBrTarget::Fallthrough,
2499                    kind: IntegerCompare {
2500                        kind: IntCC::UnsignedLessThanOrEqual,
2501                        rs1: step.to_reg(),
2502                        rs2: guard_size_tmp.to_reg(),
2503                    },
2504                }
2505                .emit(sink, emit_info, state);
2506                // compute address.
2507                Inst::AluRRR {
2508                    alu_op: AluOPRRR::Sub,
2509                    rd: writable_spilltmp_reg2(),
2510                    rs1: stack_reg(),
2511                    rs2: step.to_reg(),
2512                }
2513                .emit(sink, emit_info, state);
2514                Inst::Store {
2515                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2516                    op: StoreOP::Sb,
2517                    flags: MemFlags::new(),
2518                    src: zero_reg(),
2519                }
2520                .emit(sink, emit_info, state);
2521                // reset step.
2522                Inst::AluRRR {
2523                    alu_op: AluOPRRR::Sub,
2524                    rd: step,
2525                    rs1: step.to_reg(),
2526                    rs2: guard_size_tmp.to_reg(),
2527                }
2528                .emit(sink, emit_info, state);
2529                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2530                sink.bind_label(label_done, &mut state.ctrl_plane);
2531            }
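            // The loop above stores one byte per guard-size step, starting at
            // sp - guard_size * probe_count and walking back up towards the stack
            // pointer, so each touched page is faulted in, in order.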
2532            &Inst::VecAluRRRImm5 {
2533                op,
2534                vd,
2535                vd_src,
2536                imm,
2537                vs2,
2538                ref mask,
2539                ..
2540            } => {
2541                debug_assert_eq!(vd.to_reg(), vd_src);
2542
2543                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2544            }
2545            &Inst::VecAluRRRR {
2546                op,
2547                vd,
2548                vd_src,
2549                vs1,
2550                vs2,
2551                ref mask,
2552                ..
2553            } => {
2554                debug_assert_eq!(vd.to_reg(), vd_src);
2555
2556                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2557            }
2558            &Inst::VecAluRRR {
2559                op,
2560                vd,
2561                vs1,
2562                vs2,
2563                ref mask,
2564                ..
2565            } => {
2566                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2567            }
2568            &Inst::VecAluRRImm5 {
2569                op,
2570                vd,
2571                imm,
2572                vs2,
2573                ref mask,
2574                ..
2575            } => {
2576                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2577            }
2578            &Inst::VecAluRR {
2579                op,
2580                vd,
2581                vs,
2582                ref mask,
2583                ..
2584            } => {
2585                sink.put4(encode_valu_rr(op, vd, vs, *mask));
2586            }
2587            &Inst::VecAluRImm5 {
2588                op,
2589                vd,
2590                imm,
2591                ref mask,
2592                ..
2593            } => {
2594                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2595            }
2596            &Inst::VecSetState { rd, ref vstate } => {
2597                sink.put4(encode_vcfg_imm(
2598                    0x57,
2599                    rd.to_reg(),
2600                    vstate.avl.unwrap_static(),
2601                    &vstate.vtype,
2602                ));
2603
2604                // Update the current vector emit state.
2605                state.vstate = EmitVState::Known(*vstate);
2606            }
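            // Tracking the vector state here lets later emission avoid re-emitting a
            // `vsetvli` when the required state is already known to match.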
2607
2608            &Inst::VecLoad {
2609                eew,
2610                to,
2611                ref from,
2612                ref mask,
2613                flags,
2614                ..
2615            } => {
2616                // Vector loads don't support immediate offsets, so the address may need to be materialized in a register.
2617                let addr = match from {
2618                    VecAMode::UnitStride { base } => {
2619                        let base_reg = base.get_base_register();
2620                        let offset = base.get_offset_with_state(state);
2621
2622                        // Reg+0 Offset can be directly encoded
2623                        if let (Some(base_reg), 0) = (base_reg, offset) {
2624                            base_reg
2625                        } else {
2626                            // Otherwise load the address it into a reg and load from it.
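                            // The spill temp is reserved by this backend and
                            // never register-allocated, so it is free to use here.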
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_vmem_load(
                    0x07,
                    to.to_reg(),
                    eew,
                    addr,
                    from.lumop(),
                    *mask,
                    from.mop(),
                    from.nf(),
                ));
            }

            &Inst::VecStore {
                eew,
                ref to,
                from,
                ref mask,
                flags,
                ..
            } => {
                // Vector stores don't support immediate offsets, so the full
                // address has to be in a register.
                let addr = match to {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A base register with a zero offset can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, materialize the address in a register and store to that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_vmem_store(
                    0x27,
                    from,
                    eew,
                    addr,
                    to.sumop(),
                    *mask,
                    to.mop(),
                    to.nf(),
                ));
            }

            Inst::EmitIsland { needed_space } => {
                if sink.island_needed(*needed_space) {
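                    // Jump over the island data so execution falls through;
                    // request 4 extra bytes to cover that jump itself.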
                    let jump_around_label = sink.get_label();
                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }
        }
    }
}

fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially be quite large (up to 634 bytes!),
    // so emit an island here if we need one.
    //
    // It is difficult to calculate ahead of time exactly how much space the
    // sequence will need, so we measure it by emitting it into a disposable
    // buffer and checking how many bytes actually came out.
    let mut buffer = MachBuffer::new();
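    // Clone the emit state so the dry run can't disturb the real emission
    // state (control plane, vector state, etc.).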
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// This should not be called directly; instead, prefer to call [emit_return_call_common_sequence].
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
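    // Distance from the current SP up to the FP/LR save area: the clobber
    // saves, fixed frame storage, and outgoing argument space all sit below it.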
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };

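    // The clobbered callee saves were stored just below the FP/LR save area;
    // restore them walking downward, one 8-byte slot at a time.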
    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8;
    }

    // Restore the link register and frame pointer.
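    // The prologue stored FP at `sp_to_fp_offset` and LR 8 bytes above it,
    // but only if a setup area was actually allocated.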
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
    // is expecting.
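    // `tail_args_size` is expected to be at least `new_stack_arg_size`, so
    // this subtraction should not underflow.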
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}