cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, CjOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;

pub struct EmitInfo {
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

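/// Maps a compressible register (x8–x15 / f8–f15) to its 3-bit RVC encoding;
/// e.g. a0 (x10) encodes as 2.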
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector state.
    /// Controls the current state of the vector unit at the emission point.
    vstate: EmitVState,

    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.vstate = EmitVState::Unknown;
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Load a mask of `ty.bits()` one bits into `rd`, zero-extended to 64 bits
    /// (all 64 bits set for I64).
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }

    /// Invert all bits, i.e. bitwise NOT (`xori rd, rs, -1`).
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtName { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate; rather, it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
            Inst::EmitIsland { .. } => None,
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this instruction.
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First try to emit this as a compressed instruction.
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal (uncompressed) instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table, call, return_call and try_call from
        // these checks since they emit their own islands, and thus
        // are allowed to exceed the worst case size.
        let emits_own_island = match self {
            Inst::BrTable { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::EmitIsland { .. } => true,
            _ => false,
        };
        if !emits_own_island {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in a compressed form; returns `None` if
    /// no compressed encoding applies.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 and we get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
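                //
                // For example, `add a0, a1, a0` (where rd == rs2) compresses to
                // `c.add a0, a1`.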
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // these are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }
            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
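            //
            // For example, `addi sp, sp, 48` has nzimm = 48/16 = 3, which fits in the
            // signed 6-bit field, so it compresses to `c.addi16sp sp, 48`.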
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
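            //
            // For example, `addi a0, sp, 8` has nzuimm = 8/4 = 2, so it compresses to
            // `c.addi4spn a0, sp, 8`.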
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
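            //
            // For example, `lui a0, 1` compresses to `c.lui a0, 1`, which sets
            // a0 = 0x1000.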
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;
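
                // For example, `lw a0, 16(sp)` has offset / size = 16 / 4 = 4, which
                // fits in the unsigned 6-bit field, so it becomes `c.lwsp a0, 16(sp)`.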

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
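                    //
                    // For example, `lbu a0, 3(a1)` fits (byte offsets 0–3), while
                    // `lhu a0, 4(a1)` does not (halfword offsets 0 or 2 only).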
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`; they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        return Some(());
    }

    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction, so there is
                // no need to check whether we need an `emit_island`. If the data were
                // very long, that would be a bug: RawData is typically used to load
                // data positioned relative to the code stream, and we could exceed
                // `Inst::worst_case_size`. For more information see
                // https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, width, imm } => {
                sink.put4(encode_fli(width, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

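                // The emitted sequence is:
                //   load rd, <label_data>  ; pc-relative load of the constant
                //   j    <label_end>       ; skip over the inline data
                //   label_data: <data>
                //   label_end: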
                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

                // Load into rd
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op: LoadOP::Flh,
                from,
                flags,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // flh unavailable, use an integer load instead
                Inst::Load {
                    rd: writable_spilltmp_reg(),
                    op: LoadOP::Lh,
                    flags,
                    from,
                }
                .emit(sink, emit_info, state);
                // NaN-box the `f16` before loading it into the floating-point
                // register with a 32-bit `fmv`.
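                // NaN-boxing: a narrower float stored in a wider FP register must
                // have all upper bits set, so OR the loaded 16-bit value with
                // 0xffff_0000 before the 32-bit `fmv`.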
                Inst::Lui {
                    rd: writable_spilltmp_reg2(),
                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd: writable_spilltmp_reg(),
                    rs1: spilltmp_reg(),
                    rs2: spilltmp_reg2(),
                }
                .emit(sink, emit_info, state);
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvFmtX,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd,
                    rs: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize the
                    // address into a register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
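                    //
                    // This is the standard `auipc rd, %pcrel_hi(label)` followed by
                    // `<load> rd, %pcrel_lo(label)(rd)` pattern.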
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // Imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store {
                op: StoreOP::Fsh,
                src,
                flags,
                to,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // fsh unavailable, use an integer store instead
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvXFmt,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd: writable_spilltmp_reg(),
                    rs: src,
                }
                .emit(sink, emit_info, state);
                Inst::Store {
                    to,
                    op: StoreOP::Sh,
                    flags,
                    src: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction; instead we emit the
                // equivalent `jalr x0, x1, 0`, which jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
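                // Extend by shifting the value to the top of the register and then
                // shifting back down (arithmetically for a signed extend); e.g. a
                // signed i16 -> i64 extend is `slli rd, rn, 48; srai rd, rd, 48`.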
                let is_u8 = || from_bits == 8 && !signed;
                if is_u8() {
                    // Special case for u8: mask with `andi` instead of shifting.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }

            &Inst::Call { ref info } => {
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site(&[]);
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, &info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
                sink.put4(0b1101111);
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }
1356
1357            &Inst::MovFromPReg { rd, rm } => {
1358                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
1359            }
1360
1361            &Inst::BrTable {
1362                index,
1363                tmp1,
1364                tmp2,
1365                ref targets,
1366            } => {
1367                let ext_index = writable_spilltmp_reg();
1368
1369                let label_compute_target = sink.get_label();
1370
1371                // The default target is passed in as the 0th element of `targets`
1372                // separate it here for clarity.
1373                let default_target = targets[0];
1374                let targets = &targets[1..];
1375
1376                // We are going to potentially emit a large amount of instructions, so ensure that we emit an island
1377                // now if we need one.
1378                //
1379                // The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.
1380                // Check if we need to emit a jump table here to support that jump.
1381                let inst_count = 12 + (targets.len() * 2);
1382                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1383                if sink.island_needed(distance) {
1384                    let jump_around_label = sink.get_label();
1385                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1386                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1387                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1388                }
1389
1390                // We emit a bounds check on the index; if the index is larger than the number of
1391                // jump table entries, we jump to the default block. Otherwise we compute a jump
1392                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1393                // that offset. Each jump table entry is a regular auipc+jalr, which we emit sequentially.
1394                //
1395                // Build the following sequence:
1396                //
1397                // extend_index:
1398                //     zext.w  ext_index, index
1399                // bounds_check:
1400                //     li      tmp, n_labels
1401                //     bltu    ext_index, tmp, compute_target
1402                // jump_to_default_block:
1403                //     auipc   pc, 0
1404                //     jalr    zero, pc, default_block
1405                // compute_target:
1406                //     auipc   pc, 0
1407                //     slli    tmp, ext_index, 3
1408                //     add     pc, pc, tmp
1409                //     jalr    zero, pc, 0x10
1410                // jump_table:
1411                //     ; This repeats for each entry in the jumptable
1412                //     auipc   pc, 0
1413                //     jalr    zero, pc, block_target
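                //
                // Each jump table entry is therefore 8 bytes (two uncompressed 4-byte
                // instructions), which is why the index is scaled by 8 (slli by 3) below.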
1414
1415                // Extend the index to 64 bits.
1416                //
1417                // This prevents us from branching on the top 32 bits of the index,
1418                // which are undefined.
1419                Inst::Extend {
1420                    rd: ext_index,
1421                    rn: index,
1422                    signed: false,
1423                    from_bits: 32,
1424                    to_bits: 64,
1425                }
1426                .emit(sink, emit_info, state);
1427
1428                // Bounds check.
1429                //
1430                // Check if the index passed in is larger than the number of jump table
1431                // entries that we have. If it is, we fall through to a jump to the
1432                // default block.
1433                Inst::load_constant_u32(tmp2, targets.len() as u64)
1434                    .iter()
1435                    .for_each(|i| i.emit(sink, emit_info, state));
1436                Inst::CondBr {
1437                    taken: CondBrTarget::Label(label_compute_target),
1438                    not_taken: CondBrTarget::Fallthrough,
1439                    kind: IntegerCompare {
1440                        kind: IntCC::UnsignedLessThan,
1441                        rs1: ext_index.to_reg(),
1442                        rs2: tmp2.to_reg(),
1443                    },
1444                }
1445                .emit(sink, emit_info, state);
1446
1447                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1448                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1449                    .iter()
1450                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1451
1452                // Compute the jump table offset.
1453                // We need to emit a PC-relative offset.
1454                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1455
1456                // Get the current PC.
1457                Inst::Auipc {
1458                    rd: tmp1,
1459                    imm: Imm20::ZERO,
1460                }
1461                .emit_uncompressed(sink, emit_info, state, start_off);
1462
1463                // These instructions must be emitted as uncompressed since we
1464                // are manually computing the offset from the PC.
1465
1466                // Multiply the index by 8, since that is the size in
1467                // bytes of each jump table entry.
1468                Inst::AluRRImm12 {
1469                    alu_op: AluOPRRI::Slli,
1470                    rd: tmp2,
1471                    rs: ext_index.to_reg(),
1472                    imm12: Imm12::from_i16(3),
1473                }
1474                .emit_uncompressed(sink, emit_info, state, start_off);
1475
1476                // Calculate the base of the jump, PC + the offset from above.
1477                Inst::AluRRR {
1478                    alu_op: AluOPRRR::Add,
1479                    rd: tmp1,
1480                    rs1: tmp1.to_reg(),
1481                    rs2: tmp2.to_reg(),
1482                }
1483                .emit_uncompressed(sink, emit_info, state, start_off);
1484
1485                // Jump into the jump table.
1486                // We add a 16 byte offset here, since the table starts 4 instructions
1487                // (16 bytes) after the AUIPC that captured the PC.
1488                Inst::Jalr {
1489                    rd: writable_zero_reg(),
1490                    base: tmp1.to_reg(),
1491                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1492                }
1493                .emit_uncompressed(sink, emit_info, state, start_off);
1494
1495                // Emit the jump table.
1496                //
1497                // Each entry is an auipc + jalr to the target block. Any island that was
1498                // needed was already emitted above.
1499
1500                // Emit the jumps back to back.
1501                for target in targets.iter() {
1502                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1503
1504                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1505                        .iter()
1506                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1507                }
1508
1509                // We've just emitted an island that is safe up to *here*.
1510                // Mark it as such so that we don't needlessly emit additional islands.
1511                *start_off = sink.cur_offset();
1512            }
1513
1514            &Inst::Atomic {
1515                op,
1516                rd,
1517                addr,
1518                src,
1519                amo,
1520            } => {
1521                // TODO: get flags from original CLIF atomic instruction
1522                let flags = MemFlags::new();
1523                if let Some(trap_code) = flags.trap_code() {
1524                    sink.add_trap(trap_code);
1525                }
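                // Standard R-type AMO encoding: rd in bits 11:7, funct3 in 14:12,
                // rs1 (the address) in 19:15, rs2 (the source) in 24:20, and funct7
                // (including the aq/rl ordering bits) in 31:25.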
1526                let x = op.op_code()
1527                    | reg_to_gpr_num(rd.to_reg()) << 7
1528                    | op.funct3() << 12
1529                    | reg_to_gpr_num(addr) << 15
1530                    | reg_to_gpr_num(src) << 20
1531                    | op.funct7(amo) << 25;
1532
1533                sink.put4(x);
1534            }
1535            &Inst::Fence { pred, succ } => {
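                // FENCE encoding: the MISC-MEM opcode with the successor set in
                // bits 23:20 and the predecessor set in bits 27:24.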
1536                let x = 0b0001111
1537                    | 0b00000 << 7
1538                    | 0b000 << 12
1539                    | 0b00000 << 15
1540                    | (succ as u32) << 20
1541                    | (pred as u32) << 24;
1542
1543                sink.put4(x);
1544            }
1545            &Inst::Auipc { rd, imm } => {
1546                sink.put4(enc_auipc(rd, imm));
1547            }
1548
1549            &Inst::LoadAddr { rd, mem } => {
1550                let base = mem.get_base_register();
1551                let offset = mem.get_offset_with_state(state);
1552                let offset_imm12 = Imm12::maybe_from_i64(offset);
1553
1554                match (mem, base, offset_imm12) {
1555                    (_, Some(rs), Some(imm12)) => {
1556                        Inst::AluRRImm12 {
1557                            alu_op: AluOPRRI::Addi,
1558                            rd,
1559                            rs,
1560                            imm12,
1561                        }
1562                        .emit(sink, emit_info, state);
1563                    }
1564                    (_, Some(rs), None) => {
1565                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1566                        insts.push(Inst::AluRRR {
1567                            alu_op: AluOPRRR::Add,
1568                            rd,
1569                            rs1: rd.to_reg(),
1570                            rs2: rs,
1571                        });
1572                        insts
1573                            .into_iter()
1574                            .for_each(|inst| inst.emit(sink, emit_info, state));
1575                    }
1576                    (AMode::Const(addr), None, _) => {
1577                        // Get an address label for the constant and recurse.
1578                        let label = sink.get_label_for_constant(addr);
1579                        Inst::LoadAddr {
1580                            rd,
1581                            mem: AMode::Label(label),
1582                        }
1583                        .emit(sink, emit_info, state);
1584                    }
1585                    (AMode::Label(label), None, _) => {
1586                        // Get the current PC.
1587                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1588                        let inst = Inst::Auipc {
1589                            rd,
1590                            imm: Imm20::ZERO,
1591                        };
1592                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1593
1594                        // Emit an add to the address with a relocation.
1595                        // This later gets patched up with the correct offset.
1596                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1597                        Inst::AluRRImm12 {
1598                            alu_op: AluOPRRI::Addi,
1599                            rd,
1600                            rs: rd.to_reg(),
1601                            imm12: Imm12::ZERO,
1602                        }
1603                        .emit_uncompressed(sink, emit_info, state, start_off);
1604                    }
1605                    (amode, _, _) => {
1606                        unimplemented!("LoadAddr: {:?}", amode);
1607                    }
1608                }
1609            }
1610
1611            &Inst::Select {
1612                ref dst,
1613                condition,
1614                ref x,
1615                ref y,
1616            } => {
1617                // The general form for this select is the following:
1618                //
1619                //     mv rd, x
1620                //     b{cond} rcond, label_end
1621                //     mv rd, y
1622                // label_end:
1623                //     ... etc
1624                //
1625                // This is built on the assumption that moves are cheap, but branches and jumps
1626                // are not. So with this format we always avoid one jump instruction at the expense
1627                // of an unconditional move.
1628                //
1629                // We also perform another optimization here. If the destination register is the same
1630                // as one of the input registers, we can avoid emitting the first unconditional move
1631                // and emit just the branch and the second move.
1632                //
1633                // To make sure that this happens as often as possible, we also try to invert the
1634                // condition, so that if either of the input registers are the same as the destination
1635                // we avoid that move.
1636
1637                let label_end = sink.get_label();
1638
1639                let xregs = x.regs();
1640                let yregs = y.regs();
1641                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1642                let condregs = condition.regs();
1643
1644                // We are going to write to the destination register before evaluating
1645                // the condition, so we need to make sure that the destination register
1646                // is not one of the condition registers.
1647                //
1648                // Overlap should never happen here, since the regalloc constraints
1649                // for this instruction are set up to prevent it.
1650                debug_assert_ne!(dstregs, condregs);
1651
1652                // Check if we can invert the condition and avoid moving the y registers into
1653                // the destination. This allows us to only emit the branch and one of the moves.
1654                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1655                    (yregs, xregs, condition.inverse())
1656                } else {
1657                    (xregs, yregs, condition)
1658                };
1659
1660                // Unconditionally move one of the values to the destination register.
1661                //
1662                // These moves may not end up being emitted if the source and
1663                // destination registers are the same. That logic is built into
1664                // the emit function for `Inst::Mov`.
1665                for i in gen_moves(dst.regs(), uncond_move) {
1666                    i.emit(sink, emit_info, state);
1667                }
1668
1669                // If the condition passes we skip over the conditional move
1670                Inst::CondBr {
1671                    taken: CondBrTarget::Label(label_end),
1672                    not_taken: CondBrTarget::Fallthrough,
1673                    kind: condition,
1674                }
1675                .emit(sink, emit_info, state);
1676
1677                // Move the conditional value to the destination register.
1678                for i in gen_moves(dst.regs(), cond_move) {
1679                    i.emit(sink, emit_info, state);
1680                }
1681
1682                sink.bind_label(label_end, &mut state.ctrl_plane);
1683            }
1684            &Inst::Jalr { rd, base, offset } => {
1685                sink.put4(enc_jalr(rd, base, offset));
1686            }
1687            &Inst::EBreak => {
1688                sink.put4(0x00100073);
1689            }
1690            &Inst::AtomicCas {
1691                offset,
1692                t0,
1693                dst,
1694                e,
1695                addr,
1696                v,
1697                ty,
1698            } => {
1699                //     # addr holds address of memory location
1700                //     # e holds expected value
1701                //     # v holds desired value
1702                //     # dst holds return value
1703                // cas:
1704                //     lr.w dst, (addr)       # Load original value.
1705                //     bne dst, e, fail       # Doesn't match, so fail.
1706                //     sc.w t0, v, (addr)     # Try to update.
1707                //     bnez t0, cas           # If the store failed, retry.
1708                // fail:
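                //
                // For types narrower than 32 bits we operate on the containing aligned
                // word: `offset` is the bit offset of the value within that word, and
                // `AtomicOP::extract`/`AtomicOP::merge` isolate and reinsert those bits.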
1709                let fail_label = sink.get_label();
1710                let cas_label = sink.get_label();
1711                sink.bind_label(cas_label, &mut state.ctrl_plane);
1712                Inst::Atomic {
1713                    op: AtomicOP::load_op(ty),
1714                    rd: dst,
1715                    addr,
1716                    src: zero_reg(),
1717                    amo: AMO::SeqCst,
1718                }
1719                .emit(sink, emit_info, state);
1720                if ty.bits() < 32 {
1721                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1722                        .iter()
1723                        .for_each(|i| i.emit(sink, emit_info, state));
1724                } else if ty.bits() == 32 {
1725                    Inst::Extend {
1726                        rd: dst,
1727                        rn: dst.to_reg(),
1728                        signed: false,
1729                        from_bits: 32,
1730                        to_bits: 64,
1731                    }
1732                    .emit(sink, emit_info, state);
1733                }
1734                Inst::CondBr {
1735                    taken: CondBrTarget::Label(fail_label),
1736                    not_taken: CondBrTarget::Fallthrough,
1737                    kind: IntegerCompare {
1738                        kind: IntCC::NotEqual,
1739                        rs1: e,
1740                        rs2: dst.to_reg(),
1741                    },
1742                }
1743                .emit(sink, emit_info, state);
1744                let store_value = if ty.bits() < 32 {
1745                    // Reload the current value into t0.
1746                    Inst::Atomic {
1747                        op: AtomicOP::load_op(ty),
1748                        rd: t0,
1749                        addr,
1750                        src: zero_reg(),
1751                        amo: AMO::SeqCst,
1752                    }
1753                    .emit(sink, emit_info, state);
1754                    // Merge the new value into its part of the loaded word.
1755                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1756                        .iter()
1757                        .for_each(|i| i.emit(sink, emit_info, state));
1758                    t0.to_reg()
1759                } else {
1760                    v
1761                };
1762                Inst::Atomic {
1763                    op: AtomicOP::store_op(ty),
1764                    rd: t0,
1765                    addr,
1766                    src: store_value,
1767                    amo: AMO::SeqCst,
1768                }
1769                .emit(sink, emit_info, state);
1770                // Check whether our store succeeded.
1771                Inst::CondBr {
1772                    taken: CondBrTarget::Label(cas_label),
1773                    not_taken: CondBrTarget::Fallthrough,
1774                    kind: IntegerCompare {
1775                        kind: IntCC::NotEqual,
1776                        rs1: t0.to_reg(),
1777                        rs2: zero_reg(),
1778                    },
1779                }
1780                .emit(sink, emit_info, state);
1781                sink.bind_label(fail_label, &mut state.ctrl_plane);
1782            }
1783            &Inst::AtomicRmwLoop {
1784                offset,
1785                op,
1786                dst,
1787                ty,
1788                p,
1789                x,
1790                t0,
1791            } => {
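                // This expands to a load-reserved/store-conditional retry loop: load the
                // old value, compute the new value according to `op`, attempt the store,
                // and branch back to `retry` until the store succeeds.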
1792                let retry = sink.get_label();
1793                sink.bind_label(retry, &mut state.ctrl_plane);
1794                // load old value.
1795                Inst::Atomic {
1796                    op: AtomicOP::load_op(ty),
1797                    rd: dst,
1798                    addr: p,
1799                    src: zero_reg(),
1800                    amo: AMO::SeqCst,
1801                }
1802                .emit(sink, emit_info, state);
1803                // Compute the value to store back, depending on the RMW op.
1804
1805                let store_value: Reg = match op {
1806                    crate::ir::AtomicRmwOp::Add
1807                    | crate::ir::AtomicRmwOp::Sub
1808                    | crate::ir::AtomicRmwOp::And
1809                    | crate::ir::AtomicRmwOp::Or
1810                    | crate::ir::AtomicRmwOp::Xor => {
1811                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1812                            .iter()
1813                            .for_each(|i| i.emit(sink, emit_info, state));
1814                        Inst::AluRRR {
1815                            alu_op: match op {
1816                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1817                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1818                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1819                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1820                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1821                                _ => unreachable!(),
1822                            },
1823                            rd: t0,
1824                            rs1: dst.to_reg(),
1825                            rs2: x,
1826                        }
1827                        .emit(sink, emit_info, state);
1828                        Inst::Atomic {
1829                            op: AtomicOP::load_op(ty),
1830                            rd: writable_spilltmp_reg2(),
1831                            addr: p,
1832                            src: zero_reg(),
1833                            amo: AMO::SeqCst,
1834                        }
1835                        .emit(sink, emit_info, state);
1836                        AtomicOP::merge(
1837                            writable_spilltmp_reg2(),
1838                            writable_spilltmp_reg(),
1839                            offset,
1840                            t0.to_reg(),
1841                            ty,
1842                        )
1843                        .iter()
1844                        .for_each(|i| i.emit(sink, emit_info, state));
1845                        spilltmp_reg2()
1846                    }
1847                    crate::ir::AtomicRmwOp::Nand => {
1848                        if ty.bits() < 32 {
1849                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1850                                .iter()
1851                                .for_each(|i| i.emit(sink, emit_info, state));
1852                        }
1853                        Inst::AluRRR {
1854                            alu_op: AluOPRRR::And,
1855                            rd: t0,
1856                            rs1: x,
1857                            rs2: dst.to_reg(),
1858                        }
1859                        .emit(sink, emit_info, state);
1860                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1861                        if ty.bits() < 32 {
1862                            Inst::Atomic {
1863                                op: AtomicOP::load_op(ty),
1864                                rd: writable_spilltmp_reg2(),
1865                                addr: p,
1866                                src: zero_reg(),
1867                                amo: AMO::SeqCst,
1868                            }
1869                            .emit(sink, emit_info, state);
1870                            AtomicOP::merge(
1871                                writable_spilltmp_reg2(),
1872                                writable_spilltmp_reg(),
1873                                offset,
1874                                t0.to_reg(),
1875                                ty,
1876                            )
1877                            .iter()
1878                            .for_each(|i| i.emit(sink, emit_info, state));
1879                            spilltmp_reg2()
1880                        } else {
1881                            t0.to_reg()
1882                        }
1883                    }
1884
1885                    crate::ir::AtomicRmwOp::Umin
1886                    | crate::ir::AtomicRmwOp::Umax
1887                    | crate::ir::AtomicRmwOp::Smin
1888                    | crate::ir::AtomicRmwOp::Smax => {
1889                        let label_select_dst = sink.get_label();
1890                        let label_select_done = sink.get_label();
1891                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1892                        {
1893                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1894                        } else {
1895                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1896                        }
1897                        .iter()
1898                        .for_each(|i| i.emit(sink, emit_info, state));
1899
1900                        Inst::CondBr {
1901                            taken: CondBrTarget::Label(label_select_dst),
1902                            not_taken: CondBrTarget::Fallthrough,
1903                            kind: IntegerCompare {
1904                                kind: match op {
1905                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1906                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1907                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1908                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1909                                    _ => unreachable!(),
1910                                },
1911                                rs1: dst.to_reg(),
1912                                rs2: x,
1913                            },
1914                        }
1915                        .emit(sink, emit_info, state);
1916                        // Here we select x.
1917                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1918                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1919                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1920                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1921                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1922                        Inst::Atomic {
1923                            op: AtomicOP::load_op(ty),
1924                            rd: writable_spilltmp_reg2(),
1925                            addr: p,
1926                            src: zero_reg(),
1927                            amo: AMO::SeqCst,
1928                        }
1929                        .emit(sink, emit_info, state);
1930                        AtomicOP::merge(
1931                            writable_spilltmp_reg2(),
1932                            writable_spilltmp_reg(),
1933                            offset,
1934                            t0.to_reg(),
1935                            ty,
1936                        )
1937                        .iter()
1938                        .for_each(|i| i.emit(sink, emit_info, state));
1939                        spilltmp_reg2()
1940                    }
1941                    crate::ir::AtomicRmwOp::Xchg => {
1942                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1943                            .iter()
1944                            .for_each(|i| i.emit(sink, emit_info, state));
1945                        Inst::Atomic {
1946                            op: AtomicOP::load_op(ty),
1947                            rd: writable_spilltmp_reg2(),
1948                            addr: p,
1949                            src: zero_reg(),
1950                            amo: AMO::SeqCst,
1951                        }
1952                        .emit(sink, emit_info, state);
1953                        AtomicOP::merge(
1954                            writable_spilltmp_reg2(),
1955                            writable_spilltmp_reg(),
1956                            offset,
1957                            x,
1958                            ty,
1959                        )
1960                        .iter()
1961                        .for_each(|i| i.emit(sink, emit_info, state));
1962                        spilltmp_reg2()
1963                    }
1964                };
1965
1966                Inst::Atomic {
1967                    op: AtomicOP::store_op(ty),
1968                    rd: t0,
1969                    addr: p,
1970                    src: store_value,
1971                    amo: AMO::SeqCst,
1972                }
1973                .emit(sink, emit_info, state);
1974
1975                // If the store failed, retry.
1976                Inst::CondBr {
1977                    taken: CondBrTarget::Label(retry),
1978                    not_taken: CondBrTarget::Fallthrough,
1979                    kind: IntegerCompare {
1980                        kind: IntCC::NotEqual,
1981                        rs1: t0.to_reg(),
1982                        rs2: zero_reg(),
1983                    },
1984                }
1985                .emit(sink, emit_info, state);
1986            }
1987
1988            &Inst::LoadExtName {
1989                rd,
1990                ref name,
1991                offset,
1992            } => {
1993                if emit_info.shared_flag.is_pic() {
1994                    // Load a PC-relative address into a register.
1995                    // RISC-V does this slightly differently from other arches. We emit a relocation
1996                    // with a label, instead of the symbol itself.
1997                    //
1998                    // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
1999                    //
2000                    // Emit the following code:
2001                    // label:
2002                    //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2003                    //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2004
2005                    // Create the label that is going to be published to the final binary object.
2006                    let auipc_label = sink.get_label();
2007                    sink.bind_label(auipc_label, &mut state.ctrl_plane);
2008
2009                    // Get the current PC.
2010                    sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2011                    Inst::Auipc {
2012                        rd,
2013                        imm: Imm20::ZERO,
2014                    }
2015                    .emit_uncompressed(sink, emit_info, state, start_off);
2016
2017                    // The `ld` here points to the `auipc` label instead of directly to the symbol.
2018                    sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2019                    Inst::Load {
2020                        rd,
2021                        op: LoadOP::Ld,
2022                        flags: MemFlags::trusted(),
2023                        from: AMode::RegOffset(rd.to_reg(), 0),
2024                    }
2025                    .emit_uncompressed(sink, emit_info, state, start_off);
2026                } else {
2027                    // In the non-PIC sequence we relocate the absolute address into
2028                    // a preallocated slot, load it into a register, and jump over it.
2029                    //
2030                    // Emit the following code:
2031                    //   ld rd, label_data
2032                    //   j label_end
2033                    // label_data:
2034                    //   <8 byte space>           # ABS8
2035                    // label_end:
2036
2037                    let label_data = sink.get_label();
2038                    let label_end = sink.get_label();
2039
2040                    // Load the value from a label
2041                    Inst::Load {
2042                        rd,
2043                        op: LoadOP::Ld,
2044                        flags: MemFlags::trusted(),
2045                        from: AMode::Label(label_data),
2046                    }
2047                    .emit(sink, emit_info, state);
2048
2049                    // Jump over the data
2050                    Inst::gen_jump(label_end).emit(sink, emit_info, state);
2051
2052                    sink.bind_label(label_data, &mut state.ctrl_plane);
2053                    sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2054                    sink.put8(0);
2055
2056                    sink.bind_label(label_end, &mut state.ctrl_plane);
2057                }
2058            }
2059
2060            &Inst::ElfTlsGetAddr { rd, ref name } => {
2061                // RISC-V's TLS GD model is slightly different from other arches.
2062                //
2063                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2064                // of the address relative to the GOT entry. This relocation points to
2065                // the symbol as usual.
2066                //
2067                // However, when loading the bottom 12 bits of the address, we need to
2068                // use a label that points to the previous AUIPC instruction.
2069                //
2070                // label:
2071                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2072                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2073                //
2074                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2075
2076                // Create the label that is going to be published to the final binary object.
2077                let auipc_label = sink.get_label();
2078                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2079
2080                // Get the current PC.
2081                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2082                Inst::Auipc {
2083                    rd,
2084                    imm: Imm20::ZERO,
2085                }
2086                .emit_uncompressed(sink, emit_info, state, start_off);
2087
2088                // The `addi` here points to the `auipc` label instead of directly to the symbol.
2089                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2090                Inst::AluRRImm12 {
2091                    alu_op: AluOPRRI::Addi,
2092                    rd,
2093                    rs: rd.to_reg(),
2094                    imm12: Imm12::ZERO,
2095                }
2096                .emit_uncompressed(sink, emit_info, state, start_off);
2097
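                // Finally, call the `__tls_get_addr` libcall to resolve the address.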
2098                Inst::Call {
2099                    info: Box::new(CallInfo::empty(
2100                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2101                        CallConv::SystemV,
2102                    )),
2103                }
2104                .emit_uncompressed(sink, emit_info, state, start_off);
2105            }
2106
2107            &Inst::TrapIf {
2108                rs1,
2109                rs2,
2110                cc,
2111                trap_code,
2112            } => {
2113                let label_end = sink.get_label();
2114                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2115
2116                // Jump over the trap if the condition is false.
2117                Inst::CondBr {
2118                    taken: CondBrTarget::Label(label_end),
2119                    not_taken: CondBrTarget::Fallthrough,
2120                    kind: cond.inverse(),
2121                }
2122                .emit(sink, emit_info, state);
2123                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2124
2125                sink.bind_label(label_end, &mut state.ctrl_plane);
2126            }
2127            &Inst::Udf { trap_code } => {
2128                sink.add_trap(trap_code);
2129                sink.put_data(Inst::TRAP_OPCODE);
2130            }
2131            &Inst::AtomicLoad { rd, ty, p } => {
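                // A sequentially consistent load: fence rw,rw; load; fence r,rw.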
2132                // emit the fence.
2133                Inst::Fence {
2134                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2135                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2136                }
2137                .emit(sink, emit_info, state);
2138                // load.
2139                Inst::Load {
2140                    rd,
2141                    op: LoadOP::from_type(ty),
2142                    flags: MemFlags::new(),
2143                    from: AMode::RegOffset(p, 0),
2144                }
2145                .emit(sink, emit_info, state);
2146                Inst::Fence {
2147                    pred: Inst::FENCE_REQ_R,
2148                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2149                }
2150                .emit(sink, emit_info, state);
2151            }
2152            &Inst::AtomicStore { src, ty, p } => {
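                // A sequentially consistent store: fence rw,w; store.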
2153                Inst::Fence {
2154                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2155                    succ: Inst::FENCE_REQ_W,
2156                }
2157                .emit(sink, emit_info, state);
2158                Inst::Store {
2159                    to: AMode::RegOffset(p, 0),
2160                    op: StoreOP::from_type(ty),
2161                    flags: MemFlags::new(),
2162                    src,
2163                }
2164                .emit(sink, emit_info, state);
2165            }
2166
2167            &Inst::Popcnt {
2168                sum,
2169                tmp,
2170                step,
2171                rs,
2172                ty,
2173            } => {
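                // A naive bit-by-bit popcount loop: `tmp` holds a one-bit probe mask that
                // starts at the MSB of the type and shifts right each iteration, `step`
                // counts the remaining bits, and `sum` accumulates the set bits found.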
2174                // Initialize sum to 0.
2175                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2176                // Load the number of bits to scan into step.
2177                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2178                    .emit(sink, emit_info, state);
2179                // Build a single-bit mask at the MSB of the type in tmp.
2180                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2181                Inst::AluRRImm12 {
2182                    alu_op: AluOPRRI::Slli,
2183                    rd: tmp,
2184                    rs: tmp.to_reg(),
2185                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2186                }
2187                .emit(sink, emit_info, state);
2188                let label_done = sink.get_label();
2189                let label_loop = sink.get_label();
2190                sink.bind_label(label_loop, &mut state.ctrl_plane);
2191                Inst::CondBr {
2192                    taken: CondBrTarget::Label(label_done),
2193                    not_taken: CondBrTarget::Fallthrough,
2194                    kind: IntegerCompare {
2195                        kind: IntCC::SignedLessThanOrEqual,
2196                        rs1: step.to_reg(),
2197                        rs2: zero_reg(),
2198                    },
2199                }
2200                .emit(sink, emit_info, state);
2201                // test and add sum.
2202                {
2203                    Inst::AluRRR {
2204                        alu_op: AluOPRRR::And,
2205                        rd: writable_spilltmp_reg2(),
2206                        rs1: tmp.to_reg(),
2207                        rs2: rs,
2208                    }
2209                    .emit(sink, emit_info, state);
2210                    let label_over = sink.get_label();
2211                    Inst::CondBr {
2212                        taken: CondBrTarget::Label(label_over),
2213                        not_taken: CondBrTarget::Fallthrough,
2214                        kind: IntegerCompare {
2215                            kind: IntCC::Equal,
2216                            rs1: zero_reg(),
2217                            rs2: spilltmp_reg2(),
2218                        },
2219                    }
2220                    .emit(sink, emit_info, state);
2221                    Inst::AluRRImm12 {
2222                        alu_op: AluOPRRI::Addi,
2223                        rd: sum,
2224                        rs: sum.to_reg(),
2225                        imm12: Imm12::ONE,
2226                    }
2227                    .emit(sink, emit_info, state);
2228                    sink.bind_label(label_over, &mut state.ctrl_plane);
2229                }
2230                // set step and tmp.
2231                {
2232                    Inst::AluRRImm12 {
2233                        alu_op: AluOPRRI::Addi,
2234                        rd: step,
2235                        rs: step.to_reg(),
2236                        imm12: Imm12::from_i16(-1),
2237                    }
2238                    .emit(sink, emit_info, state);
2239                    Inst::AluRRImm12 {
2240                        alu_op: AluOPRRI::Srli,
2241                        rd: tmp,
2242                        rs: tmp.to_reg(),
2243                        imm12: Imm12::ONE,
2244                    }
2245                    .emit(sink, emit_info, state);
2246                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2247                }
2248                sink.bind_label(label_done, &mut state.ctrl_plane);
2249            }
2250            &Inst::Cltz {
2251                sum,
2252                tmp,
2253                step,
2254                rs,
2255                leading,
2256                ty,
2257            } => {
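                // Count leading or trailing zeros by scanning with a one-bit mask: the
                // mask starts at the MSB (leading) or LSB (trailing), `sum` counts the
                // zeros seen, and the loop exits early at the first set bit.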
2258                // Initialize sum to 0.
2259                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2260                // Load the number of bits to scan into step.
2261                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2262                    .emit(sink, emit_info, state);
2263                // Set tmp to 1; for a leading scan it is shifted up to the MSB below.
2264                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2265                if leading {
2266                    Inst::AluRRImm12 {
2267                        alu_op: AluOPRRI::Slli,
2268                        rd: tmp,
2269                        rs: tmp.to_reg(),
2270                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2271                    }
2272                    .emit(sink, emit_info, state);
2273                }
2274                let label_done = sink.get_label();
2275                let label_loop = sink.get_label();
2276                sink.bind_label(label_loop, &mut state.ctrl_plane);
2277                Inst::CondBr {
2278                    taken: CondBrTarget::Label(label_done),
2279                    not_taken: CondBrTarget::Fallthrough,
2280                    kind: IntegerCompare {
2281                        kind: IntCC::SignedLessThanOrEqual,
2282                        rs1: step.to_reg(),
2283                        rs2: zero_reg(),
2284                    },
2285                }
2286                .emit(sink, emit_info, state);
2287                // test and add sum.
2288                {
2289                    Inst::AluRRR {
2290                        alu_op: AluOPRRR::And,
2291                        rd: writable_spilltmp_reg2(),
2292                        rs1: tmp.to_reg(),
2293                        rs2: rs,
2294                    }
2295                    .emit(sink, emit_info, state);
2296                    Inst::CondBr {
2297                        taken: CondBrTarget::Label(label_done),
2298                        not_taken: CondBrTarget::Fallthrough,
2299                        kind: IntegerCompare {
2300                            kind: IntCC::NotEqual,
2301                            rs1: zero_reg(),
2302                            rs2: spilltmp_reg2(),
2303                        },
2304                    }
2305                    .emit(sink, emit_info, state);
2306                    Inst::AluRRImm12 {
2307                        alu_op: AluOPRRI::Addi,
2308                        rd: sum,
2309                        rs: sum.to_reg(),
2310                        imm12: Imm12::ONE,
2311                    }
2312                    .emit(sink, emit_info, state);
2313                }
2314                // set step and tmp.
2315                {
2316                    Inst::AluRRImm12 {
2317                        alu_op: AluOPRRI::Addi,
2318                        rd: step,
2319                        rs: step.to_reg(),
2320                        imm12: Imm12::from_i16(-1),
2321                    }
2322                    .emit(sink, emit_info, state);
2323                    Inst::AluRRImm12 {
2324                        alu_op: if leading {
2325                            AluOPRRI::Srli
2326                        } else {
2327                            AluOPRRI::Slli
2328                        },
2329                        rd: tmp,
2330                        rs: tmp.to_reg(),
2331                        imm12: Imm12::ONE,
2332                    }
2333                    .emit(sink, emit_info, state);
2334                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2335                }
2336                sink.bind_label(label_done, &mut state.ctrl_plane);
2337            }
2338            &Inst::Brev8 {
2339                rs,
2340                ty,
2341                step,
2342                tmp,
2343                tmp2,
2344                rd,
2345            } => {
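                // Reverse the bits within each byte: `tmp` is a probe mask scanning the
                // source from the MSB down, while `tmp2` tracks the corresponding
                // destination bit, stepping down to the next byte every 8 iterations.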
2346                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2347                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2348                    .emit(sink, emit_info, state);
2349                // Build a single-bit mask at the MSB of the type in tmp.
2350                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2351                Inst::AluRRImm12 {
2352                    alu_op: AluOPRRI::Slli,
2353                    rd: tmp,
2354                    rs: tmp.to_reg(),
2355                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2356                }
2357                .emit(sink, emit_info, state);
2358                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2359                Inst::AluRRImm12 {
2360                    alu_op: AluOPRRI::Slli,
2361                    rd: tmp2,
2362                    rs: tmp2.to_reg(),
2363                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2364                }
2365                .emit(sink, emit_info, state);
2366
2367                let label_done = sink.get_label();
2368                let label_loop = sink.get_label();
2369                sink.bind_label(label_loop, &mut state.ctrl_plane);
2370                Inst::CondBr {
2371                    taken: CondBrTarget::Label(label_done),
2372                    not_taken: CondBrTarget::Fallthrough,
2373                    kind: IntegerCompare {
2374                        kind: IntCC::SignedLessThanOrEqual,
2375                        rs1: step.to_reg(),
2376                        rs2: zero_reg(),
2377                    },
2378                }
2379                .emit(sink, emit_info, state);
2380                // test and set bit.
2381                {
2382                    Inst::AluRRR {
2383                        alu_op: AluOPRRR::And,
2384                        rd: writable_spilltmp_reg2(),
2385                        rs1: tmp.to_reg(),
2386                        rs2: rs,
2387                    }
2388                    .emit(sink, emit_info, state);
2389                    let label_over = sink.get_label();
2390                    Inst::CondBr {
2391                        taken: CondBrTarget::Label(label_over),
2392                        not_taken: CondBrTarget::Fallthrough,
2393                        kind: IntegerCompare {
2394                            kind: IntCC::Equal,
2395                            rs1: zero_reg(),
2396                            rs2: spilltmp_reg2(),
2397                        },
2398                    }
2399                    .emit(sink, emit_info, state);
2400                    Inst::AluRRR {
2401                        alu_op: AluOPRRR::Or,
2402                        rd,
2403                        rs1: rd.to_reg(),
2404                        rs2: tmp2.to_reg(),
2405                    }
2406                    .emit(sink, emit_info, state);
2407                    sink.bind_label(label_over, &mut state.ctrl_plane);
2408                }
2409                // set step and tmp.
2410                {
2411                    Inst::AluRRImm12 {
2412                        alu_op: AluOPRRI::Addi,
2413                        rd: step,
2414                        rs: step.to_reg(),
2415                        imm12: Imm12::from_i16(-1),
2416                    }
2417                    .emit(sink, emit_info, state);
2418                    Inst::AluRRImm12 {
2419                        alu_op: AluOPRRI::Srli,
2420                        rd: tmp,
2421                        rs: tmp.to_reg(),
2422                        imm12: Imm12::ONE,
2423                    }
2424                    .emit(sink, emit_info, state);
2425                    {
2426                        // Reset tmp2:
2427                        // if (step % 8 == 0) then tmp2 = tmp2 >> 15
2428                        // if (step % 8 != 0) then tmp2 = tmp2 << 1
2429                        let label_over = sink.get_label();
2430                        let label_sll_1 = sink.get_label();
2431                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2432                            .emit(sink, emit_info, state);
2433                        Inst::AluRRR {
2434                            alu_op: AluOPRRR::Rem,
2435                            rd: writable_spilltmp_reg2(),
2436                            rs1: step.to_reg(),
2437                            rs2: spilltmp_reg2(),
2438                        }
2439                        .emit(sink, emit_info, state);
2440                        Inst::CondBr {
2441                            taken: CondBrTarget::Label(label_sll_1),
2442                            not_taken: CondBrTarget::Fallthrough,
2443                            kind: IntegerCompare {
2444                                kind: IntCC::NotEqual,
2445                                rs1: spilltmp_reg2(),
2446                                rs2: zero_reg(),
2447                            },
2448                        }
2449                        .emit(sink, emit_info, state);
2450                        Inst::AluRRImm12 {
2451                            alu_op: AluOPRRI::Srli,
2452                            rd: tmp2,
2453                            rs: tmp2.to_reg(),
2454                            imm12: Imm12::from_i16(15),
2455                        }
2456                        .emit(sink, emit_info, state);
2457                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2458                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2459                        Inst::AluRRImm12 {
2460                            alu_op: AluOPRRI::Slli,
2461                            rd: tmp2,
2462                            rs: tmp2.to_reg(),
2463                            imm12: Imm12::ONE,
2464                        }
2465                        .emit(sink, emit_info, state);
2466                        sink.bind_label(label_over, &mut state.ctrl_plane);
2467                    }
2468                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2469                }
2470                sink.bind_label(label_done, &mut state.ctrl_plane);
2471            }
2472            &Inst::StackProbeLoop {
2473                guard_size,
2474                probe_count,
2475                tmp: guard_size_tmp,
2476            } => {
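                // Probe the stack one guard-size page at a time, starting
                // `guard_size * probe_count` bytes below SP and stepping back toward SP,
                // writing one byte per page.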
2477                let step = writable_spilltmp_reg();
2478                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2479                    .iter()
2480                    .for_each(|i| i.emit(sink, emit_info, state));
2481                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2482                    .iter()
2483                    .for_each(|i| i.emit(sink, emit_info, state));
2484
2485                let loop_start = sink.get_label();
2486                let label_done = sink.get_label();
2487                sink.bind_label(loop_start, &mut state.ctrl_plane);
2488                Inst::CondBr {
2489                    taken: CondBrTarget::Label(label_done),
2490                    not_taken: CondBrTarget::Fallthrough,
2491                    kind: IntegerCompare {
2492                        kind: IntCC::UnsignedLessThanOrEqual,
2493                        rs1: step.to_reg(),
2494                        rs2: guard_size_tmp.to_reg(),
2495                    },
2496                }
2497                .emit(sink, emit_info, state);
2498                // Compute the address of the current probe.
2499                Inst::AluRRR {
2500                    alu_op: AluOPRRR::Sub,
2501                    rd: writable_spilltmp_reg2(),
2502                    rs1: stack_reg(),
2503                    rs2: step.to_reg(),
2504                }
2505                .emit(sink, emit_info, state);
2506                Inst::Store {
2507                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2508                    op: StoreOP::Sb,
2509                    flags: MemFlags::new(),
2510                    src: zero_reg(),
2511                }
2512                .emit(sink, emit_info, state);
2513                // Advance to the next page by decrementing step.
2514                Inst::AluRRR {
2515                    alu_op: AluOPRRR::Sub,
2516                    rd: step,
2517                    rs1: step.to_reg(),
2518                    rs2: guard_size_tmp.to_reg(),
2519                }
2520                .emit(sink, emit_info, state);
2521                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2522                sink.bind_label(label_done, &mut state.ctrl_plane);
2523            }
2524            &Inst::VecAluRRRImm5 {
2525                op,
2526                vd,
2527                vd_src,
2528                imm,
2529                vs2,
2530                ref mask,
2531                ..
2532            } => {
2533                debug_assert_eq!(vd.to_reg(), vd_src);
2534
2535                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2536            }
2537            &Inst::VecAluRRRR {
2538                op,
2539                vd,
2540                vd_src,
2541                vs1,
2542                vs2,
2543                ref mask,
2544                ..
2545            } => {
2546                debug_assert_eq!(vd.to_reg(), vd_src);
2547
2548                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2549            }
2550            &Inst::VecAluRRR {
2551                op,
2552                vd,
2553                vs1,
2554                vs2,
2555                ref mask,
2556                ..
2557            } => {
2558                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2559            }
2560            &Inst::VecAluRRImm5 {
2561                op,
2562                vd,
2563                imm,
2564                vs2,
2565                ref mask,
2566                ..
2567            } => {
2568                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2569            }
2570            &Inst::VecAluRR {
2571                op,
2572                vd,
2573                vs,
2574                ref mask,
2575                ..
2576            } => {
2577                sink.put4(encode_valu_rr(op, vd, vs, *mask));
2578            }
2579            &Inst::VecAluRImm5 {
2580                op,
2581                vd,
2582                imm,
2583                ref mask,
2584                ..
2585            } => {
2586                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2587            }
2588            &Inst::VecSetState { rd, ref vstate } => {
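                // Encode a `vsetvli` with a statically-known AVL and the requested vtype.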
2589                sink.put4(encode_vcfg_imm(
2590                    0x57,
2591                    rd.to_reg(),
2592                    vstate.avl.unwrap_static(),
2593                    &vstate.vtype,
2594                ));
2595
2596                // Update the current vector emit state.
2597                state.vstate = EmitVState::Known(*vstate);
2598            }
2599
2600            &Inst::VecLoad {
2601                eew,
2602                to,
2603                ref from,
2604                ref mask,
2605                flags,
2606                ..
2607            } => {
2608                // Vector loads don't support immediate offsets, so we may need to materialize the address in a register.
2609                let addr = match from {
2610                    VecAMode::UnitStride { base } => {
2611                        let base_reg = base.get_base_register();
2612                        let offset = base.get_offset_with_state(state);
2613
2614                        // Reg+0 Offset can be directly encoded
2615                        if let (Some(base_reg), 0) = (base_reg, offset) {
2616                            base_reg
2617                        } else {
2618                            // Otherwise, compute the address into a temporary register first.
2619                            let tmp = writable_spilltmp_reg();
2620                            Inst::LoadAddr {
2621                                rd: tmp,
2622                                mem: *base,
2623                            }
2624                            .emit(sink, emit_info, state);
2625                            tmp.to_reg()
2626                        }
2627                    }
2628                };
2629
2630                if let Some(trap_code) = flags.trap_code() {
2631                    // Register the offset at which the actual load instruction starts.
2632                    sink.add_trap(trap_code);
2633                }
2634
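                // This encodes a unit-stride vector load under the LOAD-FP
                // major opcode (0x07): roughly `vle8.v vd, (addr), vm` for an
                // 8-bit element width, with the exact mnemonic depending on
                // `eew` and the mask.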
                sink.put4(encode_vmem_load(
                    0x07,
                    to.to_reg(),
                    eew,
                    addr,
                    from.lumop(),
                    *mask,
                    from.mop(),
                    from.nf(),
                ));
            }

            &Inst::VecStore {
                eew,
                ref to,
                from,
                ref mask,
                flags,
                ..
            } => {
                // Vector stores don't support immediate offsets, so the address
                // may first need to be materialized in a register.
                let addr = match to {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A base register with a zero offset can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, materialize the address in a temporary
                            // register and store to that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

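                // As with the load above, this encodes a unit-stride access,
                // here under the STORE-FP major opcode (0x27): roughly
                // `vse8.v vs, (addr), vm` for an 8-bit element width.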
                sink.put4(encode_vmem_store(
                    0x27,
                    from,
                    eew,
                    addr,
                    to.sumop(),
                    *mask,
                    to.mop(),
                    to.nf(),
                ));
            }

            Inst::EmitIsland { needed_space } => {
                if sink.island_needed(*needed_space) {
                    let jump_around_label = sink.get_label();
                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
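                    // The island holds non-executable data such as constants
                    // and veneers, so we branch around it; requesting
                    // `needed_space + 4` bytes leaves room for the 4-byte jump
                    // emitted just above.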
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }
        }
    }
}

fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!),
    // so let's emit an island here if we need one.
    //
    // It is difficult to calculate exactly how many bytes the sequence will take, so we
    // measure it by emitting it into a disposable buffer and then checking how many bytes
    // were actually produced.
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// This should not be called directly; prefer calling
/// [emit_return_call_common_sequence] instead.
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };
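
    // At this point the frame (stack growing downward) looks roughly like:
    //
    //   | incoming args           |
    //   | return address          |  <- SP + sp_to_fp_offset + 8
    //   | saved FP                |  <- SP + sp_to_fp_offset
    //   | clobbered callee-saves  |
    //   | fixed frame storage     |
    //   | outgoing args           |  <- SP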

    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8;
    }

    // Restore the link register and frame pointer.
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the
    // callee is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
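
    // Note: the prologue is expected to allocate enough incoming-argument
    // space for the largest tail call in the function, so `tail_args_size`
    // should always be at least `new_stack_arg_size` and this subtraction
    // cannot underflow.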

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}