cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;

pub struct EmitInfo {
    #[expect(dead_code, reason = "may want to be used in the future")]
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

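/// Returns the 5-bit hardware encoding of `m` (x0..x31).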
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

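/// Returns the 3-bit compressed encoding of `m`, which must be one of the
/// RVC-addressable registers x8..x15 (encoded as 0..7).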
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

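/// The vector-unit state known at the current emission point, if any.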
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector state: the state of the vector unit at the current emission
    /// point.
    vstate: EmitVState,

    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clobber_vstate(&mut self) {
        self.vstate = EmitVState::Unknown;
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.clobber_vstate();
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Loads an all-ones mask for the integer type `ty` into `rd`,
    /// e.g. 0xff for I8 and all 64 bits set for I64.
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }

    /// Inverts all bits of `rs` into `rd` (expands to `xori rd, rs, -1`).
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtNameGot { .. }
            | Inst::LoadExtNameNear { .. }
            | Inst::LoadExtNameFar { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate, rather it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
            Inst::EmitIsland { .. } => None,
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this
        // instruction.
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First, try to emit this as a compressed instruction.
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal (uncompressed) instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table, call, return_call and try_call from
        // these checks since they emit their own islands, and thus
        // are allowed to exceed the worst case size.
        let emits_own_island = match self {
            Inst::BrTable { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::EmitIsland { .. } => true,
            _ => false,
        };
        if !emits_own_island {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in a compressed (2-byte) form; returns
    /// `None` if it cannot be compressed.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require
        // Zca to be enabled, so check it early.
        if !has_zca {
            return None;
        }

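        // Most compressed formats can only address the eight registers
        // x8..x15, which have 3-bit encodings.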
        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 and get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // these are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
                state.clobber_vstate();
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
                state.clobber_vstate();
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }
            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
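                // (i.e. the immediate is unchanged by sign-extension from
                // bit 17, the topmost bit c.lui can represent).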
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
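                // For example, a shift of 63 (0b111111) becomes the Imm6
                // value -1; the hardware reads the field as unsigned.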
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
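                // For example, c.ldsp can reach byte offsets 0..=504: the
                // 6-bit unsigned field scaled by the 8-byte load size.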
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
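                    // For example, c.lbu can address byte offsets 0..=3, while
                    // c.lh / c.lhu can only address offsets 0 or 2.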
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
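                    // For example, c.sb can address byte offsets 0..=3, while
                    // c.sh can only address offsets 0 or 2.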
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`; they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        return Some(());
    }

    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction, so it is
                // short and there is no need to check whether we must `emit_island`.
                // If the data were very long, that would be a bug: RawData is
                // typically used to load some data that relies on a position in the
                // code stream, and we could exceed `Inst::worst_case_size`.
                // For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, width, imm } => {
                sink.put4(encode_fli(width, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

                // Load into rd
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op: LoadOP::Flh,
                from,
                flags,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // flh unavailable, use an integer load instead
                Inst::Load {
                    rd: writable_spilltmp_reg(),
                    op: LoadOP::Lh,
                    flags,
                    from,
                }
                .emit(sink, emit_info, state);
                // NaN-box the `f16` before loading it into the floating-point
                // register with a 32-bit `fmv`.
                Inst::Lui {
                    rd: writable_spilltmp_reg2(),
                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd: writable_spilltmp_reg(),
                    rs1: spilltmp_reg(),
                    rs2: spilltmp_reg2(),
                }
                .emit(sink, emit_info, state);
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvFmtX,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd,
                    rs: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
                    // register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // Imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store {
                op: StoreOP::Fsh,
                src,
                flags,
                to,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // fsh unavailable, use an integer store instead
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvXFmt,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd: writable_spilltmp_reg(),
                    rs: src,
                }
                .emit(sink, emit_info, state);
                Inst::Store {
                    to,
                    op: StoreOP::Sh,
                    flags,
                    src: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction; instead we emit
                // the equivalent `jalr x0, x1, 0`, which jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
                let is_u8 = || from_bits == 8 && !signed;
                if is_u8() {
                    // Special case for u8: a single `andi` suffices.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
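                    // General case: shift left so the source's top bit lands in
                    // bit 63, then shift right (arithmetic for signed, logical
                    // for unsigned) to extend it back down.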
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }

            &Inst::Call { ref info } => {
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

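                // The auipc+jalr pair below is patched to the real target via
                // the RiscvCallPlt relocation registered above.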
                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site();
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
                sink.put4(0b1101111);
                state.clobber_vstate();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MovFromPReg { rd, rm } => {
                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
            }

1378            &Inst::BrTable {
1379                index,
1380                tmp1,
1381                tmp2,
1382                ref targets,
1383            } => {
1384                let ext_index = writable_spilltmp_reg();
1385
1386                let label_compute_target = sink.get_label();
1387
1388                // The default target is passed in as the 0th element of `targets`;
1389                // separate it here for clarity.
1390                let default_target = targets[0];
1391                let targets = &targets[1..];
1392
1393                // We are going to potentially emit a large number of instructions, so ensure that we
1394                // emit an island now if we need one.
1395                //
1396                // The worst-case PC computation is 12 instructions, and each entry in the jump table is
1397                // 2 instructions. Check whether we need to emit an island here to keep every target in range.
1398                let inst_count = 12 + (targets.len() * 2);
1399                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1400                if sink.island_needed(distance) {
1401                    let jump_around_label = sink.get_label();
1402                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1403                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1404                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1405                }
1406
1407                // We emit a bounds check on the index: if the index is larger than the number of
1408                // jump table entries, we jump to the default block. Otherwise we compute a jump
1409                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1410                // that offset. Each jump table entry is a regular auipc+jalr, which we emit sequentially.
1411                //
1412                // Build the following sequence:
1413                //
1414                // extend_index:
1415                //     zext.w  ext_index, index
1416                // bounds_check:
1417                //     li      tmp, n_labels
1418                //     bltu    ext_index, tmp, compute_target
1419                // jump_to_default_block:
1420                //     auipc   pc, 0
1421                //     jalr    zero, pc, default_block
1422                // compute_target:
1423                //     auipc   pc, 0
1424                //     slli    tmp, ext_index, 3
1425                //     add     pc, pc, tmp
1426                //     jalr    zero, pc, 0x10
1427                // jump_table:
1428                //     ; This repeats for each entry in the jumptable
1429                //     auipc   pc, 0
1430                //     jalr    zero, pc, block_target
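                //
                // As a worked example (a sketch, not literal output): with
                // ext_index = 2, the auipc at compute_target captures PC,
                // slli computes tmp = 2 << 3 = 16, and the jalr lands at
                // PC + 16 + 0x10, i.e. the third 8-byte entry of the table,
                // which begins at PC + 0x10.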
1431
1432                // Extend the index to 64 bits.
1433                //
1434                // This prevents us from branching on the top 32 bits of the index, which
1435                // are undefined.
1436                Inst::Extend {
1437                    rd: ext_index,
1438                    rn: index,
1439                    signed: false,
1440                    from_bits: 32,
1441                    to_bits: 64,
1442                }
1443                .emit(sink, emit_info, state);
1444
1445                // Bounds check.
1446                //
1447                // Check if the index passed in is larger than the number of jump table
1448                // entries that we have. If it is, we fall through to a jump into the
1449                // default block.
1450                Inst::load_constant_u32(tmp2, targets.len() as u64)
1451                    .iter()
1452                    .for_each(|i| i.emit(sink, emit_info, state));
1453                Inst::CondBr {
1454                    taken: CondBrTarget::Label(label_compute_target),
1455                    not_taken: CondBrTarget::Fallthrough,
1456                    kind: IntegerCompare {
1457                        kind: IntCC::UnsignedLessThan,
1458                        rs1: ext_index.to_reg(),
1459                        rs2: tmp2.to_reg(),
1460                    },
1461                }
1462                .emit(sink, emit_info, state);
1463
1464                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1465                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1466                    .iter()
1467                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1468
1469                // Compute the jump table offset.
1470                // The jump below is PC-relative, so first capture the current PC.
1471                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1472
1473                // Get the current PC.
1474                Inst::Auipc {
1475                    rd: tmp1,
1476                    imm: Imm20::ZERO,
1477                }
1478                .emit_uncompressed(sink, emit_info, state, start_off);
1479
1480                // These instructions must be emitted as uncompressed since we
1481                // are manually computing the offset from the PC.
1482
1483                // Multiply the index by 8, since that is the size in
1484                // bytes of each jump table entry
1485                Inst::AluRRImm12 {
1486                    alu_op: AluOPRRI::Slli,
1487                    rd: tmp2,
1488                    rs: ext_index.to_reg(),
1489                    imm12: Imm12::from_i16(3),
1490                }
1491                .emit_uncompressed(sink, emit_info, state, start_off);
1492
1493                // Calculate the base of the jump, PC + the offset from above.
1494                Inst::AluRRR {
1495                    alu_op: AluOPRRR::Add,
1496                    rd: tmp1,
1497                    rs1: tmp1.to_reg(),
1498                    rs2: tmp2.to_reg(),
1499                }
1500                .emit_uncompressed(sink, emit_info, state, start_off);
1501
1502                // Jump into the jump table at the entry for this index.
1503                // We add a 16-byte offset here because the table starts
1504                // 4 instructions after the AUIPC that captured the PC.
1505                Inst::Jalr {
1506                    rd: writable_zero_reg(),
1507                    base: tmp1.to_reg(),
1508                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1509                }
1510                .emit_uncompressed(sink, emit_info, state, start_off);
1511
1512                // Emit the jump table.
1513                //
1514                // Each entry is an auipc + jalr to the target block; the island
1515                // needed to keep these targets in range was emitted above.
1516
1517                // Emit the jumps back to back.
1518                for target in targets.iter() {
1519                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1520
1521                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1522                        .iter()
1523                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1524                }
1525
1526                // We've just emitted an island that is safe up to *here*.
1527                // Mark it as such so that we don't needlessly emit additional islands.
1528                *start_off = sink.cur_offset();
1529            }
1530
1531            &Inst::Atomic {
1532                op,
1533                rd,
1534                addr,
1535                src,
1536                amo,
1537            } => {
1538                // TODO: get flags from original CLIF atomic instruction
1539                let flags = MemFlags::new();
1540                if let Some(trap_code) = flags.trap_code() {
1541                    sink.add_trap(trap_code);
1542                }
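                // The word is assembled in the R-type AMO layout, from low to
                // high bits: opcode | rd | funct3 (width) | rs1 (addr) |
                // rs2 (src) | funct7 (op plus the aq/rl ordering bits).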
1543                let x = op.op_code()
1544                    | reg_to_gpr_num(rd.to_reg()) << 7
1545                    | op.funct3() << 12
1546                    | reg_to_gpr_num(addr) << 15
1547                    | reg_to_gpr_num(src) << 20
1548                    | op.funct7(amo) << 25;
1549
1550                sink.put4(x);
1551            }
1552            &Inst::Fence { pred, succ } => {
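                // FENCE layout, from low to high bits: opcode (0001111) | rd |
                // funct3 | rs1 | succ [23:20] | pred [27:24], with rd, rs1,
                // and the fm field all left as zero for a plain fence.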
1553                let x = 0b0001111
1554                    | 0b00000 << 7
1555                    | 0b000 << 12
1556                    | 0b00000 << 15
1557                    | (succ as u32) << 20
1558                    | (pred as u32) << 24;
1559
1560                sink.put4(x);
1561            }
1562            &Inst::Auipc { rd, imm } => {
1563                sink.put4(enc_auipc(rd, imm));
1564            }
1565
1566            &Inst::LoadAddr { rd, mem } => {
1567                let base = mem.get_base_register();
1568                let offset = mem.get_offset_with_state(state);
1569                let offset_imm12 = Imm12::maybe_from_i64(offset);
1570
1571                match (mem, base, offset_imm12) {
1572                    (_, Some(rs), Some(imm12)) => {
1573                        Inst::AluRRImm12 {
1574                            alu_op: AluOPRRI::Addi,
1575                            rd,
1576                            rs,
1577                            imm12,
1578                        }
1579                        .emit(sink, emit_info, state);
1580                    }
1581                    (_, Some(rs), None) => {
1582                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1583                        insts.push(Inst::AluRRR {
1584                            alu_op: AluOPRRR::Add,
1585                            rd,
1586                            rs1: rd.to_reg(),
1587                            rs2: rs,
1588                        });
1589                        insts
1590                            .into_iter()
1591                            .for_each(|inst| inst.emit(sink, emit_info, state));
1592                    }
1593                    (AMode::Const(addr), None, _) => {
1594                        // Get an address label for the constant and recurse.
1595                        let label = sink.get_label_for_constant(addr);
1596                        Inst::LoadAddr {
1597                            rd,
1598                            mem: AMode::Label(label),
1599                        }
1600                        .emit(sink, emit_info, state);
1601                    }
1602                    (AMode::Label(label), None, _) => {
1603                        // Get the current PC.
1604                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1605                        let inst = Inst::Auipc {
1606                            rd,
1607                            imm: Imm20::ZERO,
1608                        };
1609                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1610
1611                        // Emit an add to the address with a relocation.
1612                        // This later gets patched up with the correct offset.
1613                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1614                        Inst::AluRRImm12 {
1615                            alu_op: AluOPRRI::Addi,
1616                            rd,
1617                            rs: rd.to_reg(),
1618                            imm12: Imm12::ZERO,
1619                        }
1620                        .emit_uncompressed(sink, emit_info, state, start_off);
1621                    }
1622                    (amode, _, _) => {
1623                        unimplemented!("LoadAddr: {:?}", amode);
1624                    }
1625                }
1626            }
1627
1628            &Inst::Select {
1629                ref dst,
1630                condition,
1631                ref x,
1632                ref y,
1633            } => {
1634                // The general form for this select is the following:
1635                //
1636                //     mv rd, x
1637                //     b{cond} rcond, label_end
1638                //     mv rd, y
1639                // label_end:
1640                //     ... etc
1641                //
1642                // This is built on the assumption that moves are cheap, but branches and jumps
1643                // are not. So with this format we always avoid one jump instruction at the expense
1644                // of an unconditional move.
1645                //
1646                // We also perform another optimization here. If the destination register is the same
1647                // as one of the input registers, we can avoid emitting the first unconditional move
1648                // and emit just the branch and the second move.
1649                //
1650                // To make sure that this happens as often as possible, we also try to invert the
1651                // condition, so that if either of the input registers is the same as the destination
1652                // we avoid that move.
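                //
                // For example (sketch): for `rd = select cond, x, y` with
                // rd == y, we invert the condition, the unconditional
                // `mv rd, y` becomes a no-op, and we effectively emit:
                //
                //     b{!cond} rcond, label_end
                //     mv rd, x
                // label_end: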
1653
1654                let label_end = sink.get_label();
1655
1656                let xregs = x.regs();
1657                let yregs = y.regs();
1658                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1659                let condregs = condition.regs();
1660
1661                // We are going to write to the destination register before evaluating
1662                // the condition, so we need to make sure that the destination register
1663                // is not one of the condition registers.
1664                //
1665                // Such an overlap should never happen, as the regalloc constraints
1666                // for this instruction are set up to prevent it.
1667                debug_assert_ne!(dstregs, condregs);
1668
1669                // Check if we can invert the condition and avoid moving the y registers into
1670                // the destination. This allows us to only emit the branch and one of the moves.
1671                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1672                    (yregs, xregs, condition.inverse())
1673                } else {
1674                    (xregs, yregs, condition)
1675                };
1676
1677                // Unconditionally move one of the values to the destination register.
1678                //
1679                // These moves may not end up being emitted if the source and
1680                // destination registers are the same. That logic is built into
1681                // the emit function for `Inst::Mov`.
1682                for i in gen_moves(dst.regs(), uncond_move) {
1683                    i.emit(sink, emit_info, state);
1684                }
1685
1686                // If the condition holds, we skip over the conditional move.
1687                Inst::CondBr {
1688                    taken: CondBrTarget::Label(label_end),
1689                    not_taken: CondBrTarget::Fallthrough,
1690                    kind: condition,
1691                }
1692                .emit(sink, emit_info, state);
1693
1694                // Move the conditional value to the destination register.
1695                for i in gen_moves(dst.regs(), cond_move) {
1696                    i.emit(sink, emit_info, state);
1697                }
1698
1699                sink.bind_label(label_end, &mut state.ctrl_plane);
1700            }
1701            &Inst::Jalr { rd, base, offset } => {
1702                sink.put4(enc_jalr(rd, base, offset));
1703                state.clobber_vstate();
1704            }
1705            &Inst::EBreak => {
1706                sink.put4(0x00100073);
1707            }
1708            &Inst::AtomicCas {
1709                offset,
1710                t0,
1711                dst,
1712                e,
1713                addr,
1714                v,
1715                ty,
1716            } => {
1717                //     # addr holds address of memory location
1718                //     # e holds expected value
1719                //     # v holds desired value
1720                //     # dst holds return value
1721                // cas:
1722                //     lr.w dst, (addr)       # Load original value.
1723                //     bne dst, e, fail       # Doesn't match, so fail.
1724                //     sc.w t0, v, (addr)     # Try to update.
1725                //     bnez t0, cas           # If the store failed, retry.
1726                // fail:
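                //
                // For subword types (8 and 16 bits) the LR/SC pair operates on
                // the enclosing 32-bit word; `AtomicOP::extract` and
                // `AtomicOP::merge` below select and update just the addressed
                // bytes within that word.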
1727                let fail_label = sink.get_label();
1728                let cas_label = sink.get_label();
1729                sink.bind_label(cas_label, &mut state.ctrl_plane);
1730                Inst::Atomic {
1731                    op: AtomicOP::load_op(ty),
1732                    rd: dst,
1733                    addr,
1734                    src: zero_reg(),
1735                    amo: AMO::SeqCst,
1736                }
1737                .emit(sink, emit_info, state);
1738                if ty.bits() < 32 {
1739                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1740                        .iter()
1741                        .for_each(|i| i.emit(sink, emit_info, state));
1742                } else if ty.bits() == 32 {
1743                    Inst::Extend {
1744                        rd: dst,
1745                        rn: dst.to_reg(),
1746                        signed: false,
1747                        from_bits: 32,
1748                        to_bits: 64,
1749                    }
1750                    .emit(sink, emit_info, state);
1751                }
1752                Inst::CondBr {
1753                    taken: CondBrTarget::Label(fail_label),
1754                    not_taken: CondBrTarget::Fallthrough,
1755                    kind: IntegerCompare {
1756                        kind: IntCC::NotEqual,
1757                        rs1: e,
1758                        rs2: dst.to_reg(),
1759                    },
1760                }
1761                .emit(sink, emit_info, state);
1762                let store_value = if ty.bits() < 32 {
1763                    // Reload the current memory word into t0.
1764                    Inst::Atomic {
1765                        op: AtomicOP::load_op(ty),
1766                        rd: t0,
1767                        addr,
1768                        src: zero_reg(),
1769                        amo: AMO::SeqCst,
1770                    }
1771                    .emit(sink, emit_info, state);
1772                    // Merge the new value into the loaded word, preserving the surrounding bytes.
1773                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1774                        .iter()
1775                        .for_each(|i| i.emit(sink, emit_info, state));
1776                    t0.to_reg()
1777                } else {
1778                    v
1779                };
1780                Inst::Atomic {
1781                    op: AtomicOP::store_op(ty),
1782                    rd: t0,
1783                    addr,
1784                    src: store_value,
1785                    amo: AMO::SeqCst,
1786                }
1787                .emit(sink, emit_info, state);
1788                // Check whether our value was actually stored.
1789                Inst::CondBr {
1790                    taken: CondBrTarget::Label(cas_label),
1791                    not_taken: CondBrTarget::Fallthrough,
1792                    kind: IntegerCompare {
1793                        kind: IntCC::NotEqual,
1794                        rs1: t0.to_reg(),
1795                        rs2: zero_reg(),
1796                    },
1797                }
1798                .emit(sink, emit_info, state);
1799                sink.bind_label(fail_label, &mut state.ctrl_plane);
1800            }
1801            &Inst::AtomicRmwLoop {
1802                offset,
1803                op,
1804                dst,
1805                ty,
1806                p,
1807                x,
1808                t0,
1809            } => {
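                // The overall shape is a classic LR/SC retry loop (sketch):
                //
                // retry:
                //     lr.{w,d}  dst, (p)
                //     <compute store_value from dst and x>
                //     sc.{w,d}  t0, store_value, (p)
                //     bnez      t0, retry        # Store failed, try again.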
1810                let retry = sink.get_label();
1811                sink.bind_label(retry, &mut state.ctrl_plane);
1812                // load old value.
1813                Inst::Atomic {
1814                    op: AtomicOP::load_op(ty),
1815                    rd: dst,
1816                    addr: p,
1817                    src: zero_reg(),
1818                    amo: AMO::SeqCst,
1819                }
1820                .emit(sink, emit_info, state);
1821
1822                // Compute the value to store back, depending on the op.
1823                let store_value: Reg = match op {
1824                    crate::ir::AtomicRmwOp::Add
1825                    | crate::ir::AtomicRmwOp::Sub
1826                    | crate::ir::AtomicRmwOp::And
1827                    | crate::ir::AtomicRmwOp::Or
1828                    | crate::ir::AtomicRmwOp::Xor => {
1829                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1830                            .iter()
1831                            .for_each(|i| i.emit(sink, emit_info, state));
1832                        Inst::AluRRR {
1833                            alu_op: match op {
1834                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1835                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1836                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1837                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1838                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1839                                _ => unreachable!(),
1840                            },
1841                            rd: t0,
1842                            rs1: dst.to_reg(),
1843                            rs2: x,
1844                        }
1845                        .emit(sink, emit_info, state);
1846                        Inst::Atomic {
1847                            op: AtomicOP::load_op(ty),
1848                            rd: writable_spilltmp_reg2(),
1849                            addr: p,
1850                            src: zero_reg(),
1851                            amo: AMO::SeqCst,
1852                        }
1853                        .emit(sink, emit_info, state);
1854                        AtomicOP::merge(
1855                            writable_spilltmp_reg2(),
1856                            writable_spilltmp_reg(),
1857                            offset,
1858                            t0.to_reg(),
1859                            ty,
1860                        )
1861                        .iter()
1862                        .for_each(|i| i.emit(sink, emit_info, state));
1863                        spilltmp_reg2()
1864                    }
1865                    crate::ir::AtomicRmwOp::Nand => {
1866                        if ty.bits() < 32 {
1867                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1868                                .iter()
1869                                .for_each(|i| i.emit(sink, emit_info, state));
1870                        }
1871                        Inst::AluRRR {
1872                            alu_op: AluOPRRR::And,
1873                            rd: t0,
1874                            rs1: x,
1875                            rs2: dst.to_reg(),
1876                        }
1877                        .emit(sink, emit_info, state);
1878                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1879                        if ty.bits() < 32 {
1880                            Inst::Atomic {
1881                                op: AtomicOP::load_op(ty),
1882                                rd: writable_spilltmp_reg2(),
1883                                addr: p,
1884                                src: zero_reg(),
1885                                amo: AMO::SeqCst,
1886                            }
1887                            .emit(sink, emit_info, state);
1888                            AtomicOP::merge(
1889                                writable_spilltmp_reg2(),
1890                                writable_spilltmp_reg(),
1891                                offset,
1892                                t0.to_reg(),
1893                                ty,
1894                            )
1895                            .iter()
1896                            .for_each(|i| i.emit(sink, emit_info, state));
1897                            spilltmp_reg2()
1898                        } else {
1899                            t0.to_reg()
1900                        }
1901                    }
1902
1903                    crate::ir::AtomicRmwOp::Umin
1904                    | crate::ir::AtomicRmwOp::Umax
1905                    | crate::ir::AtomicRmwOp::Smin
1906                    | crate::ir::AtomicRmwOp::Smax => {
1907                        let label_select_dst = sink.get_label();
1908                        let label_select_done = sink.get_label();
1909                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1910                        {
1911                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1912                        } else {
1913                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1914                        }
1915                        .iter()
1916                        .for_each(|i| i.emit(sink, emit_info, state));
1917
1918                        Inst::CondBr {
1919                            taken: CondBrTarget::Label(label_select_dst),
1920                            not_taken: CondBrTarget::Fallthrough,
1921                            kind: IntegerCompare {
1922                                kind: match op {
1923                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1924                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1925                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1926                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1927                                    _ => unreachable!(),
1928                                },
1929                                rs1: dst.to_reg(),
1930                                rs2: x,
1931                            },
1932                        }
1933                        .emit(sink, emit_info, state);
1934                        // The branch was not taken, so select x.
1935                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1936                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1937                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1938                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1939                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1940                        Inst::Atomic {
1941                            op: AtomicOP::load_op(ty),
1942                            rd: writable_spilltmp_reg2(),
1943                            addr: p,
1944                            src: zero_reg(),
1945                            amo: AMO::SeqCst,
1946                        }
1947                        .emit(sink, emit_info, state);
1948                        AtomicOP::merge(
1949                            writable_spilltmp_reg2(),
1950                            writable_spilltmp_reg(),
1951                            offset,
1952                            t0.to_reg(),
1953                            ty,
1954                        )
1955                        .iter()
1956                        .for_each(|i| i.emit(sink, emit_info, state));
1957                        spilltmp_reg2()
1958                    }
1959                    crate::ir::AtomicRmwOp::Xchg => {
1960                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1961                            .iter()
1962                            .for_each(|i| i.emit(sink, emit_info, state));
1963                        Inst::Atomic {
1964                            op: AtomicOP::load_op(ty),
1965                            rd: writable_spilltmp_reg2(),
1966                            addr: p,
1967                            src: zero_reg(),
1968                            amo: AMO::SeqCst,
1969                        }
1970                        .emit(sink, emit_info, state);
1971                        AtomicOP::merge(
1972                            writable_spilltmp_reg2(),
1973                            writable_spilltmp_reg(),
1974                            offset,
1975                            x,
1976                            ty,
1977                        )
1978                        .iter()
1979                        .for_each(|i| i.emit(sink, emit_info, state));
1980                        spilltmp_reg2()
1981                    }
1982                };
1983
1984                Inst::Atomic {
1985                    op: AtomicOP::store_op(ty),
1986                    rd: t0,
1987                    addr: p,
1988                    src: store_value,
1989                    amo: AMO::SeqCst,
1990                }
1991                .emit(sink, emit_info, state);
1992
1993                // If the store failed, retry.
1994                Inst::CondBr {
1995                    taken: CondBrTarget::Label(retry),
1996                    not_taken: CondBrTarget::Fallthrough,
1997                    kind: IntegerCompare {
1998                        kind: IntCC::NotEqual,
1999                        rs1: t0.to_reg(),
2000                        rs2: zero_reg(),
2001                    },
2002                }
2003                .emit(sink, emit_info, state);
2004            }
2005
2006            &Inst::LoadExtNameGot { rd, ref name } => {
2007                // Load a PC-relative address into a register.
2008                // RISC-V does this slightly differently from other arches. We emit a relocation
2009                // with a label, instead of the symbol itself.
2010                //
2011                // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2012                //
2013                // Emit the following code:
2014                // label:
2015                //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2016                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2017
2018                // Create the label that is going to be published to the final binary object.
2019                let auipc_label = sink.get_label();
2020                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2021
2022                // Get the current PC.
2023                sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2024                Inst::Auipc {
2025                    rd,
2026                    imm: Imm20::from_i32(0),
2027                }
2028                .emit_uncompressed(sink, emit_info, state, start_off);
2029
2030                // The `ld` here points to the `auipc` label instead of directly to the symbol.
2031                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2032                Inst::Load {
2033                    rd,
2034                    op: LoadOP::Ld,
2035                    flags: MemFlags::trusted(),
2036                    from: AMode::RegOffset(rd.to_reg(), 0),
2037                }
2038                .emit_uncompressed(sink, emit_info, state, start_off);
2039            }
2040
2041            &Inst::LoadExtNameFar {
2042                rd,
2043                ref name,
2044                offset,
2045            } => {
2046                // In the non-PIC sequence we relocate the absolute address into
2047                // a preallocated slot, load it into a register, and jump over
2048                // it.
2049                //
2050                // Emit the following code:
2051                //   ld rd, label_data
2052                //   j label_end
2053                // label_data:
2054                //   <8 byte space>           # ABS8
2055                // label_end:
2056
2057                let label_data = sink.get_label();
2058                let label_end = sink.get_label();
2059
2060                // Load the value from a label
2061                Inst::Load {
2062                    rd,
2063                    op: LoadOP::Ld,
2064                    flags: MemFlags::trusted(),
2065                    from: AMode::Label(label_data),
2066                }
2067                .emit(sink, emit_info, state);
2068
2069                // Jump over the data
2070                Inst::gen_jump(label_end).emit(sink, emit_info, state);
2071
2072                sink.bind_label(label_data, &mut state.ctrl_plane);
2073                sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2074                sink.put8(0);
2075
2076                sink.bind_label(label_end, &mut state.ctrl_plane);
2077            }
2078
2079            &Inst::LoadExtNameNear {
2080                rd,
2081                ref name,
2082                offset,
2083            } => {
2084                // Emit the following code:
2085                // label:
2086                //   auipc rd, 0              # R_RISCV_PCREL_HI20 (symbol_name)
2087                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2088
2089                let auipc_label = sink.get_label();
2090                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2091
2092                // Get the current PC.
2093                sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2094                Inst::Auipc {
2095                    rd,
2096                    imm: Imm20::from_i32(0),
2097                }
2098                .emit_uncompressed(sink, emit_info, state, start_off);
2099
2100                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2101                Inst::AluRRImm12 {
2102                    alu_op: AluOPRRI::Addi,
2103                    rd,
2104                    rs: rd.to_reg(),
2105                    imm12: Imm12::ZERO,
2106                }
2107                .emit_uncompressed(sink, emit_info, state, start_off);
2108            }
2109
2110            &Inst::LabelAddress { dst, label } => {
2111                let offset = sink.cur_offset();
2112                Inst::Auipc {
2113                    rd: dst,
2114                    imm: Imm20::from_i32(0),
2115                }
2116                .emit_uncompressed(sink, emit_info, state, start_off);
2117                sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2118
2119                let offset = sink.cur_offset();
2120                Inst::AluRRImm12 {
2121                    alu_op: AluOPRRI::Addi,
2122                    rd: dst,
2123                    rs: dst.to_reg(),
2124                    imm12: Imm12::ZERO,
2125                }
2126                .emit_uncompressed(sink, emit_info, state, start_off);
2127                sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2128            }
2129
2130            &Inst::ElfTlsGetAddr { rd, ref name } => {
2131                // RISC-V's TLS GD model is slightly different from other arches.
2132                //
2133                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2134                // of the address relative to the GOT entry. This relocation points to
2135                // the symbol as usual.
2136                //
2137                // However, when loading the bottom 12 bits of the address, we need to
2138                // use a label that points to the previous AUIPC instruction.
2139                //
2140                // label:
2141                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2142                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2143                //
2144                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2145
2146                // Create the label that is going to be published to the final binary object.
2147                let auipc_label = sink.get_label();
2148                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2149
2150                // Get the current PC.
2151                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2152                Inst::Auipc {
2153                    rd,
2154                    imm: Imm20::from_i32(0),
2155                }
2156                .emit_uncompressed(sink, emit_info, state, start_off);
2157
2158                // The `addi` here points to the `auipc` label instead of directly to the symbol.
2159                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2160                Inst::AluRRImm12 {
2161                    alu_op: AluOPRRI::Addi,
2162                    rd,
2163                    rs: rd.to_reg(),
2164                    imm12: Imm12::from_i16(0),
2165                }
2166                .emit_uncompressed(sink, emit_info, state, start_off);
2167
2168                Inst::Call {
2169                    info: Box::new(CallInfo::empty(
2170                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2171                        CallConv::SystemV,
2172                    )),
2173                }
2174                .emit_uncompressed(sink, emit_info, state, start_off);
2175            }
2176
2177            &Inst::TrapIf {
2178                rs1,
2179                rs2,
2180                cc,
2181                trap_code,
2182            } => {
2183                let label_end = sink.get_label();
2184                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2185
2186                // Jump over the trap if the condition is false.
2187                Inst::CondBr {
2188                    taken: CondBrTarget::Label(label_end),
2189                    not_taken: CondBrTarget::Fallthrough,
2190                    kind: cond.inverse(),
2191                }
2192                .emit(sink, emit_info, state);
2193                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2194
2195                sink.bind_label(label_end, &mut state.ctrl_plane);
2196            }
2197            &Inst::Udf { trap_code } => {
2198                sink.add_trap(trap_code);
2199                sink.put_data(Inst::TRAP_OPCODE);
2200            }
2201            &Inst::AtomicLoad { rd, ty, p } => {
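                // Fence-based sequentially consistent load (sketch):
                //
                //     fence rw,rw
                //     l{b,h,w,d} rd, 0(p)
                //     fence r,rw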
2202                // Emit the leading fence.
2203                Inst::Fence {
2204                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2205                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2206                }
2207                .emit(sink, emit_info, state);
2208                // Perform the load.
2209                Inst::Load {
2210                    rd,
2211                    op: LoadOP::from_type(ty),
2212                    flags: MemFlags::new(),
2213                    from: AMode::RegOffset(p, 0),
2214                }
2215                .emit(sink, emit_info, state);
2216                Inst::Fence {
2217                    pred: Inst::FENCE_REQ_R,
2218                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2219                }
2220                .emit(sink, emit_info, state);
2221            }
2222            &Inst::AtomicStore { src, ty, p } => {
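                // Fence-based sequentially consistent store (sketch):
                //
                //     fence rw,w
                //     s{b,h,w,d} src, 0(p)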
2223                Inst::Fence {
2224                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2225                    succ: Inst::FENCE_REQ_W,
2226                }
2227                .emit(sink, emit_info, state);
2228                Inst::Store {
2229                    to: AMode::RegOffset(p, 0),
2230                    op: StoreOP::from_type(ty),
2231                    flags: MemFlags::new(),
2232                    src,
2233                }
2234                .emit(sink, emit_info, state);
2235            }
2236
2237            &Inst::Popcnt {
2238                sum,
2239                tmp,
2240                step,
2241                rs,
2242                ty,
2243            } => {
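                // Bit-serial popcount loop (sketch):
                //
                //     sum  = 0
                //     tmp  = 1 << (ty.bits() - 1)
                //     step = ty.bits()
                // loop:
                //     if step <= 0 goto done
                //     if rs & tmp != 0 { sum += 1 }
                //     step -= 1
                //     tmp >>= 1
                //     goto loop
                // done: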
2244                // Initialize sum to 0.
2245                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2246                // Load the number of bits to test into step.
2247                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2248                    .emit(sink, emit_info, state);
2249                // Build the bit mask in tmp, starting at the type's top bit.
2250                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2251                Inst::AluRRImm12 {
2252                    alu_op: AluOPRRI::Slli,
2253                    rd: tmp,
2254                    rs: tmp.to_reg(),
2255                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2256                }
2257                .emit(sink, emit_info, state);
2258                let label_done = sink.get_label();
2259                let label_loop = sink.get_label();
2260                sink.bind_label(label_loop, &mut state.ctrl_plane);
2261                Inst::CondBr {
2262                    taken: CondBrTarget::Label(label_done),
2263                    not_taken: CondBrTarget::Fallthrough,
2264                    kind: IntegerCompare {
2265                        kind: IntCC::SignedLessThanOrEqual,
2266                        rs1: step.to_reg(),
2267                        rs2: zero_reg(),
2268                    },
2269                }
2270                .emit(sink, emit_info, state);
2271                // Test the current bit and increment sum if it is set.
2272                {
2273                    Inst::AluRRR {
2274                        alu_op: AluOPRRR::And,
2275                        rd: writable_spilltmp_reg2(),
2276                        rs1: tmp.to_reg(),
2277                        rs2: rs,
2278                    }
2279                    .emit(sink, emit_info, state);
2280                    let label_over = sink.get_label();
2281                    Inst::CondBr {
2282                        taken: CondBrTarget::Label(label_over),
2283                        not_taken: CondBrTarget::Fallthrough,
2284                        kind: IntegerCompare {
2285                            kind: IntCC::Equal,
2286                            rs1: zero_reg(),
2287                            rs2: spilltmp_reg2(),
2288                        },
2289                    }
2290                    .emit(sink, emit_info, state);
2291                    Inst::AluRRImm12 {
2292                        alu_op: AluOPRRI::Addi,
2293                        rd: sum,
2294                        rs: sum.to_reg(),
2295                        imm12: Imm12::ONE,
2296                    }
2297                    .emit(sink, emit_info, state);
2298                    sink.bind_label(label_over, &mut state.ctrl_plane);
2299                }
2300                // Decrement step and shift tmp to the next bit.
2301                {
2302                    Inst::AluRRImm12 {
2303                        alu_op: AluOPRRI::Addi,
2304                        rd: step,
2305                        rs: step.to_reg(),
2306                        imm12: Imm12::from_i16(-1),
2307                    }
2308                    .emit(sink, emit_info, state);
2309                    Inst::AluRRImm12 {
2310                        alu_op: AluOPRRI::Srli,
2311                        rd: tmp,
2312                        rs: tmp.to_reg(),
2313                        imm12: Imm12::ONE,
2314                    }
2315                    .emit(sink, emit_info, state);
2316                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2317                }
2318                sink.bind_label(label_done, &mut state.ctrl_plane);
2319            }
2320            &Inst::Cltz {
2321                sum,
2322                tmp,
2323                step,
2324                rs,
2325                leading,
2326                ty,
2327            } => {
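                // Bit-serial count of leading/trailing zeros (sketch):
                //
                //     sum  = 0
                //     tmp  = leading ? 1 << (ty.bits() - 1) : 1
                //     step = ty.bits()
                // loop:
                //     if step <= 0 goto done
                //     if rs & tmp != 0 goto done    # First set bit ends the count.
                //     sum += 1
                //     step -= 1
                //     tmp  = leading ? tmp >> 1 : tmp << 1
                //     goto loop
                // done: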
2328                // Initialize sum to 0.
2329                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2330                // Load the number of bits to test into step.
2331                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2332                    .emit(sink, emit_info, state);
2333                // Build the bit mask in tmp: the top bit if counting leading zeros, else bit 0.
2334                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2335                if leading {
2336                    Inst::AluRRImm12 {
2337                        alu_op: AluOPRRI::Slli,
2338                        rd: tmp,
2339                        rs: tmp.to_reg(),
2340                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2341                    }
2342                    .emit(sink, emit_info, state);
2343                }
2344                let label_done = sink.get_label();
2345                let label_loop = sink.get_label();
2346                sink.bind_label(label_loop, &mut state.ctrl_plane);
2347                Inst::CondBr {
2348                    taken: CondBrTarget::Label(label_done),
2349                    not_taken: CondBrTarget::Fallthrough,
2350                    kind: IntegerCompare {
2351                        kind: IntCC::SignedLessThanOrEqual,
2352                        rs1: step.to_reg(),
2353                        rs2: zero_reg(),
2354                    },
2355                }
2356                .emit(sink, emit_info, state);
2357                // Test the current bit; the first set bit ends the count.
2358                {
2359                    Inst::AluRRR {
2360                        alu_op: AluOPRRR::And,
2361                        rd: writable_spilltmp_reg2(),
2362                        rs1: tmp.to_reg(),
2363                        rs2: rs,
2364                    }
2365                    .emit(sink, emit_info, state);
2366                    Inst::CondBr {
2367                        taken: CondBrTarget::Label(label_done),
2368                        not_taken: CondBrTarget::Fallthrough,
2369                        kind: IntegerCompare {
2370                            kind: IntCC::NotEqual,
2371                            rs1: zero_reg(),
2372                            rs2: spilltmp_reg2(),
2373                        },
2374                    }
2375                    .emit(sink, emit_info, state);
2376                    Inst::AluRRImm12 {
2377                        alu_op: AluOPRRI::Addi,
2378                        rd: sum,
2379                        rs: sum.to_reg(),
2380                        imm12: Imm12::ONE,
2381                    }
2382                    .emit(sink, emit_info, state);
2383                }
2384                // Decrement step and shift tmp to the next bit.
2385                {
2386                    Inst::AluRRImm12 {
2387                        alu_op: AluOPRRI::Addi,
2388                        rd: step,
2389                        rs: step.to_reg(),
2390                        imm12: Imm12::from_i16(-1),
2391                    }
2392                    .emit(sink, emit_info, state);
2393                    Inst::AluRRImm12 {
2394                        alu_op: if leading {
2395                            AluOPRRI::Srli
2396                        } else {
2397                            AluOPRRI::Slli
2398                        },
2399                        rd: tmp,
2400                        rs: tmp.to_reg(),
2401                        imm12: Imm12::ONE,
2402                    }
2403                    .emit(sink, emit_info, state);
2404                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2405                }
2406                sink.bind_label(label_done, &mut state.ctrl_plane);
2407            }
2408            &Inst::Brev8 {
2409                rs,
2410                ty,
2411                step,
2412                tmp,
2413                tmp2,
2414                rd,
2415            } => {
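                // Reverse the bits within each byte using a bit-serial loop
                // (sketch): a read mask (tmp) walks the source from the top
                // bit down, while a write mask (tmp2) walks each destination
                // byte from its low bit up, dropping to the next byte after
                // every 8 steps.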
2416                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2417                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2418                    .emit(sink, emit_info, state);
2419                // tmp is the read mask: it starts at the type's top bit and moves down.
2420                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2421                Inst::AluRRImm12 {
2422                    alu_op: AluOPRRI::Slli,
2423                    rd: tmp,
2424                    rs: tmp.to_reg(),
2425                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2426                }
2427                .emit(sink, emit_info, state);
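                // tmp2 is the write mask: it starts at the low bit of the top
                // byte, which is where the type's top bit lands after
                // byte-wise bit reversal.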
2428                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2429                Inst::AluRRImm12 {
2430                    alu_op: AluOPRRI::Slli,
2431                    rd: tmp2,
2432                    rs: tmp2.to_reg(),
2433                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2434                }
2435                .emit(sink, emit_info, state);
2436
2437                let label_done = sink.get_label();
2438                let label_loop = sink.get_label();
2439                sink.bind_label(label_loop, &mut state.ctrl_plane);
2440                Inst::CondBr {
2441                    taken: CondBrTarget::Label(label_done),
2442                    not_taken: CondBrTarget::Fallthrough,
2443                    kind: IntegerCompare {
2444                        kind: IntCC::SignedLessThanOrEqual,
2445                        rs1: step.to_reg(),
2446                        rs2: zero_reg(),
2447                    },
2448                }
2449                .emit(sink, emit_info, state);
2450                // Test the current source bit and, if set, set the corresponding destination bit.
2451                {
2452                    Inst::AluRRR {
2453                        alu_op: AluOPRRR::And,
2454                        rd: writable_spilltmp_reg2(),
2455                        rs1: tmp.to_reg(),
2456                        rs2: rs,
2457                    }
2458                    .emit(sink, emit_info, state);
2459                    let label_over = sink.get_label();
2460                    Inst::CondBr {
2461                        taken: CondBrTarget::Label(label_over),
2462                        not_taken: CondBrTarget::Fallthrough,
2463                        kind: IntegerCompare {
2464                            kind: IntCC::Equal,
2465                            rs1: zero_reg(),
2466                            rs2: spilltmp_reg2(),
2467                        },
2468                    }
2469                    .emit(sink, emit_info, state);
2470                    Inst::AluRRR {
2471                        alu_op: AluOPRRR::Or,
2472                        rd,
2473                        rs1: rd.to_reg(),
2474                        rs2: tmp2.to_reg(),
2475                    }
2476                    .emit(sink, emit_info, state);
2477                    sink.bind_label(label_over, &mut state.ctrl_plane);
2478                }
2479                // Decrement step and advance the read and write masks.
2480                {
2481                    Inst::AluRRImm12 {
2482                        alu_op: AluOPRRI::Addi,
2483                        rd: step,
2484                        rs: step.to_reg(),
2485                        imm12: Imm12::from_i16(-1),
2486                    }
2487                    .emit(sink, emit_info, state);
2488                    Inst::AluRRImm12 {
2489                        alu_op: AluOPRRI::Srli,
2490                        rd: tmp,
2491                        rs: tmp.to_reg(),
2492                        imm12: Imm12::ONE,
2493                    }
2494                    .emit(sink, emit_info, state);
2495                    {
2496                        // Reset tmp2: if (step % 8 == 0) then tmp2 >>= 15, else tmp2 <<= 1.
2497                        // After finishing a byte the write mask sits at that byte's top bit,
2498                        // and the next write target is the low bit of the byte below, 15 bits down.
2499                        let label_over = sink.get_label();
2500                        let label_sll_1 = sink.get_label();
2501                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2502                            .emit(sink, emit_info, state);
2503                        Inst::AluRRR {
2504                            alu_op: AluOPRRR::Rem,
2505                            rd: writable_spilltmp_reg2(),
2506                            rs1: step.to_reg(),
2507                            rs2: spilltmp_reg2(),
2508                        }
2509                        .emit(sink, emit_info, state);
2510                        Inst::CondBr {
2511                            taken: CondBrTarget::Label(label_sll_1),
2512                            not_taken: CondBrTarget::Fallthrough,
2513                            kind: IntegerCompare {
2514                                kind: IntCC::NotEqual,
2515                                rs1: spilltmp_reg2(),
2516                                rs2: zero_reg(),
2517                            },
2518                        }
2519                        .emit(sink, emit_info, state);
2520                        Inst::AluRRImm12 {
2521                            alu_op: AluOPRRI::Srli,
2522                            rd: tmp2,
2523                            rs: tmp2.to_reg(),
2524                            imm12: Imm12::from_i16(15),
2525                        }
2526                        .emit(sink, emit_info, state);
2527                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2528                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2529                        Inst::AluRRImm12 {
2530                            alu_op: AluOPRRI::Slli,
2531                            rd: tmp2,
2532                            rs: tmp2.to_reg(),
2533                            imm12: Imm12::ONE,
2534                        }
2535                        .emit(sink, emit_info, state);
2536                        sink.bind_label(label_over, &mut state.ctrl_plane);
2537                    }
2538                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2539                }
2540                sink.bind_label(label_done, &mut state.ctrl_plane);
2541            }
2542            &Inst::StackProbeLoop {
2543                guard_size,
2544                probe_count,
2545                tmp: guard_size_tmp,
2546            } => {
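                // Touch one byte per guard page, starting at the lowest page
                // and working back up toward the stack pointer (sketch):
                //
                //     step = guard_size * probe_count
                // loop:
                //     if step <= guard_size goto done
                //     sb zero, (sp - step)
                //     step -= guard_size
                //     goto loop
                // done: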
2547                let step = writable_spilltmp_reg();
2548                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2549                    .iter()
2550                    .for_each(|i| i.emit(sink, emit_info, state));
2551                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2552                    .iter()
2553                    .for_each(|i| i.emit(sink, emit_info, state));
2554
2555                let loop_start = sink.get_label();
2556                let label_done = sink.get_label();
2557                sink.bind_label(loop_start, &mut state.ctrl_plane);
2558                Inst::CondBr {
2559                    taken: CondBrTarget::Label(label_done),
2560                    not_taken: CondBrTarget::Fallthrough,
2561                    kind: IntegerCompare {
2562                        kind: IntCC::UnsignedLessThanOrEqual,
2563                        rs1: step.to_reg(),
2564                        rs2: guard_size_tmp.to_reg(),
2565                    },
2566                }
2567                .emit(sink, emit_info, state);
2568                // Compute the probe address: sp - step.
2569                Inst::AluRRR {
2570                    alu_op: AluOPRRR::Sub,
2571                    rd: writable_spilltmp_reg2(),
2572                    rs1: stack_reg(),
2573                    rs2: step.to_reg(),
2574                }
2575                .emit(sink, emit_info, state);
2576                Inst::Store {
2577                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2578                    op: StoreOP::Sb,
2579                    flags: MemFlags::new(),
2580                    src: zero_reg(),
2581                }
2582                .emit(sink, emit_info, state);
2583                // Advance to the next page: step -= guard_size.
2584                Inst::AluRRR {
2585                    alu_op: AluOPRRR::Sub,
2586                    rd: step,
2587                    rs1: step.to_reg(),
2588                    rs2: guard_size_tmp.to_reg(),
2589                }
2590                .emit(sink, emit_info, state);
2591                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2592                sink.bind_label(label_done, &mut state.ctrl_plane);
2593            }
            &Inst::VecAluRRRImm5 {
                op,
                vd,
                vd_src,
                imm,
                vs2,
                ref mask,
                ..
            } => {
                debug_assert_eq!(vd.to_reg(), vd_src);

                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
            }
            &Inst::VecAluRRRR {
                op,
                vd,
                vd_src,
                vs1,
                vs2,
                ref mask,
                ..
            } => {
                debug_assert_eq!(vd.to_reg(), vd_src);

                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
            }
            &Inst::VecAluRRR {
                op,
                vd,
                vs1,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
            }
            &Inst::VecAluRRImm5 {
                op,
                vd,
                imm,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
            }
            &Inst::VecAluRR {
                op,
                vd,
                vs,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr(op, vd, vs, *mask));
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
            }
            &Inst::VecSetState { rd, ref vstate } => {
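                // With a statically-known AVL this encodes, in effect, a
                // `vsetivli rd, avl, vtype` (the immediate form of
                // `vsetvli`); a dynamic AVL would panic in `unwrap_static`.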
                sink.put4(encode_vcfg_imm(
                    0x57,
                    rd.to_reg(),
                    vstate.avl.unwrap_static(),
                    &vstate.vtype,
                ));

                // Update the current vector emit state.
                state.vstate = EmitVState::Known(*vstate);
            }

            &Inst::VecLoad {
                eew,
                to,
                ref from,
                ref mask,
                flags,
                ..
            } => {
                // Vector loads don't support immediate offsets, so the address may need to be materialized in a register first.
                let addr = match from {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // Reg+0 Offset can be directly encoded
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise materialize the address into a register and load from that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

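                // For example, an unmasked unit-stride load with an 8-bit
                // element width comes out as a plain `vle8.v vd, (addr)`;
                // the `mask`/`mop`/`lumop` fields select the other variants.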
                sink.put4(encode_vmem_load(
                    0x07,
                    to.to_reg(),
                    eew,
                    addr,
                    from.lumop(),
                    *mask,
                    from.mop(),
                    from.nf(),
                ));
            }

            &Inst::VecStore {
                eew,
                ref to,
                from,
                ref mask,
                flags,
                ..
            } => {
                // Vector stores don't support immediate offsets, so the address may need to be materialized in a register first.
                let addr = match to {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // Reg+0 Offset can be directly encoded
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise materialize the address into a register and store through that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_vmem_store(
                    0x27,
                    from,
                    eew,
                    addr,
                    to.sumop(),
                    *mask,
                    to.mop(),
                    to.nf(),
                ));
            }

            Inst::EmitIsland { needed_space } => {
                if sink.island_needed(*needed_space) {
                    let jump_around_label = sink.get_label();
                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
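                    // `island_needed` was checked before the 4-byte jump
                    // above was emitted, so request that much extra space
                    // to keep the budget accurate.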
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }
        }
    }
}

fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially be quite long (up to 634 bytes!),
    // so let's emit an island here if we need one.
    //
    // It is difficult to calculate ahead of time exactly how many bytes will be
    // emitted, so we measure it by emitting the sequence into a disposable buffer
    // and then checking how many bytes actually came out.
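    // Note that this emits the sequence twice (once into the throwaway buffer,
    // once for real below), trading a little compile time for an exact island
    // size rather than a pessimistic upper bound.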
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// This should not be called directly; prefer to call [emit_return_call_common_sequence] instead.
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };
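    // A sketch of the frame layout assumed here, from SP upwards (see the
    // ABI code for the authoritative definition):
    //
    //     SP -> [ outgoing args ]
    //           [ fixed frame storage ]
    //           [ clobbered callee-saves ]    restored just below
    //           [ FP, then LR ]               at sp_to_fp_offset
    //           [ incoming tail args ]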

    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8;
    }

    // Restore the link register and frame pointer.
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the
    // callee is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
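    // `gen_sp_reg_adjust` is assumed to split the adjustment into multiple
    // instructions when it does not fit a single 12-bit immediate.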
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}