cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;

pub struct EmitInfo {
    #[expect(dead_code, reason = "may want to be used in the future")]
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

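/// Maps a register to its 3-bit field in a compressed (RVC) encoding.
/// Only x8..x15 are addressable there, encoded as 0..=7; e.g. x8 (s0/fp)
/// encodes as 0b000 and x15 (a5) as 0b111.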
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

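/// Vector unit state known at the current point of emission. Tracking this
/// lets `emit` skip the `VecSetState` that would otherwise have to precede
/// every vector instruction when the unit is already configured as required.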
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector State
    /// Controls the current state of the vector unit at the emission point.
    vstate: EmitVState,

    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clobber_vstate(&mut self) {
        self.vstate = EmitVState::Unknown;
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.clobber_vstate();
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Load an integer mask into `rd`: the low `ty.bits()` bits are set to 1
    /// and the result is zero-extended to 64 bits.
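    /// For example, `load_int_mask(rd, I16)` emits `li rd, -1` followed by a
    /// zero-extension from 16 to 64 bits, leaving `0xffff` in `rd`.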
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }
    /// Inverts all bits of `rs` into `rd`; this is the `not` pseudo-instruction,
    /// i.e. `xori rd, rs, -1`.
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtNameGot { .. }
            | Inst::LoadExtNameNear { .. }
            | Inst::LoadExtNameFar { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::SequencePoint { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any particular vstate; rather, it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
            Inst::EmitIsland { .. } => None,
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this instruction
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First try to emit this as a compressed instruction
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal, uncompressed instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table, call, return_call and try_call from
        // these checks since they emit their own islands, and thus
        // are allowed to exceed the worst case size.
        let emits_own_island = match self {
            Inst::BrTable { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::EmitIsland { .. } => true,
            _ => false,
        };
        if !emits_own_island {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in compressed form; returns `None` if it
    /// cannot be compressed.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

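        // Many compressed encodings have a 3-bit register field that can only
        // name the registers x8..x15, which this helper checks for.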
        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 and get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
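            //
            // `mv rd, rs` canonically expands to `addi rd, rs, 0`, and
            // `ori rd, rs, 0` is an equivalent move, so both compress to
            // `c.mv rd, rs`.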
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // they are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
                state.clobber_vstate();
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
                state.clobber_vstate();
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }
            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
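            //
            // For example, `addi sp, sp, -64` compresses to `c.addi16sp -64`,
            // whose immediate field holds -64 / 16 = -4.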
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
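            //
            // For example, `addi a0, sp, 16` (a0 is x10, a compressible register)
            // compresses to `c.addi4spn a0, sp, 16`, whose immediate field holds
            // 16 / 4 = 4.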
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
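                // e.g. a shift amount of 63 (0b111111) round-trips as imm6 = -1,
                // which carries the same low 6 bits.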
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
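                // e.g. `ld a0, 24(sp)` becomes `c.ldsp a0, 24`, whose immediate
                // field holds 24 / 8 = 3.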
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
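                    // e.g. `lbu a0, 3(a1)` is encodable (byte offsets 0..=3),
                    // while `lhu a0, 4(a1)` is not: the single halfword offset
                    // bit is scaled by 2, so only offsets 0 and 2 fit.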
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`; they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        Some(())
    }

    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction, so it is
                // short and there is no need to check whether an `emit_island` is
                // required. If the data were long, that would be a bug: RawData is
                // typically used to emit data that relies on a fixed position in the
                // code stream, and we could exceed `Inst::worst_case_size`.
                // For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, width, imm } => {
                sink.put4(encode_fli(width, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

                // Load into rd
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op: LoadOP::Flh,
                from,
                flags,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // flh unavailable, use an integer load instead
                Inst::Load {
                    rd: writable_spilltmp_reg(),
                    op: LoadOP::Lh,
                    flags,
                    from,
                }
                .emit(sink, emit_info, state);
                // NaN-box the `f16` before loading it into the floating-point
                // register with a 32-bit `fmv`.
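                //
                // A narrower float held in a wider FP register must be NaN-boxed:
                // all bits above the value are set to one. OR-ing the loaded
                // halfword with 0xffff_0000 produces exactly that boxed pattern.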
                Inst::Lui {
                    rd: writable_spilltmp_reg2(),
                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd: writable_spilltmp_reg(),
                    rs1: spilltmp_reg(),
                    rs2: spilltmp_reg2(),
                }
                .emit(sink, emit_info, state);
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvFmtX,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd,
                    rs: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
                    // register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
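                    //
                    // This is the usual RISC-V `auipc` + load pairing: the
                    // `PCRelHi20` fixup patches the `auipc` immediate and the
                    // `PCRelLo12I` fixup patches the load's 12-bit immediate,
                    // together forming a 32-bit PC-relative offset.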
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // Imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store {
                op: StoreOP::Fsh,
                src,
                flags,
                to,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // fsh unavailable, use an integer store instead
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvXFmt,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd: writable_spilltmp_reg(),
                    rs: src,
                }
                .emit(sink, emit_info, state);
                Inst::Store {
                    to,
                    op: StoreOP::Sh,
                    flags,
                    src: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction; instead we emit the
                // equivalent `jalr x0, x1, 0`, which jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
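                // Extend via a shift pair: e.g. a signed 16-to-64 extension is
                // `slli rd, rn, 48` followed by `srai rd, rd, 48`. The one
                // exception is zero-extending a u8, which is a single `andi`.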
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
                let is_u8 = || from_bits == 8 && !signed;
                if is_u8() {
                    // Special case for u8: mask the low byte directly.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }


            &Inst::Call { ref info } => {
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

                let start = sink.cur_offset();
                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                if info.patchable {
                    sink.add_patchable_call_site(sink.cur_offset() - start);
                } else {
                    // Load any stack-carried return values.
                    info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                        state.frame_layout().stackslots_size,
                        |inst| inst.emit(sink, emit_info, state),
                        |needed_space| Some(Inst::EmitIsland { needed_space }),
                    );
                }

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site();
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, &info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
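                // `jal x0, 0`: just the opcode; the offset is patched in later
                // through the `Jal20` label use registered above.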
                sink.put4(0b1101111);
                state.clobber_vstate();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
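                        // The inverted encoding is recorded via `add_cond_branch`
                        // below so the MachBuffer can flip this branch while
                        // optimizing branches during emission.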
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MovFromPReg { rd, rm } => {
                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
            }

            &Inst::BrTable {
                index,
                tmp1,
                tmp2,
                ref targets,
            } => {
                let ext_index = writable_spilltmp_reg();

                let label_compute_target = sink.get_label();

1394                // The default target is passed in as the 0th element of `targets`;
1395                // separate it here for clarity.
1396                let default_target = targets[0];
1397                let targets = &targets[1..];
1398
1399                // We are potentially going to emit a large number of instructions, so emit an island
1400                // now if we need one.
1401                //
1402                // The worst-case PC calculation takes 12 instructions, and each entry in the jump
1403                // table takes another 2, so check whether an island is needed to keep every branch in range.
1404                let inst_count = 12 + (targets.len() * 2);
1405                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1406                if sink.island_needed(distance) {
1407                    let jump_around_label = sink.get_label();
1408                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1409                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1410                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1411                }
1412
1413                // We emit a bounds check on the index: if the index is larger than the number of
1414                // jump table entries, we jump to the default block. Otherwise we compute a jump
1415                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1416                // that offset. Each jump table entry is a regular auipc+jalr, which we emit sequentially.
1417                //
1418                // Build the following sequence:
1419                //
1420                // extend_index:
1421                //     zext.w  ext_index, index
1422                // bounds_check:
1423                //     li      tmp, n_labels
1424                //     bltu    ext_index, tmp, compute_target
1425                // jump_to_default_block:
1426                //     auipc   pc, 0
1427                //     jalr    zero, pc, default_block
1428                // compute_target:
1429                //     auipc   pc, 0
1430                //     slli    tmp, ext_index, 3
1431                //     add     pc, pc, tmp
1432                //     jalr    zero, pc, 0x10
1433                // jump_table:
1434                //     ; This repeats for each entry in the jumptable
1435                //     auipc   pc, 0
1436                //     jalr    zero, pc, block_target
1437
1438                // Extend the index to 64 bits.
1439                //
1440                // This prevents us from branching on the top 32 bits of the index, which
1441                // are undefined.
1442                Inst::Extend {
1443                    rd: ext_index,
1444                    rn: index,
1445                    signed: false,
1446                    from_bits: 32,
1447                    to_bits: 64,
1448                }
1449                .emit(sink, emit_info, state);
1450
1451                // Bounds check.
1452                //
1453                // Check if the index passed in is larger than the number of jump table
1454                // entries that we have. If it is, we fall through to a jump into the
1455                // default block.
1456                Inst::load_constant_u32(tmp2, targets.len() as u64)
1457                    .iter()
1458                    .for_each(|i| i.emit(sink, emit_info, state));
1459                Inst::CondBr {
1460                    taken: CondBrTarget::Label(label_compute_target),
1461                    not_taken: CondBrTarget::Fallthrough,
1462                    kind: IntegerCompare {
1463                        kind: IntCC::UnsignedLessThan,
1464                        rs1: ext_index.to_reg(),
1465                        rs2: tmp2.to_reg(),
1466                    },
1467                }
1468                .emit(sink, emit_info, state);
1469
1470                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1471                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1472                    .iter()
1473                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1474
1475                // Compute the jump table offset.
1476                // We need to emit a PC-relative offset here.
1477                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1478
1479                // Get the current PC.
1480                Inst::Auipc {
1481                    rd: tmp1,
1482                    imm: Imm20::ZERO,
1483                }
1484                .emit_uncompressed(sink, emit_info, state, start_off);
1485
1486                // These instructions must be emitted as uncompressed since we
1487                // are manually computing the offset from the PC.
1488
1489                // Multiply the index by 8, since that is the size in
1490                // bytes of each jump table entry
1491                Inst::AluRRImm12 {
1492                    alu_op: AluOPRRI::Slli,
1493                    rd: tmp2,
1494                    rs: ext_index.to_reg(),
1495                    imm12: Imm12::from_i16(3),
1496                }
1497                .emit_uncompressed(sink, emit_info, state, start_off);
1498
1499                // Calculate the base of the jump, PC + the offset from above.
1500                Inst::AluRRR {
1501                    alu_op: AluOPRRR::Add,
1502                    rd: tmp1,
1503                    rs1: tmp1.to_reg(),
1504                    rs2: tmp2.to_reg(),
1505                }
1506                .emit_uncompressed(sink, emit_info, state, start_off);
1507
1508                // Jump into the jump table.
1509                // We add a 16-byte offset here because the jump table begins
1510                // 4 instructions (16 bytes) after the AUIPC above.
1511                Inst::Jalr {
1512                    rd: writable_zero_reg(),
1513                    base: tmp1.to_reg(),
1514                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1515                }
1516                .emit_uncompressed(sink, emit_info, state, start_off);
1517
1518                // Emit the jump table.
1519                //
1520                // Each entry is an auipc + jalr to the target block; any island
1521                // that was needed has already been emitted above.
1522
1523                // Emit the jumps back to back.
1524                for target in targets.iter() {
1525                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1526
1527                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1528                        .iter()
1529                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1530                }
1531
1532                // We've just emitted an island that is safe up to *here*.
1533                // Mark it as such so that we don't needlessly emit additional islands.
1534                *start_off = sink.cur_offset();
1535            }
1536
1537            &Inst::Atomic {
1538                op,
1539                rd,
1540                addr,
1541                src,
1542                amo,
1543            } => {
1544                // TODO: get flags from original CLIF atomic instruction
1545                let flags = MemFlags::new();
1546                if let Some(trap_code) = flags.trap_code() {
1547                    sink.add_trap(trap_code);
1548                }
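                // R-type AMO encoding: funct7 (funct5 | aq | rl) | rs2 (src) |
                // rs1 (addr) | funct3 (width) | rd | opcode.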
1549                let x = op.op_code()
1550                    | reg_to_gpr_num(rd.to_reg()) << 7
1551                    | op.funct3() << 12
1552                    | reg_to_gpr_num(addr) << 15
1553                    | reg_to_gpr_num(src) << 20
1554                    | op.funct7(amo) << 25;
1555
1556                sink.put4(x);
1557            }
1558            &Inst::Fence { pred, succ } => {
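                // FENCE is an I-type instruction with pred in bits 27:24 and
                // succ in bits 23:20; rd, rs1, and fm are all zero here.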
1559                let x = 0b0001111
1560                    | 0b00000 << 7
1561                    | 0b000 << 12
1562                    | 0b00000 << 15
1563                    | (succ as u32) << 20
1564                    | (pred as u32) << 24;
1565
1566                sink.put4(x);
1567            }
1568            &Inst::Auipc { rd, imm } => {
1569                sink.put4(enc_auipc(rd, imm));
1570            }
1571
1572            &Inst::LoadAddr { rd, mem } => {
1573                let base = mem.get_base_register();
1574                let offset = mem.get_offset_with_state(state);
1575                let offset_imm12 = Imm12::maybe_from_i64(offset);
1576
1577                match (mem, base, offset_imm12) {
1578                    (_, Some(rs), Some(imm12)) => {
1579                        Inst::AluRRImm12 {
1580                            alu_op: AluOPRRI::Addi,
1581                            rd,
1582                            rs,
1583                            imm12,
1584                        }
1585                        .emit(sink, emit_info, state);
1586                    }
1587                    (_, Some(rs), None) => {
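                        // The offset doesn't fit in an imm12, so materialize the
                        // full offset in `rd` and then add the base register.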
1588                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1589                        insts.push(Inst::AluRRR {
1590                            alu_op: AluOPRRR::Add,
1591                            rd,
1592                            rs1: rd.to_reg(),
1593                            rs2: rs,
1594                        });
1595                        insts
1596                            .into_iter()
1597                            .for_each(|inst| inst.emit(sink, emit_info, state));
1598                    }
1599                    (AMode::Const(addr), None, _) => {
1600                        // Get an address label for the constant and recurse.
1601                        let label = sink.get_label_for_constant(addr);
1602                        Inst::LoadAddr {
1603                            rd,
1604                            mem: AMode::Label(label),
1605                        }
1606                        .emit(sink, emit_info, state);
1607                    }
1608                    (AMode::Label(label), None, _) => {
1609                        // Get the current PC.
1610                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1611                        let inst = Inst::Auipc {
1612                            rd,
1613                            imm: Imm20::ZERO,
1614                        };
1615                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1616
1617                        // Emit an add to the address with a relocation.
1618                        // This later gets patched up with the correct offset.
1619                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1620                        Inst::AluRRImm12 {
1621                            alu_op: AluOPRRI::Addi,
1622                            rd,
1623                            rs: rd.to_reg(),
1624                            imm12: Imm12::ZERO,
1625                        }
1626                        .emit_uncompressed(sink, emit_info, state, start_off);
1627                    }
1628                    (amode, _, _) => {
1629                        unimplemented!("LoadAddr: {:?}", amode);
1630                    }
1631                }
1632            }
1633
1634            &Inst::Select {
1635                ref dst,
1636                condition,
1637                ref x,
1638                ref y,
1639            } => {
1640                // The general form for this select is the following:
1641                //
1642                //     mv rd, x
1643                //     b{cond} rcond, label_end
1644                //     mv rd, y
1645                // label_end:
1646                //     ... etc
1647                //
1648                // This is built on the assumption that moves are cheap, but branches and jumps
1649                // are not. So with this format we always avoid one jump instruction at the expense
1650                // of an unconditional move.
1651                //
1652                // We also perform another optimization here. If the destination register is the same
1653                // as one of the input registers, we can avoid emitting the first unconditional move
1654                // and emit just the branch and the second move.
1655                //
1656                // To make sure that this happens as often as possible, we also try to invert the
1657                // condition, so that if either of the input registers is the same as the destination,
1658                // we avoid that move.
1659
1660                let label_end = sink.get_label();
1661
1662                let xregs = x.regs();
1663                let yregs = y.regs();
1664                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1665                let condregs = condition.regs();
1666
1667                // We are going to write to the destination register before evaluating
1668                // the condition, so we need to make sure that the destination register
1669                // is not one of the condition registers.
1670                //
1671                // This should never happen, since the regalloc constraints
1672                // for this register should be set up to prevent it.
1673                debug_assert_ne!(dstregs, condregs);
1674
1675                // Check if we can invert the condition and avoid moving the y registers into
1676                // the destination. This allows us to only emit the branch and one of the moves.
1677                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1678                    (yregs, xregs, condition.inverse())
1679                } else {
1680                    (xregs, yregs, condition)
1681                };
1682
1683                // Unconditionally move one of the values to the destination register.
1684                //
1685                // These moves may not end up being emitted if the source and
1686                // destination registers are the same. That logic is built into
1687                // the emit function for `Inst::Mov`.
1688                for i in gen_moves(dst.regs(), uncond_move) {
1689                    i.emit(sink, emit_info, state);
1690                }
1691
1692                // If the condition passes we skip over the conditional move
1693                Inst::CondBr {
1694                    taken: CondBrTarget::Label(label_end),
1695                    not_taken: CondBrTarget::Fallthrough,
1696                    kind: condition,
1697                }
1698                .emit(sink, emit_info, state);
1699
1700                // Move the conditional value to the destination register.
1701                for i in gen_moves(dst.regs(), cond_move) {
1702                    i.emit(sink, emit_info, state);
1703                }
1704
1705                sink.bind_label(label_end, &mut state.ctrl_plane);
1706            }
1707            &Inst::Jalr { rd, base, offset } => {
1708                sink.put4(enc_jalr(rd, base, offset));
1709                state.clobber_vstate();
1710            }
1711            &Inst::EBreak => {
1712                sink.put4(0x00100073);
1713            }
1714            &Inst::AtomicCas {
1715                offset,
1716                t0,
1717                dst,
1718                e,
1719                addr,
1720                v,
1721                ty,
1722            } => {
1723                //     # addr holds address of memory location
1724                //     # e holds expected value
1725                //     # v holds desired value
1726                //     # dst holds return value
1727                // cas:
1728                //     lr.w dst, (addr)       # Load original value.
1729                //     lr.w dst, (addr)       # Load original value.
1730                //     bne dst, e, fail       # Doesn't match, so fail.
1731                //     sc.w t0, v, (addr)     # Try to update.
1732                //     bnez t0, cas           # If the store failed, retry.
1732                // fail:
1733                let fail_label = sink.get_label();
1734                let cas_label = sink.get_label();
1735                sink.bind_label(cas_label, &mut state.ctrl_plane);
1736                Inst::Atomic {
1737                    op: AtomicOP::load_op(ty),
1738                    rd: dst,
1739                    addr,
1740                    src: zero_reg(),
1741                    amo: AMO::SeqCst,
1742                }
1743                .emit(sink, emit_info, state);
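                // For 8/16-bit types the LR/SC pair operates on the containing
                // aligned word, so extract the subword at `offset` from the
                // loaded value.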
1744                if ty.bits() < 32 {
1745                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1746                        .iter()
1747                        .for_each(|i| i.emit(sink, emit_info, state));
1748                } else if ty.bits() == 32 {
1749                    Inst::Extend {
1750                        rd: dst,
1751                        rn: dst.to_reg(),
1752                        signed: false,
1753                        from_bits: 32,
1754                        to_bits: 64,
1755                    }
1756                    .emit(sink, emit_info, state);
1757                }
1758                Inst::CondBr {
1759                    taken: CondBrTarget::Label(fail_label),
1760                    not_taken: CondBrTarget::Fallthrough,
1761                    kind: IntegerCompare {
1762                        kind: IntCC::NotEqual,
1763                        rs1: e,
1764                        rs2: dst.to_reg(),
1765                    },
1766                }
1767                .emit(sink, emit_info, state);
1768                let store_value = if ty.bits() < 32 {
1769                    // Reload the containing word into t0.
1770                    Inst::Atomic {
1771                        op: AtomicOP::load_op(ty),
1772                        rd: t0,
1773                        addr,
1774                        src: zero_reg(),
1775                        amo: AMO::SeqCst,
1776                    }
1777                    .emit(sink, emit_info, state);
1778                    // Merge the desired value into the loaded word at `offset`.
1779                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1780                        .iter()
1781                        .for_each(|i| i.emit(sink, emit_info, state));
1782                    t0.to_reg()
1783                } else {
1784                    v
1785                };
1786                Inst::Atomic {
1787                    op: AtomicOP::store_op(ty),
1788                    rd: t0,
1789                    addr,
1790                    src: store_value,
1791                    amo: AMO::SeqCst,
1792                }
1793                .emit(sink, emit_info, state);
1794                // Check whether our value was stored.
1795                Inst::CondBr {
1796                    taken: CondBrTarget::Label(cas_label),
1797                    not_taken: CondBrTarget::Fallthrough,
1798                    kind: IntegerCompare {
1799                        kind: IntCC::NotEqual,
1800                        rs1: t0.to_reg(),
1801                        rs2: zero_reg(),
1802                    },
1803                }
1804                .emit(sink, emit_info, state);
1805                sink.bind_label(fail_label, &mut state.ctrl_plane);
1806            }
1807            &Inst::AtomicRmwLoop {
1808                offset,
1809                op,
1810                dst,
1811                ty,
1812                p,
1813                x,
1814                t0,
1815            } => {
1816                let retry = sink.get_label();
1817                sink.bind_label(retry, &mut state.ctrl_plane);
1818                // load old value.
1819                Inst::Atomic {
1820                    op: AtomicOP::load_op(ty),
1821                    rd: dst,
1822                    addr: p,
1823                    src: zero_reg(),
1824                    amo: AMO::SeqCst,
1825                }
1826                .emit(sink, emit_info, state);
1827                // Compute the value to store back, depending on the RMW op.
1828
1829                let store_value: Reg = match op {
1830                    crate::ir::AtomicRmwOp::Add
1831                    | crate::ir::AtomicRmwOp::Sub
1832                    | crate::ir::AtomicRmwOp::And
1833                    | crate::ir::AtomicRmwOp::Or
1834                    | crate::ir::AtomicRmwOp::Xor => {
1835                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1836                            .iter()
1837                            .for_each(|i| i.emit(sink, emit_info, state));
1838                        Inst::AluRRR {
1839                            alu_op: match op {
1840                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1841                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1842                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1843                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1844                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1845                                _ => unreachable!(),
1846                            },
1847                            rd: t0,
1848                            rs1: dst.to_reg(),
1849                            rs2: x,
1850                        }
1851                        .emit(sink, emit_info, state);
1852                        Inst::Atomic {
1853                            op: AtomicOP::load_op(ty),
1854                            rd: writable_spilltmp_reg2(),
1855                            addr: p,
1856                            src: zero_reg(),
1857                            amo: AMO::SeqCst,
1858                        }
1859                        .emit(sink, emit_info, state);
1860                        AtomicOP::merge(
1861                            writable_spilltmp_reg2(),
1862                            writable_spilltmp_reg(),
1863                            offset,
1864                            t0.to_reg(),
1865                            ty,
1866                        )
1867                        .iter()
1868                        .for_each(|i| i.emit(sink, emit_info, state));
1869                        spilltmp_reg2()
1870                    }
1871                    crate::ir::AtomicRmwOp::Nand => {
1872                        if ty.bits() < 32 {
1873                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1874                                .iter()
1875                                .for_each(|i| i.emit(sink, emit_info, state));
1876                        }
1877                        Inst::AluRRR {
1878                            alu_op: AluOPRRR::And,
1879                            rd: t0,
1880                            rs1: x,
1881                            rs2: dst.to_reg(),
1882                        }
1883                        .emit(sink, emit_info, state);
1884                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1885                        if ty.bits() < 32 {
1886                            Inst::Atomic {
1887                                op: AtomicOP::load_op(ty),
1888                                rd: writable_spilltmp_reg2(),
1889                                addr: p,
1890                                src: zero_reg(),
1891                                amo: AMO::SeqCst,
1892                            }
1893                            .emit(sink, emit_info, state);
1894                            AtomicOP::merge(
1895                                writable_spilltmp_reg2(),
1896                                writable_spilltmp_reg(),
1897                                offset,
1898                                t0.to_reg(),
1899                                ty,
1900                            )
1901                            .iter()
1902                            .for_each(|i| i.emit(sink, emit_info, state));
1903                            spilltmp_reg2()
1904                        } else {
1905                            t0.to_reg()
1906                        }
1907                    }
1908
1909                    crate::ir::AtomicRmwOp::Umin
1910                    | crate::ir::AtomicRmwOp::Umax
1911                    | crate::ir::AtomicRmwOp::Smin
1912                    | crate::ir::AtomicRmwOp::Smax => {
1913                        let label_select_dst = sink.get_label();
1914                        let label_select_done = sink.get_label();
1915                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1916                        {
1917                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1918                        } else {
1919                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1920                        }
1921                        .iter()
1922                        .for_each(|i| i.emit(sink, emit_info, state));
1923
1924                        Inst::CondBr {
1925                            taken: CondBrTarget::Label(label_select_dst),
1926                            not_taken: CondBrTarget::Fallthrough,
1927                            kind: IntegerCompare {
1928                                kind: match op {
1929                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1930                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1931                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1932                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1933                                    _ => unreachable!(),
1934                                },
1935                                rs1: dst.to_reg(),
1936                                rs2: x,
1937                            },
1938                        }
1939                        .emit(sink, emit_info, state);
1940                        // here we select x.
1941                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1942                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1943                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1944                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1945                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1946                        Inst::Atomic {
1947                            op: AtomicOP::load_op(ty),
1948                            rd: writable_spilltmp_reg2(),
1949                            addr: p,
1950                            src: zero_reg(),
1951                            amo: AMO::SeqCst,
1952                        }
1953                        .emit(sink, emit_info, state);
1954                        AtomicOP::merge(
1955                            writable_spilltmp_reg2(),
1956                            writable_spilltmp_reg(),
1957                            offset,
1958                            t0.to_reg(),
1959                            ty,
1960                        )
1961                        .iter()
1962                        .for_each(|i| i.emit(sink, emit_info, state));
1963                        spilltmp_reg2()
1964                    }
1965                    crate::ir::AtomicRmwOp::Xchg => {
1966                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1967                            .iter()
1968                            .for_each(|i| i.emit(sink, emit_info, state));
1969                        Inst::Atomic {
1970                            op: AtomicOP::load_op(ty),
1971                            rd: writable_spilltmp_reg2(),
1972                            addr: p,
1973                            src: zero_reg(),
1974                            amo: AMO::SeqCst,
1975                        }
1976                        .emit(sink, emit_info, state);
1977                        AtomicOP::merge(
1978                            writable_spilltmp_reg2(),
1979                            writable_spilltmp_reg(),
1980                            offset,
1981                            x,
1982                            ty,
1983                        )
1984                        .iter()
1985                        .for_each(|i| i.emit(sink, emit_info, state));
1986                        spilltmp_reg2()
1987                    }
1988                };
1989
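                // Attempt the store; `sc` writes 0 to t0 on success and a
                // nonzero value if the reservation was lost.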
1990                Inst::Atomic {
1991                    op: AtomicOP::store_op(ty),
1992                    rd: t0,
1993                    addr: p,
1994                    src: store_value,
1995                    amo: AMO::SeqCst,
1996                }
1997                .emit(sink, emit_info, state);
1998
1999                // If the store failed, retry.
2000                Inst::CondBr {
2001                    taken: CondBrTarget::Label(retry),
2002                    not_taken: CondBrTarget::Fallthrough,
2003                    kind: IntegerCompare {
2004                        kind: IntCC::NotEqual,
2005                        rs1: t0.to_reg(),
2006                        rs2: zero_reg(),
2007                    },
2008                }
2009                .emit(sink, emit_info, state);
2010            }
2011
2012            &Inst::LoadExtNameGot { rd, ref name } => {
2013                // Load a PC-relative address into a register.
2014                // RISC-V does this slightly differently from other arches. We emit a relocation
2015                // with a label, instead of the symbol itself.
2016                //
2017                // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2018                //
2019                // Emit the following code:
2020                // label:
2021                //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2022                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2023
2024                // Create the label that is going to be published to the final binary object.
2025                let auipc_label = sink.get_label();
2026                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2027
2028                // Get the current PC.
2029                sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2030                Inst::Auipc {
2031                    rd,
2032                    imm: Imm20::from_i32(0),
2033                }
2034                .emit_uncompressed(sink, emit_info, state, start_off);
2035
2036                // The `ld` here points to the `auipc` label instead of directly to the symbol.
2037                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2038                Inst::Load {
2039                    rd,
2040                    op: LoadOP::Ld,
2041                    flags: MemFlags::trusted(),
2042                    from: AMode::RegOffset(rd.to_reg(), 0),
2043                }
2044                .emit_uncompressed(sink, emit_info, state, start_off);
2045            }
2046
2047            &Inst::LoadExtNameFar {
2048                rd,
2049                ref name,
2050                offset,
2051            } => {
2052                // In the non-PIC sequence we relocate the absolute address into
2053                // a preallocated space, load it into a register, and jump over
2054                // it.
2055                //
2056                // Emit the following code:
2057                //   ld rd, label_data
2058                //   j label_end
2059                // label_data:
2060                //   <8 byte space>           # ABS8
2061                // label_end:
2062
2063                let label_data = sink.get_label();
2064                let label_end = sink.get_label();
2065
2066                // Load the value from a label
2067                Inst::Load {
2068                    rd,
2069                    op: LoadOP::Ld,
2070                    flags: MemFlags::trusted(),
2071                    from: AMode::Label(label_data),
2072                }
2073                .emit(sink, emit_info, state);
2074
2075                // Jump over the data
2076                Inst::gen_jump(label_end).emit(sink, emit_info, state);
2077
2078                sink.bind_label(label_data, &mut state.ctrl_plane);
2079                sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2080                sink.put8(0);
2081
2082                sink.bind_label(label_end, &mut state.ctrl_plane);
2083            }
2084
2085            &Inst::LoadExtNameNear {
2086                rd,
2087                ref name,
2088                offset,
2089            } => {
2090                // Emit the following code:
2091                // label:
2092                //   auipc rd, 0              # R_RISCV_PCREL_HI20 (symbol_name)
2093                //   addi  rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2094
2095                let auipc_label = sink.get_label();
2096                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2097
2098                // Get the current PC.
2099                sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2100                Inst::Auipc {
2101                    rd,
2102                    imm: Imm20::from_i32(0),
2103                }
2104                .emit_uncompressed(sink, emit_info, state, start_off);
2105
2106                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2107                Inst::AluRRImm12 {
2108                    alu_op: AluOPRRI::Addi,
2109                    rd,
2110                    rs: rd.to_reg(),
2111                    imm12: Imm12::ZERO,
2112                }
2113                .emit_uncompressed(sink, emit_info, state, start_off);
2114            }
2115
2116            &Inst::LabelAddress { dst, label } => {
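                // Materialize the label's address with a standard auipc + addi
                // PC-relative pair, using PCRelHi20/PCRelLo12I label fixups.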
2117                let offset = sink.cur_offset();
2118                Inst::Auipc {
2119                    rd: dst,
2120                    imm: Imm20::from_i32(0),
2121                }
2122                .emit_uncompressed(sink, emit_info, state, start_off);
2123                sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2124
2125                let offset = sink.cur_offset();
2126                Inst::AluRRImm12 {
2127                    alu_op: AluOPRRI::Addi,
2128                    rd: dst,
2129                    rs: dst.to_reg(),
2130                    imm12: Imm12::ZERO,
2131                }
2132                .emit_uncompressed(sink, emit_info, state, start_off);
2133                sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2134            }
2135
2136            &Inst::ElfTlsGetAddr { rd, ref name } => {
2137                // RISC-V's TLS GD model is slightly different from other arches.
2138                //
2139                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2140                // of the address relative to the GOT entry. This relocation points to
2141                // the symbol as usual.
2142                //
2143                // However, when loading the bottom 12 bits of the address, we need to
2144                // use a label that points to the previous AUIPC instruction.
2145                //
2146                // label:
2147                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2148                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2149                //
2150                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2151
2152                // Create the label that is going to be published to the final binary object.
2153                let auipc_label = sink.get_label();
2154                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2155
2156                // Get the current PC.
2157                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2158                Inst::Auipc {
2159                    rd,
2160                    imm: Imm20::from_i32(0),
2161                }
2162                .emit_uncompressed(sink, emit_info, state, start_off);
2163
2164                // The `addi` here points to the `auipc` label instead of directly to the symbol.
2165                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2166                Inst::AluRRImm12 {
2167                    alu_op: AluOPRRI::Addi,
2168                    rd,
2169                    rs: rd.to_reg(),
2170                    imm12: Imm12::from_i16(0),
2171                }
2172                .emit_uncompressed(sink, emit_info, state, start_off);
2173
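                // The address computed above is passed to the `ElfTlsGetAddr`
                // libcall (`__tls_get_addr`) to resolve the actual TLS address.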
2174                Inst::Call {
2175                    info: Box::new(CallInfo::empty(
2176                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2177                        CallConv::SystemV,
2178                    )),
2179                }
2180                .emit_uncompressed(sink, emit_info, state, start_off);
2181            }
2182
2183            &Inst::TrapIf {
2184                rs1,
2185                rs2,
2186                cc,
2187                trap_code,
2188            } => {
2189                let label_end = sink.get_label();
2190                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2191
2192                // Jump over the trap if the condition is false.
2193                Inst::CondBr {
2194                    taken: CondBrTarget::Label(label_end),
2195                    not_taken: CondBrTarget::Fallthrough,
2196                    kind: cond.inverse(),
2197                }
2198                .emit(sink, emit_info, state);
2199                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2200
2201                sink.bind_label(label_end, &mut state.ctrl_plane);
2202            }
2203            &Inst::Udf { trap_code } => {
2204                sink.add_trap(trap_code);
2205                sink.put_data(Inst::TRAP_OPCODE);
2206            }
2207            &Inst::AtomicLoad { rd, ty, p } => {
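                // Sequentially consistent load, per the recommended RISC-V
                // mapping: fence rw,rw; load; fence r,rw.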
2208                // emit the fence.
2209                Inst::Fence {
2210                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2211                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2212                }
2213                .emit(sink, emit_info, state);
2214                // load.
2215                Inst::Load {
2216                    rd,
2217                    op: LoadOP::from_type(ty),
2218                    flags: MemFlags::new(),
2219                    from: AMode::RegOffset(p, 0),
2220                }
2221                .emit(sink, emit_info, state);
2222                Inst::Fence {
2223                    pred: Inst::FENCE_REQ_R,
2224                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2225                }
2226                .emit(sink, emit_info, state);
2227            }
2228            &Inst::AtomicStore { src, ty, p } => {
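                // Sequentially consistent store, per the recommended RISC-V
                // mapping: fence rw,w; store.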
2229                Inst::Fence {
2230                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2231                    succ: Inst::FENCE_REQ_W,
2232                }
2233                .emit(sink, emit_info, state);
2234                Inst::Store {
2235                    to: AMode::RegOffset(p, 0),
2236                    op: StoreOP::from_type(ty),
2237                    flags: MemFlags::new(),
2238                    src,
2239                }
2240                .emit(sink, emit_info, state);
2241            }
2242
2243            &Inst::Popcnt {
2244                sum,
2245                tmp,
2246                step,
2247                rs,
2248                ty,
2249            } => {
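                // Bit-by-bit popcount: probe each bit of `rs` with a one-hot
                // mask, walking from the MSB down and incrementing `sum` for
                // every set bit.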
2250                // Initialize sum to 0.
2251                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2252                // Initialize step to the bit width of the type.
2253                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2254                    .emit(sink, emit_info, state);
2255                // Build the probe mask: tmp = 1 << (ty.bits() - 1).
2256                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2257                Inst::AluRRImm12 {
2258                    alu_op: AluOPRRI::Slli,
2259                    rd: tmp,
2260                    rs: tmp.to_reg(),
2261                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2262                }
2263                .emit(sink, emit_info, state);
2264                let label_done = sink.get_label();
2265                let label_loop = sink.get_label();
2266                sink.bind_label(label_loop, &mut state.ctrl_plane);
2267                Inst::CondBr {
2268                    taken: CondBrTarget::Label(label_done),
2269                    not_taken: CondBrTarget::Fallthrough,
2270                    kind: IntegerCompare {
2271                        kind: IntCC::SignedLessThanOrEqual,
2272                        rs1: step.to_reg(),
2273                        rs2: zero_reg(),
2274                    },
2275                }
2276                .emit(sink, emit_info, state);
2277                // Test the current bit and increment sum if it is set.
2278                {
2279                    Inst::AluRRR {
2280                        alu_op: AluOPRRR::And,
2281                        rd: writable_spilltmp_reg2(),
2282                        rs1: tmp.to_reg(),
2283                        rs2: rs,
2284                    }
2285                    .emit(sink, emit_info, state);
2286                    let label_over = sink.get_label();
2287                    Inst::CondBr {
2288                        taken: CondBrTarget::Label(label_over),
2289                        not_taken: CondBrTarget::Fallthrough,
2290                        kind: IntegerCompare {
2291                            kind: IntCC::Equal,
2292                            rs1: zero_reg(),
2293                            rs2: spilltmp_reg2(),
2294                        },
2295                    }
2296                    .emit(sink, emit_info, state);
2297                    Inst::AluRRImm12 {
2298                        alu_op: AluOPRRI::Addi,
2299                        rd: sum,
2300                        rs: sum.to_reg(),
2301                        imm12: Imm12::ONE,
2302                    }
2303                    .emit(sink, emit_info, state);
2304                    sink.bind_label(label_over, &mut state.ctrl_plane);
2305                }
2306                // Decrement step and shift the probe mask right by one.
2307                {
2308                    Inst::AluRRImm12 {
2309                        alu_op: AluOPRRI::Addi,
2310                        rd: step,
2311                        rs: step.to_reg(),
2312                        imm12: Imm12::from_i16(-1),
2313                    }
2314                    .emit(sink, emit_info, state);
2315                    Inst::AluRRImm12 {
2316                        alu_op: AluOPRRI::Srli,
2317                        rd: tmp,
2318                        rs: tmp.to_reg(),
2319                        imm12: Imm12::ONE,
2320                    }
2321                    .emit(sink, emit_info, state);
2322                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2323                }
2324                sink.bind_label(label_done, &mut state.ctrl_plane);
2325            }
2326            &Inst::Cltz {
2327                sum,
2328                tmp,
2329                step,
2330                rs,
2331                leading,
2332                ty,
2333            } => {
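                // Count leading or trailing zeros by probing bits one at a time,
                // from the MSB down (leading) or from the LSB up (trailing),
                // stopping at the first set bit.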
2334                // Initialize sum to 0.
2335                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2336                // Initialize step to the bit width of the type.
2337                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2338                    .emit(sink, emit_info, state);
2339                // Probe mask: start at bit 0; it is moved to the MSB below when counting leading zeros.
2340                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2341                if leading {
2342                    Inst::AluRRImm12 {
2343                        alu_op: AluOPRRI::Slli,
2344                        rd: tmp,
2345                        rs: tmp.to_reg(),
2346                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2347                    }
2348                    .emit(sink, emit_info, state);
2349                }
2350                let label_done = sink.get_label();
2351                let label_loop = sink.get_label();
2352                sink.bind_label(label_loop, &mut state.ctrl_plane);
2353                Inst::CondBr {
2354                    taken: CondBrTarget::Label(label_done),
2355                    not_taken: CondBrTarget::Fallthrough,
2356                    kind: IntegerCompare {
2357                        kind: IntCC::SignedLessThanOrEqual,
2358                        rs1: step.to_reg(),
2359                        rs2: zero_reg(),
2360                    },
2361                }
2362                .emit(sink, emit_info, state);
2363                // Test the current bit; stop at the first set bit, otherwise increment sum.
2364                {
2365                    Inst::AluRRR {
2366                        alu_op: AluOPRRR::And,
2367                        rd: writable_spilltmp_reg2(),
2368                        rs1: tmp.to_reg(),
2369                        rs2: rs,
2370                    }
2371                    .emit(sink, emit_info, state);
2372                    Inst::CondBr {
2373                        taken: CondBrTarget::Label(label_done),
2374                        not_taken: CondBrTarget::Fallthrough,
2375                        kind: IntegerCompare {
2376                            kind: IntCC::NotEqual,
2377                            rs1: zero_reg(),
2378                            rs2: spilltmp_reg2(),
2379                        },
2380                    }
2381                    .emit(sink, emit_info, state);
2382                    Inst::AluRRImm12 {
2383                        alu_op: AluOPRRI::Addi,
2384                        rd: sum,
2385                        rs: sum.to_reg(),
2386                        imm12: Imm12::ONE,
2387                    }
2388                    .emit(sink, emit_info, state);
2389                }
2390                // Decrement step and shift the probe mask toward the other end.
2391                {
2392                    Inst::AluRRImm12 {
2393                        alu_op: AluOPRRI::Addi,
2394                        rd: step,
2395                        rs: step.to_reg(),
2396                        imm12: Imm12::from_i16(-1),
2397                    }
2398                    .emit(sink, emit_info, state);
2399                    Inst::AluRRImm12 {
2400                        alu_op: if leading {
2401                            AluOPRRI::Srli
2402                        } else {
2403                            AluOPRRI::Slli
2404                        },
2405                        rd: tmp,
2406                        rs: tmp.to_reg(),
2407                        imm12: Imm12::ONE,
2408                    }
2409                    .emit(sink, emit_info, state);
2410                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2411                }
2412                sink.bind_label(label_done, &mut state.ctrl_plane);
2413            }
2414            &Inst::Brev8 {
2415                rs,
2416                ty,
2417                step,
2418                tmp,
2419                tmp2,
2420                rd,
2421            } => {
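                // Reverse the bits within each byte of `rs`: tmp probes the
                // source bits MSB-first while tmp2 tracks the corresponding
                // destination bit, which is ORed into `rd` for every set source bit.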
2422                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2423                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2424                    .emit(sink, emit_info, state);
2425                // Build the source probe mask: tmp = 1 << (ty.bits() - 1).
2426                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2427                Inst::AluRRImm12 {
2428                    alu_op: AluOPRRI::Slli,
2429                    rd: tmp,
2430                    rs: tmp.to_reg(),
2431                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2432                }
2433                .emit(sink, emit_info, state);
2434                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2435                Inst::AluRRImm12 {
2436                    alu_op: AluOPRRI::Slli,
2437                    rd: tmp2,
2438                    rs: tmp2.to_reg(),
2439                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2440                }
2441                .emit(sink, emit_info, state);
2442
2443                let label_done = sink.get_label();
2444                let label_loop = sink.get_label();
2445                sink.bind_label(label_loop, &mut state.ctrl_plane);
2446                Inst::CondBr {
2447                    taken: CondBrTarget::Label(label_done),
2448                    not_taken: CondBrTarget::Fallthrough,
2449                    kind: IntegerCompare {
2450                        kind: IntCC::SignedLessThanOrEqual,
2451                        rs1: step.to_reg(),
2452                        rs2: zero_reg(),
2453                    },
2454                }
2455                .emit(sink, emit_info, state);
2456                // Test the source bit and, if it is set, OR the destination bit into rd.
2457                {
2458                    Inst::AluRRR {
2459                        alu_op: AluOPRRR::And,
2460                        rd: writable_spilltmp_reg2(),
2461                        rs1: tmp.to_reg(),
2462                        rs2: rs,
2463                    }
2464                    .emit(sink, emit_info, state);
2465                    let label_over = sink.get_label();
2466                    Inst::CondBr {
2467                        taken: CondBrTarget::Label(label_over),
2468                        not_taken: CondBrTarget::Fallthrough,
2469                        kind: IntegerCompare {
2470                            kind: IntCC::Equal,
2471                            rs1: zero_reg(),
2472                            rs2: spilltmp_reg2(),
2473                        },
2474                    }
2475                    .emit(sink, emit_info, state);
2476                    Inst::AluRRR {
2477                        alu_op: AluOPRRR::Or,
2478                        rd,
2479                        rs1: rd.to_reg(),
2480                        rs2: tmp2.to_reg(),
2481                    }
2482                    .emit(sink, emit_info, state);
2483                    sink.bind_label(label_over, &mut state.ctrl_plane);
2484                }
2485                // Decrement step and advance the masks.
2486                {
2487                    Inst::AluRRImm12 {
2488                        alu_op: AluOPRRI::Addi,
2489                        rd: step,
2490                        rs: step.to_reg(),
2491                        imm12: Imm12::from_i16(-1),
2492                    }
2493                    .emit(sink, emit_info, state);
2494                    Inst::AluRRImm12 {
2495                        alu_op: AluOPRRI::Srli,
2496                        rd: tmp,
2497                        rs: tmp.to_reg(),
2498                        imm12: Imm12::ONE,
2499                    }
2500                    .emit(sink, emit_info, state);
2501                    {
2502                        // Reset tmp2:
2503                        // if (step % 8 == 0) then tmp2 = tmp2 >> 15
2504                        // if (step % 8 != 0) then tmp2 = tmp2 << 1
2505                        let label_over = sink.get_label();
2506                        let label_sll_1 = sink.get_label();
2507                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2508                            .emit(sink, emit_info, state);
2509                        Inst::AluRRR {
2510                            alu_op: AluOPRRR::Rem,
2511                            rd: writable_spilltmp_reg2(),
2512                            rs1: step.to_reg(),
2513                            rs2: spilltmp_reg2(),
2514                        }
2515                        .emit(sink, emit_info, state);
2516                        Inst::CondBr {
2517                            taken: CondBrTarget::Label(label_sll_1),
2518                            not_taken: CondBrTarget::Fallthrough,
2519                            kind: IntegerCompare {
2520                                kind: IntCC::NotEqual,
2521                                rs1: spilltmp_reg2(),
2522                                rs2: zero_reg(),
2523                            },
2524                        }
2525                        .emit(sink, emit_info, state);
2526                        Inst::AluRRImm12 {
2527                            alu_op: AluOPRRI::Srli,
2528                            rd: tmp2,
2529                            rs: tmp2.to_reg(),
2530                            imm12: Imm12::from_i16(15),
2531                        }
2532                        .emit(sink, emit_info, state);
2533                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2534                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2535                        Inst::AluRRImm12 {
2536                            alu_op: AluOPRRI::Slli,
2537                            rd: tmp2,
2538                            rs: tmp2.to_reg(),
2539                            imm12: Imm12::ONE,
2540                        }
2541                        .emit(sink, emit_info, state);
2542                        sink.bind_label(label_over, &mut state.ctrl_plane);
2543                    }
2544                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2545                }
2546                sink.bind_label(label_done, &mut state.ctrl_plane);
2547            }
2548            &Inst::StackProbeLoop {
2549                guard_size,
2550                probe_count,
2551                tmp: guard_size_tmp,
2552            } => {
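                // Probe the stack by writing one byte to each guard-size page
                // below SP, starting guard_size * probe_count bytes down and
                // stepping back toward SP one page at a time.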
2553                let step = writable_spilltmp_reg();
2554                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2555                    .iter()
2556                    .for_each(|i| i.emit(sink, emit_info, state));
2557                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2558                    .iter()
2559                    .for_each(|i| i.emit(sink, emit_info, state));
2560
2561                let loop_start = sink.get_label();
2562                let label_done = sink.get_label();
2563                sink.bind_label(loop_start, &mut state.ctrl_plane);
2564                Inst::CondBr {
2565                    taken: CondBrTarget::Label(label_done),
2566                    not_taken: CondBrTarget::Fallthrough,
2567                    kind: IntegerCompare {
2568                        kind: IntCC::UnsignedLessThanOrEqual,
2569                        rs1: step.to_reg(),
2570                        rs2: guard_size_tmp.to_reg(),
2571                    },
2572                }
2573                .emit(sink, emit_info, state);
2574                // compute address.
2575                Inst::AluRRR {
2576                    alu_op: AluOPRRR::Sub,
2577                    rd: writable_spilltmp_reg2(),
2578                    rs1: stack_reg(),
2579                    rs2: step.to_reg(),
2580                }
2581                .emit(sink, emit_info, state);
2582                Inst::Store {
2583                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2584                    op: StoreOP::Sb,
2585                    flags: MemFlags::new(),
2586                    src: zero_reg(),
2587                }
2588                .emit(sink, emit_info, state);
2589                // reset step.
2590                Inst::AluRRR {
2591                    alu_op: AluOPRRR::Sub,
2592                    rd: step,
2593                    rs1: step.to_reg(),
2594                    rs2: guard_size_tmp.to_reg(),
2595                }
2596                .emit(sink, emit_info, state);
2597                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2598                sink.bind_label(label_done, &mut state.ctrl_plane);
2599            }
            &Inst::VecAluRRRImm5 {
                op,
                vd,
                vd_src,
                imm,
                vs2,
                ref mask,
                ..
            } => {
                debug_assert_eq!(vd.to_reg(), vd_src);

                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
            }
            &Inst::VecAluRRRR {
                op,
                vd,
                vd_src,
                vs1,
                vs2,
                ref mask,
                ..
            } => {
                debug_assert_eq!(vd.to_reg(), vd_src);

                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
            }
            &Inst::VecAluRRR {
                op,
                vd,
                vs1,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
            }
            &Inst::VecAluRRImm5 {
                op,
                vd,
                imm,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
            }
            &Inst::VecAluRR {
                op,
                vd,
                vs,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr(op, vd, vs, *mask));
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
            }
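            // 0x57 is the OP-V major opcode; with a static immediate AVL this
            // emits a `vsetivli`-style configuration instruction that sets
            // `vl` and `vtype` for the vector instructions that follow.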
            &Inst::VecSetState { rd, ref vstate } => {
                sink.put4(encode_vcfg_imm(
                    0x57,
                    rd.to_reg(),
                    vstate.avl.unwrap_static(),
                    &vstate.vtype,
                ));

                // Update the current vector emit state.
                state.vstate = EmitVState::Known(*vstate);
            }

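            // Unit-stride vector loads use major opcode 0x07; the element
            // width comes from `eew`, and the addressing-mode fields (`lumop`,
            // `mop`, `nf`) are derived from the `VecAMode`.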
            &Inst::VecLoad {
                eew,
                to,
                ref from,
                ref mask,
                flags,
                ..
            } => {
                // Vector loads don't support immediate offsets, so we may need
                // to materialize the address in a register first.
                let addr = match from {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A register base with a zero offset can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, load the address into a register and load from that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_vmem_load(
                    0x07,
                    to.to_reg(),
                    eew,
                    addr,
                    from.lumop(),
                    *mask,
                    from.mop(),
                    from.nf(),
                ));
            }

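            // Vector stores mirror the load path above, using the store major
            // opcode 0x27 and the store variant (`sumop`) of the unit-stride
            // addressing fields.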
            &Inst::VecStore {
                eew,
                ref to,
                from,
                ref mask,
                flags,
                ..
            } => {
                // Vector stores don't support immediate offsets, so we may need
                // to materialize the address in a register first.
                let addr = match to {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A register base with a zero offset can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, load the address into a register and store to that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_vmem_store(
                    0x27,
                    from,
                    eew,
                    addr,
                    to.sumop(),
                    *mask,
                    to.mop(),
                    to.nf(),
                ));
            }

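            // Islands hold out-of-line data such as branch veneers and
            // constants. If one is needed here, branch around it so execution
            // falls through to the next instruction.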
            Inst::EmitIsland { needed_space } => {
                if sink.island_needed(*needed_space) {
                    let jump_around_label = sink.get_label();
                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }

            Inst::SequencePoint { .. } => {
                // Nothing.
            }
        }
    }
}

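/// Emits an island if the (potentially large) return-call sequence could
/// overflow the buffer's pending fixups, then emits the actual sequence via
/// [return_call_emit_impl].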
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially emit a lot of code (up to 634 bytes!),
    // so let's emit an island here if we need it.
    //
    // It is difficult to calculate exactly how much code is going to be emitted, so
    // we measure it by emitting into a disposable buffer and then checking how many
    // bytes were actually produced.
    let mut buffer = MachBuffer::new();
    // Clone the state so the trial emission doesn't perturb the real one.
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// This should not be called directly; prefer calling
/// [emit_return_call_common_sequence] instead.
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
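    // A sketch of the frame this epilogue unwinds, reconstructed from the
    // offsets used below (highest addresses first):
    //
    //   | ... incoming args ...   |
    //   | return address          | <- SP + sp_to_fp_offset + 8
    //   | saved frame pointer     | <- SP + sp_to_fp_offset
    //   | clobbered callee saves  | (first at SP + sp_to_fp_offset - 8)
    //   | fixed frame storage     |
    //   | outgoing args           | <- SP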
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };

    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8;
    }

    // Restore the link register and frame pointer.
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what
    // the callee is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}