cranelift_codegen/isa/riscv64/inst/emit.rs

//! Riscv64 ISA: binary code emission.

use crate::ir::{self, LibCall, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
};
use cranelift_control::ControlPlane;
pub struct EmitInfo {
    #[expect(dead_code, reason = "may want to be used in the future")]
    shared_flag: settings::Flags,
    isa_flags: super::super::riscv_settings::Flags,
}

impl EmitInfo {
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}

pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
    let real_reg = m.to_real_reg().unwrap().hw_enc();
    debug_assert!(real_reg >= 8 && real_reg < 16);
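    // For example, x8 (s0/fp, hw_enc 8) maps to 0b000 and x15 (a5, hw_enc 15)
    // maps to 0b111 in the 3-bit register fields of compressed instructions.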
    let compressed_reg = real_reg - 8;
    u32::from(compressed_reg)
}

/// The vector unit state known at the current emission point, if any.
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    #[default]
    Unknown,
    Known(VState),
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Vector state.
    /// Controls the current state of the vector unit at the emission point.
    vstate: EmitVState,

    frame_layout: FrameLayout,
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clobber_vstate(&mut self) {
        self.vstate = EmitVState::Unknown;
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            vstate: EmitVState::Unknown,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state.
        self.clobber_vstate();
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl Inst {
    /// Loads an all-ones integer mask of `ty`'s width into `rd`,
    /// zero-extended to 64 bits (e.g. 0xff for I8).
    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        assert!(ty.is_int() && ty.bits() <= 64);
        match ty {
            I64 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
            }
            I32 | I16 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
                insts.push(Inst::Extend {
                    rd,
                    rn: rd.to_reg(),
                    signed: false,
                    from_bits: ty.bits() as u8,
                    to_bits: 64,
                });
            }
            I8 => {
                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
            }
            _ => unreachable!("ty:{:?}", ty),
        }
        insts
    }

    /// Inverts all bits: an alias for `xori rd, rs, -1`.
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }

    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Fli { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtNameGot { .. }
            | Inst::LoadExtNameNear { .. }
            | Inst::LoadExtNameFar { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::LabelAddress { .. }
            | Inst::SequencePoint { .. }
            | Inst::Popcnt { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate, rather it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
            Inst::EmitIsland { .. } => None,
        }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // Check if we need to update the vector state before emitting this instruction.
        if let Some(expected) = self.expected_vstate() {
            if state.vstate != EmitVState::Known(*expected) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: *expected,
                }
                .emit(sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First try to emit this as a compressed instruction.
        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't, emit it as a normal instruction.
            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // We exclude br_table, call, return_call and try_call from
        // these checks since they emit their own islands, and thus
        // are allowed to exceed the worst case size.
        let emits_own_island = match self {
            Inst::BrTable { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::EmitIsland { .. } => true,
            _ => false,
        };
        if !emits_own_island {
            let end_off = sink.cur_offset();
            assert!(
                (end_off - start_off) <= Inst::worst_case_size(),
                "Inst:{:?} length:{} worst_case_size:{}",
                self,
                end_off - start_off,
                Inst::worst_case_size()
            );
        }
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

impl Inst {
    /// Tries to emit this instruction in a compressed form; returns `None` if it can't.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && rs1 != zero_reg()
                && rs2 != zero_reg() =>
            {
                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
                // also swap rs1 with rs2 to get an equivalent instruction, i.e. we
                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Addw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
                && reg_is_compressible(rs1)
                && reg_is_compressible(rs2) =>
            {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };
                // The canonical expansion for these instructions has `rd == rs1`, but
                // these are all commutative operations, so we can swap the operands.
                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };

                sink.put2(encode_ca_type(op, rd, src));
            }

            // The sub instructions are non-commutative, so we can't swap the operands.
            Inst::AluRRR {
                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Subw => CaOp::CSubw,
                    _ => return None,
                };
                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C.
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
                state.clobber_vstate();
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
                state.clobber_vstate();
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put2(0x0000);
            }

            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in
            // the stack pointer (sp=x2), where the immediate is scaled to represent
            // multiples of 16 in the range (-512,496). c.addi16sp is used to adjust the
            // stack pointer in procedure prologues and epilogues. It expands into
            // addi x2, x2, nzimm. c.addi16sp is only valid when nzimm≠0; the code point
            // with nzimm=0 is reserved.
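            //
            // For example, `addi sp, sp, -64` has imm12 = -64: a multiple of 16
            // whose scaled value -4 fits in a signed 6-bit field, so it
            // compresses to `c.addi16sp -64`.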
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
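            //
            // For example, `addi a0, sp, 16` (a0 is x10, a compressible register)
            // encodes the scaled immediate 16 / 4 = 4 and compresses to
            // `c.addi4spn a0, sp, 16`.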
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
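            //
            // For example, `lui t0, 1` fits the signed 6-bit immediate and
            // compresses to `c.lui t0, 1`, while `lui t0, 32` does not
            // (32 lies outside -32..=31).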
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign-extended.
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
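                // E.g. a shift amount of 63 (0b111111) round-trips through
                // `Imm6` as -1; the encoder only keeps the raw low 6 bits.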
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
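                // E.g. `ld rd, 24(sp)` encodes 24 / 8 = 3, so `c.ldsp` can
                // reach byte offsets up to 63 * 8 = 504.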
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
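                    // E.g. `c.lbu` reaches byte offsets 0..=3, while `c.lhu` and
                    // `c.lh` reach only offsets 0 and 2.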
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CS format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`; they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`.
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            _ => return None,
        }

        Some(())
    }

    fn emit_uncompressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) {
        match self {
            &Inst::Nop0 => {
                // do nothing
            }
            // Addi x0, x0, 0
            &Inst::Nop4 => {
                let x = Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: Writable::from_reg(zero_reg()),
                    rs: zero_reg(),
                    imm12: Imm12::ZERO,
                };
                x.emit(sink, emit_info, state)
            }
            &Inst::RawData { ref data } => {
                // Right now we only put a u32 or u64 in this instruction, so it is
                // never very long and there is no need to check whether we need an
                // `emit_island`. If the data were very long, that would be a bug:
                // RawData is typically used to load some data while relying on its
                // position in the code stream, and we could exceed
                // `Inst::worst_case_size`. For more information see
                // https://github.com/bytecodealliance/wasmtime/pull/5612.
                sink.put_data(&data[..]);
            }
            &Inst::Lui { rd, ref imm } => {
                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
                sink.put4(x);
            }
            &Inst::Fli { rd, width, imm } => {
                sink.put4(encode_fli(width, imm, rd));
            }
            &Inst::LoadInlineConst { rd, ty, imm } => {
                let data = &imm.to_le_bytes()[..ty.bytes() as usize];

                let label_data: MachLabel = sink.get_label();
                let label_end: MachLabel = sink.get_label();

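                // The emitted sequence is, roughly (the load of `label_data`
                // below expands to an `auipc` + load pair):
                //
                //     load rd, label_data
                //     j    label_end
                // label_data:
                //     .byte ...   ; ty.bytes() bytes of `imm`
                // label_end: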
                // Load into rd
                Inst::Load {
                    rd,
                    op: LoadOP::from_type(ty),
                    flags: MemFlags::new(),
                    from: AMode::Label(label_data),
                }
                .emit(sink, emit_info, state);

                // Jump over the inline pool
                Inst::gen_jump(label_end).emit(sink, emit_info, state);

                // Emit the inline data
                sink.bind_label(label_data, &mut state.ctrl_plane);
                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);

                sink.bind_label(label_end, &mut state.ctrl_plane);
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                if alu_op.is_convert_to_int() {
                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
                }
                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                frm,
                rd,
                rs1,
                rs2,
            } => {
                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
            }
            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }
            &Inst::DummyUse { .. } => {
                // This has already been handled by Inst::allocate.
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let (rs1, rs2) = if alu_op.reverse_rs() {
                    (rs2, rs1)
                } else {
                    (rs1, rs2)
                };

                sink.put4(encode_r_type(
                    alu_op.op_code(),
                    rd,
                    alu_op.funct3(),
                    rs1,
                    rs2,
                    alu_op.funct7(),
                ));
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                imm12,
            } => {
                let x = alu_op.op_code()
                    | reg_to_gpr_num(rd.to_reg()) << 7
                    | alu_op.funct3() << 12
                    | reg_to_gpr_num(rs) << 15
                    | alu_op.imm12(imm12) << 20;
                sink.put4(x);
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                sink.put4(encode_csr_reg(op, rd, rs, csr));
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                sink.put4(encode_csr_imm(op, rd, csr, imm));
            }
            &Inst::Load {
                rd,
                op: LoadOP::Flh,
                from,
                flags,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // flh unavailable, use an integer load instead
                Inst::Load {
                    rd: writable_spilltmp_reg(),
                    op: LoadOP::Lh,
                    flags,
                    from,
                }
                .emit(sink, emit_info, state);
                // NaN-box the `f16` before loading it into the floating-point
                // register with a 32-bit `fmv`.
                Inst::Lui {
                    rd: writable_spilltmp_reg2(),
                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd: writable_spilltmp_reg(),
                    rs1: spilltmp_reg(),
                    rs2: spilltmp_reg2(),
                }
                .emit(sink, emit_info, state);
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvFmtX,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd,
                    rs: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags,
            } => {
                let base = from.get_base_register();
                let offset = from.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);
                let label = from.get_label_with_sink(sink);

                let (addr, imm12) = match (base, offset_imm12, label) {
                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12), None) => (base, imm12),

                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
                    // register and load from that.
                    (Some(_), None, None) => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }

                    // If the AMode contains a label we can emit an internal relocation that gets
                    // resolved with the correct address later.
                    (None, Some(imm), Some(label)) => {
                        debug_assert_eq!(imm.as_i16(), 0);

                        // Get the current PC.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
                        Inst::Auipc {
                            rd,
                            imm: Imm20::ZERO,
                        }
                        .emit_uncompressed(sink, emit_info, state, start_off);

                        // Emit a relocation for the load. This patches the offset into the instruction.
                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

                        // Imm12 here is meaningless since it's going to get replaced.
                        (rd.to_reg(), Imm12::ZERO)
                    }

                    // These cases are impossible with the current AModes that we have. We either
                    // always have a register, or always have a label. Never both, and never neither.
                    (None, None, None)
                    | (None, Some(_), None)
                    | (Some(_), None, Some(_))
                    | (Some(_), Some(_), Some(_))
                    | (None, None, Some(_)) => {
                        unreachable!("Invalid load address")
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
            }
            &Inst::Store {
                op: StoreOP::Fsh,
                src,
                flags,
                to,
            } if !emit_info.isa_flags.has_zfhmin() => {
                // fsh unavailable, use an integer store instead
                Inst::FpuRR {
                    alu_op: FpuOPRR::FmvXFmt,
                    width: FpuOPWidth::S,
                    frm: FRM::RNE,
                    rd: writable_spilltmp_reg(),
                    rs: src,
                }
                .emit(sink, emit_info, state);
                Inst::Store {
                    to,
                    op: StoreOP::Sh,
                    flags,
                    src: spilltmp_reg(),
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Store { op, src, flags, to } => {
                let base = to.get_base_register();
                let offset = to.get_offset_with_state(state);
                let offset_imm12 = Imm12::maybe_from_i64(offset);

                let (addr, imm12) = match (base, offset_imm12) {
                    // If the offset fits into an imm12 we can directly encode it.
                    (Some(base), Some(imm12)) => (base, imm12),
                    // Otherwise, materialize the address into a register and store through it.
                    _ => {
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
                        (tmp.to_reg(), Imm12::ZERO)
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                // RISC-V does not have a dedicated ret instruction; instead we emit the
                // equivalent `jalr x0, x1, 0`, which jumps to the return address.
                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: link_reg(),
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits: _to_bits,
            } => {
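                // Extends are implemented as a shift pair: shift the value all
                // the way left, then shift it back down arithmetically (signed)
                // or logically (unsigned). E.g. an unsigned 16-to-64-bit extend
                // is `slli rd, rn, 48; srli rd, rd, 48`; u8 instead uses a
                // single `andi rd, rn, 255`.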
                let mut insts = SmallInstVec::new();
                let shift_bits = (64 - from_bits) as i16;
                let is_u8 = || from_bits == 8 && !signed;
                if is_u8() {
                    // Special case for u8.
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Andi,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(255),
                    });
                } else {
                    insts.push(Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd,
                        rs: rn,
                        imm12: Imm12::from_i16(shift_bits),
                    });
                    insts.push(Inst::AluRRImm12 {
                        alu_op: if signed {
                            AluOPRRI::Srai
                        } else {
                            AluOPRRI::Srli
                        },
                        rd,
                        rs: rd.to_reg(),
                        imm12: Imm12::from_i16(shift_bits),
                    });
                }
                insts
                    .into_iter()
                    .for_each(|i| i.emit(sink, emit_info, state));
            }

            &Inst::Call { ref info } => {
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);

                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                Inst::Jalr {
                    rd: writable_link_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);

                if let Some(s) = state.take_stack_map() {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
                if callee_pop_size > 0 {
                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jal {
                        label: try_call.continuation,
                    };
                    jmp.emit(sink, emit_info, state);
                }

                *start_off = sink.cur_offset();
            }

            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                sink.add_call_site();
                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
                    .into_iter()
                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
            }

            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, &info);

                Inst::Jalr {
                    rd: writable_zero_reg(),
                    base: info.dest,
                    offset: Imm12::ZERO,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
                sink.add_uncond_branch(*start_off, *start_off + 4, label);
                sink.put4(0b1101111);
                state.clobber_vstate();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                match taken {
                    CondBrTarget::Label(label) => {
                        let code = kind.emit();
                        let code_inverse = kind.inverse().emit().to_le_bytes();
                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
                        sink.put4(code);
                    }
                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
                }

                match not_taken {
                    CondBrTarget::Label(label) => {
                        Inst::gen_jump(label).emit(sink, emit_info, state)
                    }
                    CondBrTarget::Fallthrough => {}
                };
            }

            &Inst::Mov { rd, rm, ty } => {
                debug_assert_eq!(rd.to_reg().class(), rm.class());
                if rd.to_reg() == rm {
                    return;
                }

                match rm.class() {
                    RegClass::Int => Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
                        rd,
                        rs: rm,
                        imm12: Imm12::ZERO,
                    },
                    RegClass::Float => Inst::FpuRRR {
                        alu_op: FpuOPRRR::Fsgnj,
                        width: FpuOPWidth::try_from(ty).unwrap(),
                        frm: FRM::RNE,
                        rd,
                        rs1: rm,
                        rs2: rm,
                    },
                    RegClass::Vector => Inst::VecAluRRImm5 {
                        op: VecAluOpRRImm5::VmvrV,
                        vd: rd,
                        vs2: rm,
                        // Imm 0 means copy 1 register.
                        imm: Imm5::maybe_from_i8(0).unwrap(),
                        mask: VecOpMasking::Disabled,
                        // Vstate for this instruction is ignored.
                        vstate: VState::from_type(ty),
                    },
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MovFromPReg { rd, rm } => {
                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
            }
1378
1379            &Inst::BrTable {
1380                index,
1381                tmp1,
1382                tmp2,
1383                ref targets,
1384            } => {
1385                let ext_index = writable_spilltmp_reg();
1386
1387                let label_compute_target = sink.get_label();
1388
1389                // The default target is passed in as the 0th element of `targets`;
1390                // separate it out here for clarity.
1391                let default_target = targets[0];
1392                let targets = &targets[1..];
1393
1394                // We are going to potentially emit a large number of instructions, so ensure that we emit an island
1395                // now if we need one.
1396                //
1397                // The worst case PC calculation takes 12 instructions, and each entry in the jump table is 2 instructions.
1398                // Check whether we need to emit an island here so that the jump stays in range.
1399                let inst_count = 12 + (targets.len() * 2);
1400                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1401                if sink.island_needed(distance) {
1402                    let jump_around_label = sink.get_label();
1403                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1404                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1405                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1406                }
1407
1408                // We emit a bounds check on the index: if the index is larger than the number of
1409                // jump table entries, we jump to the default block. Otherwise we compute a jump
1410                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1411                // that offset. Each jump table entry is a regular auipc+jalr pair which we emit sequentially.
1412                //
1413                // Build the following sequence:
1414                //
1415                // extend_index:
1416                //     zext.w  ext_index, index
1417                // bounds_check:
1418                //     li      tmp, n_labels
1419                //     bltu    ext_index, tmp, compute_target
1420                // jump_to_default_block:
1421                //     auipc   pc, 0
1422                //     jalr    zero, pc, default_block
1423                // compute_target:
1424                //     auipc   pc, 0
1425                //     slli    tmp, ext_index, 3
1426                //     add     pc, pc, tmp
1427                //     jalr    zero, pc, 0x10
1428                // jump_table:
1429                //     ; This repeats for each entry in the jump table
1430                //     auipc   pc, 0
1431                //     jalr    zero, pc, block_target
1432
1433                // Extend the index to 64 bits.
1434                //
1435                // This prevents us from branching on the top 32 bits of the index, which
1436                // are undefined.
1437                Inst::Extend {
1438                    rd: ext_index,
1439                    rn: index,
1440                    signed: false,
1441                    from_bits: 32,
1442                    to_bits: 64,
1443                }
1444                .emit(sink, emit_info, state);
1445
1446                // Bounds check.
1447                //
1448                // Check if the index passed in is larger than the number of jump table
1449                // entries that we have. If it is, we fall through to a jump to the
1450                // default block.
1451                Inst::load_constant_u32(tmp2, targets.len() as u64)
1452                    .iter()
1453                    .for_each(|i| i.emit(sink, emit_info, state));
1454                Inst::CondBr {
1455                    taken: CondBrTarget::Label(label_compute_target),
1456                    not_taken: CondBrTarget::Fallthrough,
1457                    kind: IntegerCompare {
1458                        kind: IntCC::UnsignedLessThan,
1459                        rs1: ext_index.to_reg(),
1460                        rs2: tmp2.to_reg(),
1461                    },
1462                }
1463                .emit(sink, emit_info, state);
1464
1465                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1466                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1467                    .iter()
1468                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1469
1470                // Compute the jump table offset.
1471                // We need to emit a PC-relative offset, so first capture the current PC.
1472                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1473
1474                // Get the current PC.
1475                Inst::Auipc {
1476                    rd: tmp1,
1477                    imm: Imm20::ZERO,
1478                }
1479                .emit_uncompressed(sink, emit_info, state, start_off);
1480
1481                // These instructions must be emitted as uncompressed since we
1482                // are manually computing the offset from the PC.
1483
1484                // Multiply the index by 8, since that is the size in
1485                // bytes of each jump table entry
1486                Inst::AluRRImm12 {
1487                    alu_op: AluOPRRI::Slli,
1488                    rd: tmp2,
1489                    rs: ext_index.to_reg(),
1490                    imm12: Imm12::from_i16(3),
1491                }
1492                .emit_uncompressed(sink, emit_info, state, start_off);
1493
1494                // Calculate the base of the jump, PC + the offset from above.
1495                Inst::AluRRR {
1496                    alu_op: AluOPRRR::Add,
1497                    rd: tmp1,
1498                    rs1: tmp1.to_reg(),
1499                    rs2: tmp2.to_reg(),
1500                }
1501                .emit_uncompressed(sink, emit_info, state, start_off);
1502
1503                // Jump into the jump table.
1504                // We add a 16 byte offset here because the jump table starts
1505                // 4 instructions after the AUIPC that was used to get the PC.
1506                Inst::Jalr {
1507                    rd: writable_zero_reg(),
1508                    base: tmp1.to_reg(),
1509                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1510                }
1511                .emit_uncompressed(sink, emit_info, state, start_off);
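                // For a given index i, the entry taken is at pc_auipc + 16 + 8*i:
                // the 16 bytes skip the auipc/slli/add/jalr sequence itself, and
                // each entry is 8 bytes (an uncompressed auipc + jalr pair).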
1512
1513                // Emit the jump table.
1514                //
1515                // Each entry is an auipc + jalr pair to the target block; an island was
1516                // already emitted above if one was necessary.
1517
1518                // Emit the jumps back to back
1519                for target in targets.iter() {
1520                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1521
1522                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1523                        .iter()
1524                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1525                }
1526
1527                // We've just emitted an island that is safe up to *here*.
1528                // Mark it as such so that we don't needlessly emit additional islands.
1529                *start_off = sink.cur_offset();
1530            }
1531
1532            &Inst::Atomic {
1533                op,
1534                rd,
1535                addr,
1536                src,
1537                amo,
1538            } => {
1539                // TODO: get flags from original CLIF atomic instruction
1540                let flags = MemFlags::new();
1541                if let Some(trap_code) = flags.trap_code() {
1542                    sink.add_trap(trap_code);
1543                }
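                // AMO instructions use the standard R-type layout: rd at bit 7,
                // funct3 (width) at bit 12, rs1 (address) at bit 15, rs2 (source)
                // at bit 20, and funct7 (funct5 plus the aq/rl ordering bits) at
                // bit 25.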
1544                let x = op.op_code()
1545                    | reg_to_gpr_num(rd.to_reg()) << 7
1546                    | op.funct3() << 12
1547                    | reg_to_gpr_num(addr) << 15
1548                    | reg_to_gpr_num(src) << 20
1549                    | op.funct7(amo) << 25;
1550
1551                sink.put4(x);
1552            }
1553            &Inst::Fence { pred, succ } => {
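                // FENCE is an I-type instruction with opcode 0b0001111; rd,
                // funct3, and rs1 are all zero, and the successor/predecessor
                // sets occupy bits 20..24 and 24..28 respectively.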
1554                let x = 0b0001111
1555                    | 0b00000 << 7
1556                    | 0b000 << 12
1557                    | 0b00000 << 15
1558                    | (succ as u32) << 20
1559                    | (pred as u32) << 24;
1560
1561                sink.put4(x);
1562            }
1563            &Inst::Auipc { rd, imm } => {
1564                sink.put4(enc_auipc(rd, imm));
1565            }
1566
1567            &Inst::LoadAddr { rd, mem } => {
1568                let base = mem.get_base_register();
1569                let offset = mem.get_offset_with_state(state);
1570                let offset_imm12 = Imm12::maybe_from_i64(offset);
1571
1572                match (mem, base, offset_imm12) {
1573                    (_, Some(rs), Some(imm12)) => {
1574                        Inst::AluRRImm12 {
1575                            alu_op: AluOPRRI::Addi,
1576                            rd,
1577                            rs,
1578                            imm12,
1579                        }
1580                        .emit(sink, emit_info, state);
1581                    }
1582                    (_, Some(rs), None) => {
1583                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1584                        insts.push(Inst::AluRRR {
1585                            alu_op: AluOPRRR::Add,
1586                            rd,
1587                            rs1: rd.to_reg(),
1588                            rs2: rs,
1589                        });
1590                        insts
1591                            .into_iter()
1592                            .for_each(|inst| inst.emit(sink, emit_info, state));
1593                    }
1594                    (AMode::Const(addr), None, _) => {
1595                        // Get an address label for the constant and recurse.
1596                        let label = sink.get_label_for_constant(addr);
1597                        Inst::LoadAddr {
1598                            rd,
1599                            mem: AMode::Label(label),
1600                        }
1601                        .emit(sink, emit_info, state);
1602                    }
1603                    (AMode::Label(label), None, _) => {
1604                        // Get the current PC.
1605                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1606                        let inst = Inst::Auipc {
1607                            rd,
1608                            imm: Imm20::ZERO,
1609                        };
1610                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1611
1612                        // Emit an add to the address with a relocation.
1613                        // This later gets patched up with the correct offset.
1614                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1615                        Inst::AluRRImm12 {
1616                            alu_op: AluOPRRI::Addi,
1617                            rd,
1618                            rs: rd.to_reg(),
1619                            imm12: Imm12::ZERO,
1620                        }
1621                        .emit_uncompressed(sink, emit_info, state, start_off);
1622                    }
1623                    (amode, _, _) => {
1624                        unimplemented!("LoadAddr: {:?}", amode);
1625                    }
1626                }
1627            }
1628
1629            &Inst::Select {
1630                ref dst,
1631                condition,
1632                ref x,
1633                ref y,
1634            } => {
1635                // The general form for this select is the following:
1636                //
1637                //     mv rd, x
1638                //     b{cond} rcond, label_end
1639                //     mv rd, y
1640                // label_end:
1641                //     ... etc
1642                //
1643                // This is built on the assumption that moves are cheap, but branches and jumps
1644                // are not. So with this format we always avoid one jump instruction at the expense
1645                // of an unconditional move.
1646                //
1647                // We also perform another optimization here. If the destination register is the same
1648                // as one of the input registers, we can avoid emitting the first unconditional move
1649                // and emit just the branch and the second move.
1650                //
1651                // To make sure that this happens as often as possible, we also try to invert the
1652                // condition, so that if either of the input registers is the same as the destination
1653                // we avoid that move.
1654
1655                let label_end = sink.get_label();
1656
1657                let xregs = x.regs();
1658                let yregs = y.regs();
1659                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1660                let condregs = condition.regs();
1661
1662                // We are going to write to the destination register before evaluating
1663                // the condition, so we need to make sure that the destination register
1664                // is not one of the condition registers.
1665                //
1666                // This should never happen, since hopefully the regalloc constraints
1667                // for this register are set up correctly.
1668                debug_assert_ne!(dstregs, condregs);
1669
1670                // Check if we can invert the condition and avoid moving the y registers into
1671                // the destination. This allows us to only emit the branch and one of the moves.
1672                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1673                    (yregs, xregs, condition.inverse())
1674                } else {
1675                    (xregs, yregs, condition)
1676                };
1677
1678                // Unconditionally move one of the values to the destination register.
1679                //
1680                // These moves may not end up being emitted if the source and
1681                // destination registers are the same. That logic is built into
1682                // the emit function for `Inst::Mov`.
1683                for i in gen_moves(dst.regs(), uncond_move) {
1684                    i.emit(sink, emit_info, state);
1685                }
1686
1687                // If the condition passes we skip over the conditional move
1688                Inst::CondBr {
1689                    taken: CondBrTarget::Label(label_end),
1690                    not_taken: CondBrTarget::Fallthrough,
1691                    kind: condition,
1692                }
1693                .emit(sink, emit_info, state);
1694
1695                // Move the conditional value to the destination register.
1696                for i in gen_moves(dst.regs(), cond_move) {
1697                    i.emit(sink, emit_info, state);
1698                }
1699
1700                sink.bind_label(label_end, &mut state.ctrl_plane);
1701            }
1702            &Inst::Jalr { rd, base, offset } => {
1703                sink.put4(enc_jalr(rd, base, offset));
1704                state.clobber_vstate();
1705            }
1706            &Inst::EBreak => {
1707                sink.put4(0x00100073);
1708            }
1709            &Inst::AtomicCas {
1710                offset,
1711                t0,
1712                dst,
1713                e,
1714                addr,
1715                v,
1716                ty,
1717            } => {
1718                //     # addr holds address of memory location
1719                //     # e holds expected value
1720                //     # v holds desired value
1721                //     # dst holds return value
1722                // cas:
1723                //     lr.w dst, (addr)       # Load original value.
1724                //     bne dst, e, fail       # Doesn’t match, so fail.
1725                //     sc.w t0, v, (addr)     # Try to update.
1726                //     bnez t0, cas           # If the store failed, retry.
1727                // fail:
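                //
                // For types narrower than 32 bits there is no matching LR/SC
                // width, so the loop operates on the containing aligned word:
                // `extract` pulls the narrow value out of the loaded word and
                // `merge` splices the new value back in before the store.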
1728                let fail_label = sink.get_label();
1729                let cas_label = sink.get_label();
1730                sink.bind_label(cas_label, &mut state.ctrl_plane);
1731                Inst::Atomic {
1732                    op: AtomicOP::load_op(ty),
1733                    rd: dst,
1734                    addr,
1735                    src: zero_reg(),
1736                    amo: AMO::SeqCst,
1737                }
1738                .emit(sink, emit_info, state);
1739                if ty.bits() < 32 {
1740                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1741                        .iter()
1742                        .for_each(|i| i.emit(sink, emit_info, state));
1743                } else if ty.bits() == 32 {
1744                    Inst::Extend {
1745                        rd: dst,
1746                        rn: dst.to_reg(),
1747                        signed: false,
1748                        from_bits: 32,
1749                        to_bits: 64,
1750                    }
1751                    .emit(sink, emit_info, state);
1752                }
1753                Inst::CondBr {
1754                    taken: CondBrTarget::Label(fail_label),
1755                    not_taken: CondBrTarget::Fallthrough,
1756                    kind: IntegerCompare {
1757                        kind: IntCC::NotEqual,
1758                        rs1: e,
1759                        rs2: dst.to_reg(),
1760                    },
1761                }
1762                .emit(sink, emit_info, state);
1763                let store_value = if ty.bits() < 32 {
1764                    // Reload the containing word into t0.
1765                    Inst::Atomic {
1766                        op: AtomicOP::load_op(ty),
1767                        rd: t0,
1768                        addr,
1769                        src: zero_reg(),
1770                        amo: AMO::SeqCst,
1771                    }
1772                    .emit(sink, emit_info, state);
1773                    // Merge the new value into the rest of the loaded word.
1774                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1775                        .iter()
1776                        .for_each(|i| i.emit(sink, emit_info, state));
1777                    t0.to_reg()
1778                } else {
1779                    v
1780                };
1781                Inst::Atomic {
1782                    op: AtomicOP::store_op(ty),
1783                    rd: t0,
1784                    addr,
1785                    src: store_value,
1786                    amo: AMO::SeqCst,
1787                }
1788                .emit(sink, emit_info, state);
1789                // Check whether our store succeeded.
1790                Inst::CondBr {
1791                    taken: CondBrTarget::Label(cas_label),
1792                    not_taken: CondBrTarget::Fallthrough,
1793                    kind: IntegerCompare {
1794                        kind: IntCC::NotEqual,
1795                        rs1: t0.to_reg(),
1796                        rs2: zero_reg(),
1797                    },
1798                }
1799                .emit(sink, emit_info, state);
1800                sink.bind_label(fail_label, &mut state.ctrl_plane);
1801            }
1802            &Inst::AtomicRmwLoop {
1803                offset,
1804                op,
1805                dst,
1806                ty,
1807                p,
1808                x,
1809                t0,
1810            } => {
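                // The emitted code follows the usual LR/SC retry shape:
                //
                // retry:
                //     <load old value into dst>
                //     <compute store_value>      # op-specific, see below
                //     <store-conditional store_value, status in t0>
                //     bnez t0, retry             # store failed, try again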
1811                let retry = sink.get_label();
1812                sink.bind_label(retry, &mut state.ctrl_plane);
1813                // load old value.
1814                Inst::Atomic {
1815                    op: AtomicOP::load_op(ty),
1816                    rd: dst,
1817                    addr: p,
1818                    src: zero_reg(),
1819                    amo: AMO::SeqCst,
1820                }
1821                .emit(sink, emit_info, state);
1823
1824                let store_value: Reg = match op {
1825                    crate::ir::AtomicRmwOp::Add
1826                    | crate::ir::AtomicRmwOp::Sub
1827                    | crate::ir::AtomicRmwOp::And
1828                    | crate::ir::AtomicRmwOp::Or
1829                    | crate::ir::AtomicRmwOp::Xor => {
1830                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1831                            .iter()
1832                            .for_each(|i| i.emit(sink, emit_info, state));
1833                        Inst::AluRRR {
1834                            alu_op: match op {
1835                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1836                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1837                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1838                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1839                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1840                                _ => unreachable!(),
1841                            },
1842                            rd: t0,
1843                            rs1: dst.to_reg(),
1844                            rs2: x,
1845                        }
1846                        .emit(sink, emit_info, state);
1847                        Inst::Atomic {
1848                            op: AtomicOP::load_op(ty),
1849                            rd: writable_spilltmp_reg2(),
1850                            addr: p,
1851                            src: zero_reg(),
1852                            amo: AMO::SeqCst,
1853                        }
1854                        .emit(sink, emit_info, state);
1855                        AtomicOP::merge(
1856                            writable_spilltmp_reg2(),
1857                            writable_spilltmp_reg(),
1858                            offset,
1859                            t0.to_reg(),
1860                            ty,
1861                        )
1862                        .iter()
1863                        .for_each(|i| i.emit(sink, emit_info, state));
1864                        spilltmp_reg2()
1865                    }
1866                    crate::ir::AtomicRmwOp::Nand => {
1867                        if ty.bits() < 32 {
1868                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1869                                .iter()
1870                                .for_each(|i| i.emit(sink, emit_info, state));
1871                        }
1872                        Inst::AluRRR {
1873                            alu_op: AluOPRRR::And,
1874                            rd: t0,
1875                            rs1: x,
1876                            rs2: dst.to_reg(),
1877                        }
1878                        .emit(sink, emit_info, state);
1879                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1880                        if ty.bits() < 32 {
1881                            Inst::Atomic {
1882                                op: AtomicOP::load_op(ty),
1883                                rd: writable_spilltmp_reg2(),
1884                                addr: p,
1885                                src: zero_reg(),
1886                                amo: AMO::SeqCst,
1887                            }
1888                            .emit(sink, emit_info, state);
1889                            AtomicOP::merge(
1890                                writable_spilltmp_reg2(),
1891                                writable_spilltmp_reg(),
1892                                offset,
1893                                t0.to_reg(),
1894                                ty,
1895                            )
1896                            .iter()
1897                            .for_each(|i| i.emit(sink, emit_info, state));
1898                            spilltmp_reg2()
1899                        } else {
1900                            t0.to_reg()
1901                        }
1902                    }
1903
1904                    crate::ir::AtomicRmwOp::Umin
1905                    | crate::ir::AtomicRmwOp::Umax
1906                    | crate::ir::AtomicRmwOp::Smin
1907                    | crate::ir::AtomicRmwOp::Smax => {
1908                        let label_select_dst = sink.get_label();
1909                        let label_select_done = sink.get_label();
1910                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1911                        {
1912                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1913                        } else {
1914                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1915                        }
1916                        .iter()
1917                        .for_each(|i| i.emit(sink, emit_info, state));
1918
1919                        Inst::CondBr {
1920                            taken: CondBrTarget::Label(label_select_dst),
1921                            not_taken: CondBrTarget::Fallthrough,
1922                            kind: IntegerCompare {
1923                                kind: match op {
1924                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1925                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1926                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1927                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1928                                    _ => unreachable!(),
1929                                },
1930                                rs1: dst.to_reg(),
1931                                rs2: x,
1932                            },
1933                        }
1934                        .emit(sink, emit_info, state);
1935                        // here we select x.
1936                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1937                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1938                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1939                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1940                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1941                        Inst::Atomic {
1942                            op: AtomicOP::load_op(ty),
1943                            rd: writable_spilltmp_reg2(),
1944                            addr: p,
1945                            src: zero_reg(),
1946                            amo: AMO::SeqCst,
1947                        }
1948                        .emit(sink, emit_info, state);
1949                        AtomicOP::merge(
1950                            writable_spilltmp_reg2(),
1951                            writable_spilltmp_reg(),
1952                            offset,
1953                            t0.to_reg(),
1954                            ty,
1955                        )
1956                        .iter()
1957                        .for_each(|i| i.emit(sink, emit_info, state));
1958                        spilltmp_reg2()
1959                    }
1960                    crate::ir::AtomicRmwOp::Xchg => {
1961                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1962                            .iter()
1963                            .for_each(|i| i.emit(sink, emit_info, state));
1964                        Inst::Atomic {
1965                            op: AtomicOP::load_op(ty),
1966                            rd: writable_spilltmp_reg2(),
1967                            addr: p,
1968                            src: zero_reg(),
1969                            amo: AMO::SeqCst,
1970                        }
1971                        .emit(sink, emit_info, state);
1972                        AtomicOP::merge(
1973                            writable_spilltmp_reg2(),
1974                            writable_spilltmp_reg(),
1975                            offset,
1976                            x,
1977                            ty,
1978                        )
1979                        .iter()
1980                        .for_each(|i| i.emit(sink, emit_info, state));
1981                        spilltmp_reg2()
1982                    }
1983                };
1984
1985                Inst::Atomic {
1986                    op: AtomicOP::store_op(ty),
1987                    rd: t0,
1988                    addr: p,
1989                    src: store_value,
1990                    amo: AMO::SeqCst,
1991                }
1992                .emit(sink, emit_info, state);
1993
1994                // If the store failed, retry.
1995                Inst::CondBr {
1996                    taken: CondBrTarget::Label(retry),
1997                    not_taken: CondBrTarget::Fallthrough,
1998                    kind: IntegerCompare {
1999                        kind: IntCC::NotEqual,
2000                        rs1: t0.to_reg(),
2001                        rs2: zero_reg(),
2002                    },
2003                }
2004                .emit(sink, emit_info, state);
2005            }
2006
2007            &Inst::LoadExtNameGot { rd, ref name } => {
2008                // Load a PC-relative address into a register.
2009                // RISC-V does this slightly differently from other arches. We emit a relocation
2010                // with a label, instead of the symbol itself.
2011                //
2012                // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2013                //
2014                // Emit the following code:
2015                // label:
2016                //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2017                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2018
2019                // Create the label that is going to be published to the final binary object.
2020                let auipc_label = sink.get_label();
2021                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2022
2023                // Get the current PC.
2024                sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2025                Inst::Auipc {
2026                    rd,
2027                    imm: Imm20::from_i32(0),
2028                }
2029                .emit_uncompressed(sink, emit_info, state, start_off);
2030
2031                // The `ld` here points to the `auipc` label instead of directly to the symbol.
2032                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2033                Inst::Load {
2034                    rd,
2035                    op: LoadOP::Ld,
2036                    flags: MemFlags::trusted(),
2037                    from: AMode::RegOffset(rd.to_reg(), 0),
2038                }
2039                .emit_uncompressed(sink, emit_info, state, start_off);
2040            }
2041
2042            &Inst::LoadExtNameFar {
2043                rd,
2044                ref name,
2045                offset,
2046            } => {
2047                // In the non-PIC sequence we relocate the absolute address into
2048                // a preallocated slot, load it into a register, and jump over
2049                // it.
2050                //
2051                // Emit the following code:
2052                //   ld rd, label_data
2053                //   j label_end
2054                // label_data:
2055                //   <8 byte space>           # ABS8
2056                // label_end:
2057
2058                let label_data = sink.get_label();
2059                let label_end = sink.get_label();
2060
2061                // Load the value from a label
2062                Inst::Load {
2063                    rd,
2064                    op: LoadOP::Ld,
2065                    flags: MemFlags::trusted(),
2066                    from: AMode::Label(label_data),
2067                }
2068                .emit(sink, emit_info, state);
2069
2070                // Jump over the data
2071                Inst::gen_jump(label_end).emit(sink, emit_info, state);
2072
2073                sink.bind_label(label_data, &mut state.ctrl_plane);
2074                sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2075                sink.put8(0);
2076
2077                sink.bind_label(label_end, &mut state.ctrl_plane);
2078            }
2079
2080            &Inst::LoadExtNameNear {
2081                rd,
2082                ref name,
2083                offset,
2084            } => {
2085                // Emit the following code:
2086                // label:
2087                //   auipc rd, 0              # R_RISCV_PCREL_HI20 (symbol_name)
2088                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
2089
2090                let auipc_label = sink.get_label();
2091                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2092
2093                // Get the current PC.
2094                sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2095                Inst::Auipc {
2096                    rd,
2097                    imm: Imm20::from_i32(0),
2098                }
2099                .emit_uncompressed(sink, emit_info, state, start_off);
2100
2101                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2102                Inst::AluRRImm12 {
2103                    alu_op: AluOPRRI::Addi,
2104                    rd,
2105                    rs: rd.to_reg(),
2106                    imm12: Imm12::ZERO,
2107                }
2108                .emit_uncompressed(sink, emit_info, state, start_off);
2109            }
2110
2111            &Inst::LabelAddress { dst, label } => {
2112                let offset = sink.cur_offset();
2113                Inst::Auipc {
2114                    rd: dst,
2115                    imm: Imm20::from_i32(0),
2116                }
2117                .emit_uncompressed(sink, emit_info, state, start_off);
2118                sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2119
2120                let offset = sink.cur_offset();
2121                Inst::AluRRImm12 {
2122                    alu_op: AluOPRRI::Addi,
2123                    rd: dst,
2124                    rs: dst.to_reg(),
2125                    imm12: Imm12::ZERO,
2126                }
2127                .emit_uncompressed(sink, emit_info, state, start_off);
2128                sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2129            }
2130
2131            &Inst::ElfTlsGetAddr { rd, ref name } => {
2132                // RISC-V's TLS GD model is slightly different from other arches.
2133                //
2134                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2135                // of the address relative to the GOT entry. This relocation points to
2136                // the symbol as usual.
2137                //
2138                // However, when loading the bottom 12 bits of the address, we need to
2139                // use a label that points to the previous AUIPC instruction.
2140                //
2141                // label:
2142                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2143                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2144                //
2145                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2146
2147                // Create the label that is going to be published to the final binary object.
2148                let auipc_label = sink.get_label();
2149                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2150
2151                // Get the current PC.
2152                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2153                Inst::Auipc {
2154                    rd,
2155                    imm: Imm20::from_i32(0),
2156                }
2157                .emit_uncompressed(sink, emit_info, state, start_off);
2158
2159                // The `addi` here points to the `auipc` label instead of directly to the symbol.
2160                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2161                Inst::AluRRImm12 {
2162                    alu_op: AluOPRRI::Addi,
2163                    rd,
2164                    rs: rd.to_reg(),
2165                    imm12: Imm12::from_i16(0),
2166                }
2167                .emit_uncompressed(sink, emit_info, state, start_off);
2168
2169                Inst::Call {
2170                    info: Box::new(CallInfo::empty(
2171                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2172                        CallConv::SystemV,
2173                    )),
2174                }
2175                .emit_uncompressed(sink, emit_info, state, start_off);
2176            }
2177
2178            &Inst::TrapIf {
2179                rs1,
2180                rs2,
2181                cc,
2182                trap_code,
2183            } => {
2184                let label_end = sink.get_label();
2185                let cond = IntegerCompare { kind: cc, rs1, rs2 };
2186
2187                // Jump over the trap if the condition is false.
2188                Inst::CondBr {
2189                    taken: CondBrTarget::Label(label_end),
2190                    not_taken: CondBrTarget::Fallthrough,
2191                    kind: cond.inverse(),
2192                }
2193                .emit(sink, emit_info, state);
2194                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2195
2196                sink.bind_label(label_end, &mut state.ctrl_plane);
2197            }
2198            &Inst::Udf { trap_code } => {
2199                sink.add_trap(trap_code);
2200                sink.put_data(Inst::TRAP_OPCODE);
2201            }
2202            &Inst::AtomicLoad { rd, ty, p } => {
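                // A sequentially consistent load is modeled as a plain load
                // bracketed by fences (fence rw,rw; load; fence r,rw), which
                // is the fence-based mapping recommended by the RISC-V
                // memory model annex.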
2203                // emit the fence.
2204                Inst::Fence {
2205                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2206                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2207                }
2208                .emit(sink, emit_info, state);
2209                // load.
2210                Inst::Load {
2211                    rd,
2212                    op: LoadOP::from_type(ty),
2213                    flags: MemFlags::new(),
2214                    from: AMode::RegOffset(p, 0),
2215                }
2216                .emit(sink, emit_info, state);
2217                Inst::Fence {
2218                    pred: Inst::FENCE_REQ_R,
2219                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2220                }
2221                .emit(sink, emit_info, state);
2222            }
2223            &Inst::AtomicStore { src, ty, p } => {
2224                Inst::Fence {
2225                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2226                    succ: Inst::FENCE_REQ_W,
2227                }
2228                .emit(sink, emit_info, state);
2229                Inst::Store {
2230                    to: AMode::RegOffset(p, 0),
2231                    op: StoreOP::from_type(ty),
2232                    flags: MemFlags::new(),
2233                    src,
2234                }
2235                .emit(sink, emit_info, state);
2236            }
2237
2238            &Inst::Popcnt {
2239                sum,
2240                tmp,
2241                step,
2242                rs,
2243                ty,
2244            } => {
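                // Bit-at-a-time population count; in pseudocode:
                //
                //     sum = 0
                //     step = ty.bits()
                //     tmp = 1 << (ty.bits() - 1)    # MSB mask
                //     while step > 0:
                //         if rs & tmp != 0 { sum += 1 }
                //         step -= 1
                //         tmp >>= 1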
2245                // Initialize sum to 0.
2246                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2247                // Initialize step to the bit width of the type.
2248                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2249                    .emit(sink, emit_info, state);
2250                // Set tmp to the mask for the most significant bit: 1 << (bits - 1).
2251                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2252                Inst::AluRRImm12 {
2253                    alu_op: AluOPRRI::Slli,
2254                    rd: tmp,
2255                    rs: tmp.to_reg(),
2256                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2257                }
2258                .emit(sink, emit_info, state);
2259                let label_done = sink.get_label();
2260                let label_loop = sink.get_label();
2261                sink.bind_label(label_loop, &mut state.ctrl_plane);
2262                Inst::CondBr {
2263                    taken: CondBrTarget::Label(label_done),
2264                    not_taken: CondBrTarget::Fallthrough,
2265                    kind: IntegerCompare {
2266                        kind: IntCC::SignedLessThanOrEqual,
2267                        rs1: step.to_reg(),
2268                        rs2: zero_reg(),
2269                    },
2270                }
2271                .emit(sink, emit_info, state);
2272                // test and add sum.
2273                {
2274                    Inst::AluRRR {
2275                        alu_op: AluOPRRR::And,
2276                        rd: writable_spilltmp_reg2(),
2277                        rs1: tmp.to_reg(),
2278                        rs2: rs,
2279                    }
2280                    .emit(sink, emit_info, state);
2281                    let label_over = sink.get_label();
2282                    Inst::CondBr {
2283                        taken: CondBrTarget::Label(label_over),
2284                        not_taken: CondBrTarget::Fallthrough,
2285                        kind: IntegerCompare {
2286                            kind: IntCC::Equal,
2287                            rs1: zero_reg(),
2288                            rs2: spilltmp_reg2(),
2289                        },
2290                    }
2291                    .emit(sink, emit_info, state);
2292                    Inst::AluRRImm12 {
2293                        alu_op: AluOPRRI::Addi,
2294                        rd: sum,
2295                        rs: sum.to_reg(),
2296                        imm12: Imm12::ONE,
2297                    }
2298                    .emit(sink, emit_info, state);
2299                    sink.bind_label(label_over, &mut state.ctrl_plane);
2300                }
2301                // set step and tmp.
2302                {
2303                    Inst::AluRRImm12 {
2304                        alu_op: AluOPRRI::Addi,
2305                        rd: step,
2306                        rs: step.to_reg(),
2307                        imm12: Imm12::from_i16(-1),
2308                    }
2309                    .emit(sink, emit_info, state);
2310                    Inst::AluRRImm12 {
2311                        alu_op: AluOPRRI::Srli,
2312                        rd: tmp,
2313                        rs: tmp.to_reg(),
2314                        imm12: Imm12::ONE,
2315                    }
2316                    .emit(sink, emit_info, state);
2317                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2318                }
2319                sink.bind_label(label_done, &mut state.ctrl_plane);
2320            }
2321            &Inst::Cltz {
2322                sum,
2323                tmp,
2324                step,
2325                rs,
2326                leading,
2327                ty,
2328            } => {
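                // Count leading or trailing zeros by scanning with a one-bit
                // mask and stopping at the first set bit; in pseudocode:
                //
                //     sum = 0
                //     step = ty.bits()
                //     tmp = leading ? 1 << (ty.bits() - 1) : 1
                //     while step > 0 && rs & tmp == 0:
                //         sum += 1
                //         step -= 1
                //         tmp = leading ? tmp >> 1 : tmp << 1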
2329                // Initialize sum to 0.
2330                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2331                // Initialize step to the bit width of the type.
2332                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2333                    .emit(sink, emit_info, state);
2334                // Set tmp to the scan mask: the MSB for leading, bit 0 for trailing.
2335                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2336                if leading {
2337                    Inst::AluRRImm12 {
2338                        alu_op: AluOPRRI::Slli,
2339                        rd: tmp,
2340                        rs: tmp.to_reg(),
2341                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2342                    }
2343                    .emit(sink, emit_info, state);
2344                }
2345                let label_done = sink.get_label();
2346                let label_loop = sink.get_label();
2347                sink.bind_label(label_loop, &mut state.ctrl_plane);
2348                Inst::CondBr {
2349                    taken: CondBrTarget::Label(label_done),
2350                    not_taken: CondBrTarget::Fallthrough,
2351                    kind: IntegerCompare {
2352                        kind: IntCC::SignedLessThanOrEqual,
2353                        rs1: step.to_reg(),
2354                        rs2: zero_reg(),
2355                    },
2356                }
2357                .emit(sink, emit_info, state);
2358                // test and add sum.
2359                {
2360                    Inst::AluRRR {
2361                        alu_op: AluOPRRR::And,
2362                        rd: writable_spilltmp_reg2(),
2363                        rs1: tmp.to_reg(),
2364                        rs2: rs,
2365                    }
2366                    .emit(sink, emit_info, state);
2367                    Inst::CondBr {
2368                        taken: CondBrTarget::Label(label_done),
2369                        not_taken: CondBrTarget::Fallthrough,
2370                        kind: IntegerCompare {
2371                            kind: IntCC::NotEqual,
2372                            rs1: zero_reg(),
2373                            rs2: spilltmp_reg2(),
2374                        },
2375                    }
2376                    .emit(sink, emit_info, state);
2377                    Inst::AluRRImm12 {
2378                        alu_op: AluOPRRI::Addi,
2379                        rd: sum,
2380                        rs: sum.to_reg(),
2381                        imm12: Imm12::ONE,
2382                    }
2383                    .emit(sink, emit_info, state);
2384                }
2385                // set step and tmp.
2386                {
2387                    Inst::AluRRImm12 {
2388                        alu_op: AluOPRRI::Addi,
2389                        rd: step,
2390                        rs: step.to_reg(),
2391                        imm12: Imm12::from_i16(-1),
2392                    }
2393                    .emit(sink, emit_info, state);
2394                    Inst::AluRRImm12 {
2395                        alu_op: if leading {
2396                            AluOPRRI::Srli
2397                        } else {
2398                            AluOPRRI::Slli
2399                        },
2400                        rd: tmp,
2401                        rs: tmp.to_reg(),
2402                        imm12: Imm12::ONE,
2403                    }
2404                    .emit(sink, emit_info, state);
2405                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2406                }
2407                sink.bind_label(label_done, &mut state.ctrl_plane);
2408            }
2409            &Inst::Brev8 {
2410                rs,
2411                ty,
2412                step,
2413                tmp,
2414                tmp2,
2415                rd,
2416            } => {
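                // Reverse the bits within each byte of rs. `tmp` is a source
                // mask scanning from the MSB downwards, while `tmp2` is the
                // destination mask, which walks upwards within each byte
                // (tmp2 << 1) and drops to the bottom of the next lower byte
                // (tmp2 >> 15) at each byte boundary. Whenever the source bit
                // is set, the destination bit is OR'd into rd.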
2417                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2418                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2419                    .emit(sink, emit_info, state);
2420                // tmp is the source-bit mask, starting at the most significant bit.
2421                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2422                Inst::AluRRImm12 {
2423                    alu_op: AluOPRRI::Slli,
2424                    rd: tmp,
2425                    rs: tmp.to_reg(),
2426                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2427                }
2428                .emit(sink, emit_info, state);
2429                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2430                Inst::AluRRImm12 {
2431                    alu_op: AluOPRRI::Slli,
2432                    rd: tmp2,
2433                    rs: tmp2.to_reg(),
2434                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2435                }
2436                .emit(sink, emit_info, state);
2437
2438                let label_done = sink.get_label();
2439                let label_loop = sink.get_label();
2440                sink.bind_label(label_loop, &mut state.ctrl_plane);
2441                Inst::CondBr {
2442                    taken: CondBrTarget::Label(label_done),
2443                    not_taken: CondBrTarget::Fallthrough,
2444                    kind: IntegerCompare {
2445                        kind: IntCC::SignedLessThanOrEqual,
2446                        rs1: step.to_reg(),
2447                        rs2: zero_reg(),
2448                    },
2449                }
2450                .emit(sink, emit_info, state);
2451                // test and set bit.
2452                {
2453                    Inst::AluRRR {
2454                        alu_op: AluOPRRR::And,
2455                        rd: writable_spilltmp_reg2(),
2456                        rs1: tmp.to_reg(),
2457                        rs2: rs,
2458                    }
2459                    .emit(sink, emit_info, state);
2460                    let label_over = sink.get_label();
2461                    Inst::CondBr {
2462                        taken: CondBrTarget::Label(label_over),
2463                        not_taken: CondBrTarget::Fallthrough,
2464                        kind: IntegerCompare {
2465                            kind: IntCC::Equal,
2466                            rs1: zero_reg(),
2467                            rs2: spilltmp_reg2(),
2468                        },
2469                    }
2470                    .emit(sink, emit_info, state);
2471                    Inst::AluRRR {
2472                        alu_op: AluOPRRR::Or,
2473                        rd,
2474                        rs1: rd.to_reg(),
2475                        rs2: tmp2.to_reg(),
2476                    }
2477                    .emit(sink, emit_info, state);
2478                    sink.bind_label(label_over, &mut state.ctrl_plane);
2479                }
2480                // set step and tmp.
2481                {
2482                    Inst::AluRRImm12 {
2483                        alu_op: AluOPRRI::Addi,
2484                        rd: step,
2485                        rs: step.to_reg(),
2486                        imm12: Imm12::from_i16(-1),
2487                    }
2488                    .emit(sink, emit_info, state);
2489                    Inst::AluRRImm12 {
2490                        alu_op: AluOPRRI::Srli,
2491                        rd: tmp,
2492                        rs: tmp.to_reg(),
2493                        imm12: Imm12::ONE,
2494                    }
2495                    .emit(sink, emit_info, state);
2496                    {
2497                        // Reset tmp2 (the destination-bit mask):
2498                        // if (step % 8 == 0) then tmp2 = tmp2 >> 15
2499                        // else                     tmp2 = tmp2 << 1
2500                        let label_over = sink.get_label();
2501                        let label_sll_1 = sink.get_label();
2502                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2503                            .emit(sink, emit_info, state);
2504                        Inst::AluRRR {
2505                            alu_op: AluOPRRR::Rem,
2506                            rd: writable_spilltmp_reg2(),
2507                            rs1: step.to_reg(),
2508                            rs2: spilltmp_reg2(),
2509                        }
2510                        .emit(sink, emit_info, state);
2511                        Inst::CondBr {
2512                            taken: CondBrTarget::Label(label_sll_1),
2513                            not_taken: CondBrTarget::Fallthrough,
2514                            kind: IntegerCompare {
2515                                kind: IntCC::NotEqual,
2516                                rs1: spilltmp_reg2(),
2517                                rs2: zero_reg(),
2518                            },
2519                        }
2520                        .emit(sink, emit_info, state);
2521                        Inst::AluRRImm12 {
2522                            alu_op: AluOPRRI::Srli,
2523                            rd: tmp2,
2524                            rs: tmp2.to_reg(),
2525                            imm12: Imm12::from_i16(15),
2526                        }
2527                        .emit(sink, emit_info, state);
2528                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2529                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2530                        Inst::AluRRImm12 {
2531                            alu_op: AluOPRRI::Slli,
2532                            rd: tmp2,
2533                            rs: tmp2.to_reg(),
2534                            imm12: Imm12::ONE,
2535                        }
2536                        .emit(sink, emit_info, state);
2537                        sink.bind_label(label_over, &mut state.ctrl_plane);
2538                    }
2539                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2540                }
2541                sink.bind_label(label_done, &mut state.ctrl_plane);
2542            }
2543            &Inst::StackProbeLoop {
2544                guard_size,
2545                probe_count,
2546                tmp: guard_size_tmp,
2547            } => {
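                // Touch one byte every `guard_size` bytes, walking from
                // sp - guard_size * probe_count back up towards sp, so that
                // every guard page of the new frame is faulted in eagerly.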
2548                let step = writable_spilltmp_reg();
2549                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2550                    .iter()
2551                    .for_each(|i| i.emit(sink, emit_info, state));
2552                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2553                    .iter()
2554                    .for_each(|i| i.emit(sink, emit_info, state));
2555
2556                let loop_start = sink.get_label();
2557                let label_done = sink.get_label();
2558                sink.bind_label(loop_start, &mut state.ctrl_plane);
2559                Inst::CondBr {
2560                    taken: CondBrTarget::Label(label_done),
2561                    not_taken: CondBrTarget::Fallthrough,
2562                    kind: IntegerCompare {
2563                        kind: IntCC::UnsignedLessThanOrEqual,
2564                        rs1: step.to_reg(),
2565                        rs2: guard_size_tmp.to_reg(),
2566                    },
2567                }
2568                .emit(sink, emit_info, state);
2569                // compute address.
2570                Inst::AluRRR {
2571                    alu_op: AluOPRRR::Sub,
2572                    rd: writable_spilltmp_reg2(),
2573                    rs1: stack_reg(),
2574                    rs2: step.to_reg(),
2575                }
2576                .emit(sink, emit_info, state);
2577                Inst::Store {
2578                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2579                    op: StoreOP::Sb,
2580                    flags: MemFlags::new(),
2581                    src: zero_reg(),
2582                }
2583                .emit(sink, emit_info, state);
2584                // Step down by one guard page.
2585                Inst::AluRRR {
2586                    alu_op: AluOPRRR::Sub,
2587                    rd: step,
2588                    rs1: step.to_reg(),
2589                    rs2: guard_size_tmp.to_reg(),
2590                }
2591                .emit(sink, emit_info, state);
2592                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2593                sink.bind_label(label_done, &mut state.ctrl_plane);
2594            }
2595            &Inst::VecAluRRRImm5 {
2596                op,
2597                vd,
2598                vd_src,
2599                imm,
2600                vs2,
2601                ref mask,
2602                ..
2603            } => {
2604                debug_assert_eq!(vd.to_reg(), vd_src);
2605
2606                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2607            }
2608            &Inst::VecAluRRRR {
2609                op,
2610                vd,
2611                vd_src,
2612                vs1,
2613                vs2,
2614                ref mask,
2615                ..
2616            } => {
                debug_assert_eq!(vd.to_reg(), vd_src);

                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
            }
            &Inst::VecAluRRR {
                op,
                vd,
                vs1,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
            }
            &Inst::VecAluRRImm5 {
                op,
                vd,
                imm,
                vs2,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
            }
            &Inst::VecAluRR {
                op,
                vd,
                vs,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_rr(op, vd, vs, *mask));
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ..
            } => {
                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
            }
            &Inst::VecSetState { rd, ref vstate } => {
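                // 0x57 is the OP-V major opcode, which the vector
                // configuration instructions share; with a statically-known
                // AVL this encodes a `vsetivli`-style instruction.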
                sink.put4(encode_vcfg_imm(
                    0x57,
                    rd.to_reg(),
                    vstate.avl.unwrap_static(),
                    &vstate.vtype,
                ));

                // Update the current vector emit state.
                state.vstate = EmitVState::Known(*vstate);
            }

            &Inst::VecLoad {
                eew,
                to,
                ref from,
                ref mask,
                flags,
                ..
            } => {
                // Vector loads don't support immediate offsets, so we may need
                // to materialize the address in a register first.
                let addr = match from {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A reg+0 address can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, load the address into a register and
                            // load from that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

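                // 0x07 is the LOAD-FP major opcode, which the RISC-V vector
                // extension reuses for vector loads.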
                sink.put4(encode_vmem_load(
                    0x07,
                    to.to_reg(),
                    eew,
                    addr,
                    from.lumop(),
                    *mask,
                    from.mop(),
                    from.nf(),
                ));
            }

            &Inst::VecStore {
                eew,
                ref to,
                from,
                ref mask,
                flags,
                ..
            } => {
                // Vector stores don't support immediate offsets, so we may
                // need to materialize the address in a register first.
                let addr = match to {
                    VecAMode::UnitStride { base } => {
                        let base_reg = base.get_base_register();
                        let offset = base.get_offset_with_state(state);

                        // A reg+0 address can be encoded directly.
                        if let (Some(base_reg), 0) = (base_reg, offset) {
                            base_reg
                        } else {
                            // Otherwise, load the address into a register and
                            // store through that.
                            let tmp = writable_spilltmp_reg();
                            Inst::LoadAddr {
                                rd: tmp,
                                mem: *base,
                            }
                            .emit(sink, emit_info, state);
                            tmp.to_reg()
                        }
                    }
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

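                // 0x27 is the STORE-FP major opcode, which the RISC-V vector
                // extension reuses for vector stores.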
                sink.put4(encode_vmem_store(
                    0x27,
                    from,
                    eew,
                    addr,
                    to.sumop(),
                    *mask,
                    to.mop(),
                    to.nf(),
                ));
            }

            Inst::EmitIsland { needed_space } => {
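                // If an island is pending within `needed_space` bytes, emit it
                // now, jumping around it so the island's contents are never
                // executed. (The extra 4 bytes in `needed_space + 4` appear to
                // reserve room for the jump emitted just before the island.)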
                if sink.island_needed(*needed_space) {
                    let jump_around_label = sink.get_label();
                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }

            Inst::SequencePoint { .. } => {
                // Sequence points emit no machine code.
            }
        }
    }
}

fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially be quite long (up to 634 bytes!),
    // so let's emit an island here if we need one.
    //
    // It is difficult to calculate exactly how many bytes are going to be emitted,
    // so we measure it by emitting the sequence into a disposable buffer and then
    // checking how many bytes were actually produced.
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction, jumping around it
    // so it is never executed.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}
2813
/// This should not be called directly; instead, prefer to call
/// [emit_return_call_common_sequence].
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };
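
    // Schematically, the frame at this point (offsets from SP, higher
    // addresses towards the top):
    //
    //   sp_to_fp_offset + 8 : saved return address
    //   sp_to_fp_offset     : saved frame pointer
    //   sp_to_fp_offset - 8 : first clobbered callee-save, continuing downwards
    //   0                   : start of the outgoing argument area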

    // Restore the clobbered callee-saved registers, working downwards from
    // just below the saved frame pointer.
    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8;
    }

    // Restore the link register and frame pointer.
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize it
    // down to what the callee is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
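    // For example (hypothetical numbers): if the prologue reserved
    // tail_args_size = 32 bytes of incoming argument space but this callee
    // only expects new_stack_arg_size = 16, we free the extra 16 bytes here.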

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}