Skip to main content

cranelift_codegen/isa/riscv64/inst/
emit.rs

1//! Riscv64 ISA: binary code emission.
2
3use crate::ir::{self, LibCall, TrapCode};
4use crate::isa::riscv64::inst::*;
5use crate::isa::riscv64::lower::isle::generated_code::{
6    CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
7};
8use cranelift_control::ControlPlane;
9
10pub struct EmitInfo {
11    #[expect(dead_code, reason = "may want to be used in the future")]
12    shared_flag: settings::Flags,
13    isa_flags: super::super::riscv_settings::Flags,
14}
15
16impl EmitInfo {
17    pub(crate) fn new(
18        shared_flag: settings::Flags,
19        isa_flags: super::super::riscv_settings::Flags,
20    ) -> Self {
21        Self {
22            shared_flag,
23            isa_flags,
24        }
25    }
26}
27
28pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
29    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
30}
31
32pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
33    let real_reg = m.to_real_reg().unwrap().hw_enc();
34    debug_assert!(real_reg >= 8 && real_reg < 16);
35    let compressed_reg = real_reg - 8;
36    u32::from(compressed_reg)
37}
38
39#[derive(Clone, Debug, PartialEq, Default)]
40pub enum EmitVState {
41    #[default]
42    Unknown,
43    Known(VState),
44}
45
46/// State carried between emissions of a sequence of instructions.
47#[derive(Default, Clone, Debug)]
48pub struct EmitState {
49    /// The user stack map for the upcoming instruction, as provided to
50    /// `pre_safepoint()`.
51    user_stack_map: Option<ir::UserStackMap>,
52
53    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
54    /// optimized away at compiletime. See [cranelift_control].
55    ctrl_plane: ControlPlane,
56
57    /// Vector State
58    /// Controls the current state of the vector unit at the emission point.
59    vstate: EmitVState,
60
61    frame_layout: FrameLayout,
62}
63
64impl EmitState {
65    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
66        self.user_stack_map.take()
67    }
68
69    fn clobber_vstate(&mut self) {
70        self.vstate = EmitVState::Unknown;
71    }
72}
73
74impl MachInstEmitState<Inst> for EmitState {
75    fn new(
76        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
77        ctrl_plane: ControlPlane,
78    ) -> Self {
79        EmitState {
80            user_stack_map: None,
81            ctrl_plane,
82            vstate: EmitVState::Unknown,
83            frame_layout: abi.frame_layout().clone(),
84        }
85    }
86
87    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
88        self.user_stack_map = user_stack_map;
89    }
90
91    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
92        &mut self.ctrl_plane
93    }
94
95    fn take_ctrl_plane(self) -> ControlPlane {
96        self.ctrl_plane
97    }
98
99    fn on_new_block(&mut self) {
100        // Reset the vector state.
101        self.clobber_vstate();
102    }
103
104    fn frame_layout(&self) -> &FrameLayout {
105        &self.frame_layout
106    }
107}
108
109impl Inst {
110    /// Load int mask.
111    /// If ty is int then 0xff in rd.
112    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
113        let mut insts = SmallInstVec::new();
114        assert!(ty.is_int() && ty.bits() <= 64);
115        match ty {
116            I64 => {
117                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
118            }
119            I32 | I16 => {
120                insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
121                insts.push(Inst::Extend {
122                    rd,
123                    rn: rd.to_reg(),
124                    signed: false,
125                    from_bits: ty.bits() as u8,
126                    to_bits: 64,
127                });
128            }
129            I8 => {
130                insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
131            }
132            _ => unreachable!("ty:{:?}", ty),
133        }
134        insts
135    }
136    ///  inverse all bit
137    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
138        Inst::AluRRImm12 {
139            alu_op: AluOPRRI::Xori,
140            rd,
141            rs,
142            imm12: Imm12::from_i16(-1),
143        }
144    }
145
146    /// Returns Some(VState) if this instruction is expecting a specific vector state
147    /// before emission.
148    fn expected_vstate(&self) -> Option<&VState> {
149        match self {
150            Inst::Nop0
151            | Inst::Nop4
152            | Inst::BrTable { .. }
153            | Inst::Auipc { .. }
154            | Inst::Fli { .. }
155            | Inst::Lui { .. }
156            | Inst::LoadInlineConst { .. }
157            | Inst::AluRRR { .. }
158            | Inst::FpuRRR { .. }
159            | Inst::AluRRImm12 { .. }
160            | Inst::CsrReg { .. }
161            | Inst::CsrImm { .. }
162            | Inst::Load { .. }
163            | Inst::Store { .. }
164            | Inst::Args { .. }
165            | Inst::Rets { .. }
166            | Inst::Ret { .. }
167            | Inst::Extend { .. }
168            | Inst::Call { .. }
169            | Inst::CallInd { .. }
170            | Inst::ReturnCall { .. }
171            | Inst::ReturnCallInd { .. }
172            | Inst::Jal { .. }
173            | Inst::CondBr { .. }
174            | Inst::LoadExtNameGot { .. }
175            | Inst::LoadExtNameNear { .. }
176            | Inst::LoadExtNameFar { .. }
177            | Inst::ElfTlsGetAddr { .. }
178            | Inst::LoadAddr { .. }
179            | Inst::Mov { .. }
180            | Inst::MovFromPReg { .. }
181            | Inst::Fence { .. }
182            | Inst::EBreak
183            | Inst::Udf { .. }
184            | Inst::FpuRR { .. }
185            | Inst::FpuRRRR { .. }
186            | Inst::Jalr { .. }
187            | Inst::Atomic { .. }
188            | Inst::Select { .. }
189            | Inst::AtomicCas { .. }
190            | Inst::RawData { .. }
191            | Inst::AtomicStore { .. }
192            | Inst::AtomicLoad { .. }
193            | Inst::AtomicRmwLoop { .. }
194            | Inst::TrapIf { .. }
195            | Inst::Unwind { .. }
196            | Inst::DummyUse { .. }
197            | Inst::LabelAddress { .. }
198            | Inst::SequencePoint { .. }
199            | Inst::Popcnt { .. }
200            | Inst::Cltz { .. }
201            | Inst::Brev8 { .. }
202            | Inst::StackProbeLoop { .. } => None,
203
204            // VecSetState does not expect any vstate, rather it updates it.
205            Inst::VecSetState { .. } => None,
206
207            // `vmv` instructions copy a set of registers and ignore vstate.
208            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,
209
210            Inst::VecAluRR { vstate, .. } |
211            Inst::VecAluRRR { vstate, .. } |
212            Inst::VecAluRRRR { vstate, .. } |
213            Inst::VecAluRImm5 { vstate, .. } |
214            Inst::VecAluRRImm5 { vstate, .. } |
215            Inst::VecAluRRRImm5 { vstate, .. } |
216            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
217            // the full vtype. A future optimization could be to decouple these two when
218            // updating vstate. This would allow us to avoid emitting a VecSetState in
219            // some cases.
220            Inst::VecLoad { vstate, .. }
221            | Inst::VecStore { vstate, .. } => Some(vstate),
222            Inst::EmitIsland { .. } => None,
223        }
224    }
225}
226
227impl MachInstEmit for Inst {
228    type State = EmitState;
229    type Info = EmitInfo;
230
231    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
232        // Check if we need to update the vector state before emitting this instruction
233        if let Some(expected) = self.expected_vstate() {
234            if state.vstate != EmitVState::Known(*expected) {
235                // Update the vector state.
236                Inst::VecSetState {
237                    rd: writable_zero_reg(),
238                    vstate: *expected,
239                }
240                .emit(sink, emit_info, state);
241            }
242        }
243
244        // N.B.: we *must* not exceed the "worst-case size" used to compute
245        // where to insert islands, except when islands are explicitly triggered
246        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
247        // to allow disabling the check for `JTSequence`, which is always
248        // emitted following an `EmitIsland`.
249        let mut start_off = sink.cur_offset();
250
251        // First try to emit this as a compressed instruction
252        let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
253        if res.is_none() {
254            // If we can't lets emit it as a normal instruction
255            self.emit_uncompressed(sink, emit_info, state, &mut start_off);
256        }
257
258        // We exclude br_table, call, return_call and try_call from
259        // these checks since they emit their own islands, and thus
260        // are allowed to exceed the worst case size.
261        let emits_own_island = match self {
262            Inst::BrTable { .. }
263            | Inst::ReturnCall { .. }
264            | Inst::ReturnCallInd { .. }
265            | Inst::Call { .. }
266            | Inst::CallInd { .. }
267            | Inst::EmitIsland { .. } => true,
268            _ => false,
269        };
270        if !emits_own_island {
271            let end_off = sink.cur_offset();
272            assert!(
273                (end_off - start_off) <= Inst::worst_case_size(),
274                "Inst:{:?} length:{} worst_case_size:{}",
275                self,
276                end_off - start_off,
277                Inst::worst_case_size()
278            );
279        }
280    }
281
282    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
283        self.print_with_state(state)
284    }
285}
286
287impl Inst {
288    /// Tries to emit an instruction as compressed; returns `None` if we can't.
289    fn try_emit_compressed(
290        &self,
291        sink: &mut MachBuffer<Inst>,
292        emit_info: &EmitInfo,
293        state: &mut EmitState,
294        start_off: &mut u32,
295    ) -> Option<()> {
296        let has_m = emit_info.isa_flags.has_m();
297        let has_zba = emit_info.isa_flags.has_zba();
298        let has_zbb = emit_info.isa_flags.has_zbb();
299        let has_zca = emit_info.isa_flags.has_zca();
300        let has_zcb = emit_info.isa_flags.has_zcb();
301        let has_zcd = emit_info.isa_flags.has_zcd();
302
303        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca
304        // to be enabled, so check it early.
305        if !has_zca {
306            return None;
307        }
308
309        fn reg_is_compressible(r: Reg) -> bool {
310            r.to_real_reg()
311                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
312                .unwrap_or(false)
313        }
314
315        match *self {
316            // C.ADD
317            Inst::AluRRR {
318                alu_op: AluOPRRR::Add,
319                rd,
320                rs1,
321                rs2,
322            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
323                && rs1 != zero_reg()
324                && rs2 != zero_reg() =>
325            {
326                // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
327                // also swap rs1 with rs2 and we get an equivalent instruction. i.e we
328                // can also compress `add rd, rs, rd` into `c.add rd, rs`.
329                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
330
331                sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
332            }
333
334            // C.MV
335            Inst::AluRRImm12 {
336                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
337                rd,
338                rs,
339                imm12,
340            } if rd.to_reg() != rs
341                && rd.to_reg() != zero_reg()
342                && rs != zero_reg()
343                && imm12.as_i16() == 0 =>
344            {
345                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
346            }
347
348            // CA Ops
349            Inst::AluRRR {
350                alu_op:
351                    alu_op @ (AluOPRRR::And
352                    | AluOPRRR::Or
353                    | AluOPRRR::Xor
354                    | AluOPRRR::Addw
355                    | AluOPRRR::Mul),
356                rd,
357                rs1,
358                rs2,
359            } if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
360                && reg_is_compressible(rs1)
361                && reg_is_compressible(rs2) =>
362            {
363                let op = match alu_op {
364                    AluOPRRR::And => CaOp::CAnd,
365                    AluOPRRR::Or => CaOp::COr,
366                    AluOPRRR::Xor => CaOp::CXor,
367                    AluOPRRR::Addw => CaOp::CAddw,
368                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
369                    _ => return None,
370                };
371                // The canonical expansion for these instruction has `rd == rs1`, but
372                // these are all commutative operations, so we can swap the operands.
373                let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
374
375                sink.put2(encode_ca_type(op, rd, src));
376            }
377
378            // The sub instructions are non commutative, so we can't swap the operands.
379            Inst::AluRRR {
380                alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
381                rd,
382                rs1,
383                rs2,
384            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
385                let op = match alu_op {
386                    AluOPRRR::Sub => CaOp::CSub,
387                    AluOPRRR::Subw => CaOp::CSubw,
388                    _ => return None,
389                };
390                sink.put2(encode_ca_type(op, rd, rs2));
391            }
392
393            // c.j
394            //
395            // We don't have a separate JAL as that is only available in RV32C
396            Inst::Jal { label } => {
397                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
398                sink.add_uncond_branch(*start_off, *start_off + 2, label);
399                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
400            }
401
402            // c.jr
403            Inst::Jalr { rd, base, offset }
404                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
405            {
406                sink.put2(encode_cr2_type(CrOp::CJr, base));
407                state.clobber_vstate();
408            }
409
410            // c.jalr
411            Inst::Jalr { rd, base, offset }
412                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
413            {
414                sink.put2(encode_cr2_type(CrOp::CJalr, base));
415                state.clobber_vstate();
416            }
417
418            // c.ebreak
419            Inst::EBreak => {
420                sink.put2(encode_cr_type(
421                    CrOp::CEbreak,
422                    writable_zero_reg(),
423                    zero_reg(),
424                ));
425            }
426
427            // c.unimp
428            Inst::Udf { trap_code } => {
429                sink.add_trap(trap_code);
430                sink.put2(0x0000);
431            }
432            // c.addi16sp
433            //
434            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
435            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
436            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
437            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
438            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
439            Inst::AluRRImm12 {
440                alu_op: AluOPRRI::Addi,
441                rd,
442                rs,
443                imm12,
444            } if rd.to_reg() == rs
445                && rs == stack_reg()
446                && imm12.as_i16() != 0
447                && (imm12.as_i16() % 16) == 0
448                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
449            {
450                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
451                sink.put2(encode_c_addi16sp(imm6));
452            }
453
454            // c.addi4spn
455            //
456            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
457            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
458            // rd. This instruction is used to generate pointers to stack-allocated variables
459            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
460            // the code points with nzuimm=0 are reserved.
461            Inst::AluRRImm12 {
462                alu_op: AluOPRRI::Addi,
463                rd,
464                rs,
465                imm12,
466            } if reg_is_compressible(rd.to_reg())
467                && rs == stack_reg()
468                && imm12.as_i16() != 0
469                && (imm12.as_i16() % 4) == 0
470                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
471            {
472                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
473                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
474            }
475
476            // c.li
477            Inst::AluRRImm12 {
478                alu_op: AluOPRRI::Addi,
479                rd,
480                rs,
481                imm12,
482            } if rd.to_reg() != zero_reg() && rs == zero_reg() => {
483                let imm6 = Imm6::maybe_from_imm12(imm12)?;
484                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
485            }
486
487            // c.addi
488            Inst::AluRRImm12 {
489                alu_op: AluOPRRI::Addi,
490                rd,
491                rs,
492                imm12,
493            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
494                let imm6 = Imm6::maybe_from_imm12(imm12)?;
495                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
496            }
497
498            // c.addiw
499            Inst::AluRRImm12 {
500                alu_op: AluOPRRI::Addiw,
501                rd,
502                rs,
503                imm12,
504            } if rd.to_reg() == rs && rs != zero_reg() => {
505                let imm6 = Imm6::maybe_from_imm12(imm12)?;
506                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
507            }
508
509            // c.lui
510            //
511            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
512            // of the destination register, clears the bottom 12 bits, and
513            // sign-extends bit 17 into all higher bits of the destination.
514            Inst::Lui { rd, imm: imm20 }
515                if rd.to_reg() != zero_reg()
516                    && rd.to_reg() != stack_reg()
517                    && imm20.as_i32() != 0 =>
518            {
519                // Check that the top bits are sign extended
520                let imm = imm20.as_i32() << 14 >> 14;
521                if imm != imm20.as_i32() {
522                    return None;
523                }
524                let imm6 = Imm6::maybe_from_i32(imm)?;
525                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
526            }
527
528            // c.slli
529            Inst::AluRRImm12 {
530                alu_op: AluOPRRI::Slli,
531                rd,
532                rs,
533                imm12,
534            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
535                // The shift amount is unsigned, but we encode it as signed.
536                let shift = imm12.as_i16() & 0x3f;
537                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
538                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
539            }
540
541            // c.srli / c.srai
542            Inst::AluRRImm12 {
543                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
544                rd,
545                rs,
546                imm12,
547            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
548                let op = match op {
549                    AluOPRRI::Srli => CbOp::CSrli,
550                    AluOPRRI::Srai => CbOp::CSrai,
551                    _ => unreachable!(),
552                };
553
554                // The shift amount is unsigned, but we encode it as signed.
555                let shift = imm12.as_i16() & 0x3f;
556                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
557                sink.put2(encode_cb_type(op, rd, imm6));
558            }
559
560            // c.zextb
561            //
562            // This is an alias for `andi rd, rd, 0xff`
563            Inst::AluRRImm12 {
564                alu_op: AluOPRRI::Andi,
565                rd,
566                rs,
567                imm12,
568            } if has_zcb
569                && rd.to_reg() == rs
570                && reg_is_compressible(rs)
571                && imm12.as_i16() == 0xff =>
572            {
573                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
574            }
575
576            // c.andi
577            Inst::AluRRImm12 {
578                alu_op: AluOPRRI::Andi,
579                rd,
580                rs,
581                imm12,
582            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
583                let imm6 = Imm6::maybe_from_imm12(imm12)?;
584                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
585            }
586
587            // Stack Based Loads
588            Inst::Load {
589                rd,
590                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
591                from,
592                flags,
593            } if from.get_base_register() == Some(stack_reg())
594                && (from.get_offset_with_state(state) % op.size()) == 0 =>
595            {
596                // We encode the offset in multiples of the load size.
597                let offset = from.get_offset_with_state(state);
598                let imm6 = u8::try_from(offset / op.size())
599                    .ok()
600                    .and_then(Uimm6::maybe_from_u8)?;
601
602                // Some additional constraints on these instructions.
603                //
604                // Integer loads are not allowed to target x0, but floating point loads
605                // are, since f0 is not a special register.
606                //
607                // Floating point loads are not included in the base Zca extension
608                // but in a separate Zcd extension. Both of these are part of the C Extension.
609                let rd_is_zero = rd.to_reg() == zero_reg();
610                let op = match op {
611                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
612                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
613                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
614                    _ => return None,
615                };
616
617                if let Some(trap_code) = flags.trap_code() {
618                    // Register the offset at which the actual load instruction starts.
619                    sink.add_trap(trap_code);
620                }
621                sink.put2(encode_ci_sp_load(op, rd, imm6));
622            }
623
624            // Regular Loads
625            Inst::Load {
626                rd,
627                op:
628                    op
629                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
630                from,
631                flags,
632            } if reg_is_compressible(rd.to_reg())
633                && from
634                    .get_base_register()
635                    .map(reg_is_compressible)
636                    .unwrap_or(false)
637                && (from.get_offset_with_state(state) % op.size()) == 0 =>
638            {
639                let base = from.get_base_register().unwrap();
640
641                // We encode the offset in multiples of the load size.
642                let offset = from.get_offset_with_state(state);
643                let offset = u8::try_from(offset / op.size()).ok()?;
644
645                // We mix two different formats here.
646                //
647                // c.lw / c.ld / c.fld instructions are available in the standard Zca
648                // extension using the CL format.
649                //
650                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
651                // are also encoded differently. Technically they each have a different
652                // format, but they are similar enough that we can group them.
653                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
654                let encoded = if is_zcb_load {
655                    if !has_zcb {
656                        return None;
657                    }
658
659                    let op = match op {
660                        LoadOP::Lbu => ZcbMemOp::CLbu,
661                        LoadOP::Lhu => ZcbMemOp::CLhu,
662                        LoadOP::Lh => ZcbMemOp::CLh,
663                        _ => unreachable!(),
664                    };
665
666                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
667                    // and loads only have 1 bit.
668                    let imm2 = Uimm2::maybe_from_u8(offset)?;
669                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
670                        return None;
671                    }
672
673                    encode_zcbmem_load(op, rd, base, imm2)
674                } else {
675                    // Floating point loads are not included in the base Zca extension
676                    // but in a separate Zcd extension. Both of these are part of the C Extension.
677                    let op = match op {
678                        LoadOP::Lw => ClOp::CLw,
679                        LoadOP::Ld => ClOp::CLd,
680                        LoadOP::Fld if has_zcd => ClOp::CFld,
681                        _ => return None,
682                    };
683                    let imm5 = Uimm5::maybe_from_u8(offset)?;
684
685                    encode_cl_type(op, rd, base, imm5)
686                };
687
688                if let Some(trap_code) = flags.trap_code() {
689                    // Register the offset at which the actual load instruction starts.
690                    sink.add_trap(trap_code);
691                }
692                sink.put2(encoded);
693            }
694
695            // Stack Based Stores
696            Inst::Store {
697                src,
698                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
699                to,
700                flags,
701            } if to.get_base_register() == Some(stack_reg())
702                && (to.get_offset_with_state(state) % op.size()) == 0 =>
703            {
704                // We encode the offset in multiples of the store size.
705                let offset = to.get_offset_with_state(state);
706                let imm6 = u8::try_from(offset / op.size())
707                    .ok()
708                    .and_then(Uimm6::maybe_from_u8)?;
709
710                // Floating point stores are not included in the base Zca extension
711                // but in a separate Zcd extension. Both of these are part of the C Extension.
712                let op = match op {
713                    StoreOP::Sw => CssOp::CSwsp,
714                    StoreOP::Sd => CssOp::CSdsp,
715                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
716                    _ => return None,
717                };
718
719                if let Some(trap_code) = flags.trap_code() {
720                    // Register the offset at which the actual store instruction starts.
721                    sink.add_trap(trap_code);
722                }
723                sink.put2(encode_css_type(op, src, imm6));
724            }
725
726            // Regular Stores
727            Inst::Store {
728                src,
729                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
730                to,
731                flags,
732            } if reg_is_compressible(src)
733                && to
734                    .get_base_register()
735                    .map(reg_is_compressible)
736                    .unwrap_or(false)
737                && (to.get_offset_with_state(state) % op.size()) == 0 =>
738            {
739                let base = to.get_base_register().unwrap();
740
741                // We encode the offset in multiples of the store size.
742                let offset = to.get_offset_with_state(state);
743                let offset = u8::try_from(offset / op.size()).ok()?;
744
745                // We mix two different formats here.
746                //
747                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
748                // extension using the CS format.
749                //
750                // c.sb / c.sh are only available in the Zcb extension and are also
751                // encoded differently.
752                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
753                let encoded = if is_zcb_store {
754                    if !has_zcb {
755                        return None;
756                    }
757
758                    let op = match op {
759                        StoreOP::Sh => ZcbMemOp::CSh,
760                        StoreOP::Sb => ZcbMemOp::CSb,
761                        _ => unreachable!(),
762                    };
763
764                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
765                    // and loads only have 1 bit.
766                    let imm2 = Uimm2::maybe_from_u8(offset)?;
767                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
768                        return None;
769                    }
770
771                    encode_zcbmem_store(op, src, base, imm2)
772                } else {
773                    // Floating point stores are not included in the base Zca extension
774                    // but in a separate Zcd extension. Both of these are part of the C Extension.
775                    let op = match op {
776                        StoreOP::Sw => CsOp::CSw,
777                        StoreOP::Sd => CsOp::CSd,
778                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
779                        _ => return None,
780                    };
781                    let imm5 = Uimm5::maybe_from_u8(offset)?;
782
783                    encode_cs_type(op, src, base, imm5)
784                };
785
786                if let Some(trap_code) = flags.trap_code() {
787                    // Register the offset at which the actual store instruction starts.
788                    sink.add_trap(trap_code);
789                }
790                sink.put2(encoded);
791            }
792
793            // c.not
794            //
795            // This is an alias for `xori rd, rd, -1`
796            Inst::AluRRImm12 {
797                alu_op: AluOPRRI::Xori,
798                rd,
799                rs,
800                imm12,
801            } if has_zcb
802                && rd.to_reg() == rs
803                && reg_is_compressible(rs)
804                && imm12.as_i16() == -1 =>
805            {
806                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
807            }
808
809            // c.sext.b / c.sext.h / c.zext.h
810            //
811            // These are all the extend instructions present in `Zcb`, they
812            // also require `Zbb` since they aren't available in the base ISA.
813            Inst::AluRRImm12 {
814                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
815                rd,
816                rs,
817                imm12,
818            } if has_zcb
819                && has_zbb
820                && rd.to_reg() == rs
821                && reg_is_compressible(rs)
822                && imm12.as_i16() == 0 =>
823            {
824                let op = match alu_op {
825                    AluOPRRI::Sextb => CsznOp::CSextb,
826                    AluOPRRI::Sexth => CsznOp::CSexth,
827                    AluOPRRI::Zexth => CsznOp::CZexth,
828                    _ => unreachable!(),
829                };
830                sink.put2(encode_cszn_type(op, rd));
831            }
832
833            // c.zext.w
834            //
835            // This is an alias for `add.uw rd, rd, zero`
836            Inst::AluRRR {
837                alu_op: AluOPRRR::Adduw,
838                rd,
839                rs1,
840                rs2,
841            } if has_zcb
842                && has_zba
843                && rd.to_reg() == rs1
844                && reg_is_compressible(rs1)
845                && rs2 == zero_reg() =>
846            {
847                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
848            }
849
850            _ => return None,
851        }
852
853        return Some(());
854    }
855
856    fn emit_uncompressed(
857        &self,
858        sink: &mut MachBuffer<Inst>,
859        emit_info: &EmitInfo,
860        state: &mut EmitState,
861        start_off: &mut u32,
862    ) {
863        match self {
864            &Inst::Nop0 => {
865                // do nothing
866            }
867            // Addi x0, x0, 0
868            &Inst::Nop4 => {
869                let x = Inst::AluRRImm12 {
870                    alu_op: AluOPRRI::Addi,
871                    rd: Writable::from_reg(zero_reg()),
872                    rs: zero_reg(),
873                    imm12: Imm12::ZERO,
874                };
875                x.emit(sink, emit_info, state)
876            }
877            &Inst::RawData { ref data } => {
878                // Right now we only put a u32 or u64 in this instruction.
879                // It is not very long, so there is no need to check whether we need `emit_island`.
880                // If the data is very long, this is a bug, because RawData is typically
881                // used to load some data and relies on some position in the code stream,
882                // and we may exceed `Inst::worst_case_size`.
883                // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
884                sink.put_data(&data[..]);
885            }
886            &Inst::Lui { rd, ref imm } => {
887                let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
888                sink.put4(x);
889            }
890            &Inst::Fli { rd, width, imm } => {
891                sink.put4(encode_fli(width, imm, rd));
892            }
893            &Inst::LoadInlineConst { rd, ty, imm } => {
894                let data = &imm.to_le_bytes()[..ty.bytes() as usize];
895
896                let label_data: MachLabel = sink.get_label();
897                let label_end: MachLabel = sink.get_label();
898
899                // Load into rd
900                Inst::Load {
901                    rd,
902                    op: LoadOP::from_type(ty),
903                    flags: MemFlagsData::new(),
904                    from: AMode::Label(label_data),
905                }
906                .emit(sink, emit_info, state);
907
908                // Jump over the inline pool
909                Inst::gen_jump(label_end).emit(sink, emit_info, state);
910
911                // Emit the inline data
912                sink.bind_label(label_data, &mut state.ctrl_plane);
913                Inst::RawData { data: data.into() }.emit(sink, emit_info, state);
914
915                sink.bind_label(label_end, &mut state.ctrl_plane);
916            }
917            &Inst::FpuRR {
918                alu_op,
919                width,
920                frm,
921                rd,
922                rs,
923            } => {
924                if alu_op.is_convert_to_int() {
925                    sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
926                }
927                sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
928            }
929            &Inst::FpuRRRR {
930                alu_op,
931                rd,
932                rs1,
933                rs2,
934                rs3,
935                frm,
936                width,
937            } => {
938                sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
939            }
940            &Inst::FpuRRR {
941                alu_op,
942                width,
943                frm,
944                rd,
945                rs1,
946                rs2,
947            } => {
948                sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
949            }
950            &Inst::Unwind { ref inst } => {
951                sink.add_unwind(inst.clone());
952            }
953            &Inst::DummyUse { .. } => {
954                // This has already been handled by Inst::allocate.
955            }
956            &Inst::AluRRR {
957                alu_op,
958                rd,
959                rs1,
960                rs2,
961            } => {
962                let (rs1, rs2) = if alu_op.reverse_rs() {
963                    (rs2, rs1)
964                } else {
965                    (rs1, rs2)
966                };
967
968                sink.put4(encode_r_type(
969                    alu_op.op_code(),
970                    rd,
971                    alu_op.funct3(),
972                    rs1,
973                    rs2,
974                    alu_op.funct7(),
975                ));
976            }
977            &Inst::AluRRImm12 {
978                alu_op,
979                rd,
980                rs,
981                imm12,
982            } => {
983                let x = alu_op.op_code()
984                    | reg_to_gpr_num(rd.to_reg()) << 7
985                    | alu_op.funct3() << 12
986                    | reg_to_gpr_num(rs) << 15
987                    | alu_op.imm12(imm12) << 20;
988                sink.put4(x);
989            }
990            &Inst::CsrReg { op, rd, rs, csr } => {
991                sink.put4(encode_csr_reg(op, rd, rs, csr));
992            }
993            &Inst::CsrImm { op, rd, csr, imm } => {
994                sink.put4(encode_csr_imm(op, rd, csr, imm));
995            }
996            &Inst::Load {
997                rd,
998                op: LoadOP::Flh,
999                from,
1000                flags,
1001            } if !emit_info.isa_flags.has_zfhmin() => {
1002                // flh unavailable, use an integer load instead
1003                Inst::Load {
1004                    rd: writable_spilltmp_reg(),
1005                    op: LoadOP::Lh,
1006                    flags,
1007                    from,
1008                }
1009                .emit(sink, emit_info, state);
1010                // NaN-box the `f16` before loading it into the floating-point
1011                // register with a 32-bit `fmv`.
1012                Inst::Lui {
1013                    rd: writable_spilltmp_reg2(),
1014                    imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
1015                }
1016                .emit(sink, emit_info, state);
1017                Inst::AluRRR {
1018                    alu_op: AluOPRRR::Or,
1019                    rd: writable_spilltmp_reg(),
1020                    rs1: spilltmp_reg(),
1021                    rs2: spilltmp_reg2(),
1022                }
1023                .emit(sink, emit_info, state);
1024                Inst::FpuRR {
1025                    alu_op: FpuOPRR::FmvFmtX,
1026                    width: FpuOPWidth::S,
1027                    frm: FRM::RNE,
1028                    rd,
1029                    rs: spilltmp_reg(),
1030                }
1031                .emit(sink, emit_info, state);
1032            }
1033            &Inst::Load {
1034                rd,
1035                op,
1036                from,
1037                flags,
1038            } => {
1039                let base = from.get_base_register();
1040                let offset = from.get_offset_with_state(state);
1041                let offset_imm12 = Imm12::maybe_from_i64(offset);
1042                let label = from.get_label_with_sink(sink);
1043
1044                let (addr, imm12) = match (base, offset_imm12, label) {
1045                    // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
1046                    (Some(base), Some(imm12), None) => (base, imm12),
1047
1048                    // Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
1049                    // register and load from that.
1050                    (Some(_), None, None) => {
1051                        let tmp = writable_spilltmp_reg();
1052                        Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
1053                        (tmp.to_reg(), Imm12::ZERO)
1054                    }
1055
1056                    // If the AMode contains a label we can emit an internal relocation that gets
1057                    // resolved with the correct address later.
1058                    (None, Some(imm), Some(label)) => {
1059                        debug_assert_eq!(imm.as_i16(), 0);
1060
1061                        // Get the current PC.
1062                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1063                        Inst::Auipc {
1064                            rd,
1065                            imm: Imm20::ZERO,
1066                        }
1067                        .emit_uncompressed(sink, emit_info, state, start_off);
1068
1069                        // Emit a relocation for the load. This patches the offset into the instruction.
1070                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1071
1072                        // Imm12 here is meaningless since it's going to get replaced.
1073                        (rd.to_reg(), Imm12::ZERO)
1074                    }
1075
1076                    // These cases are impossible with the current AModes that we have. We either
1077                    // always have a register, or always have a label. Never both, and never neither.
1078                    (None, None, None)
1079                    | (None, Some(_), None)
1080                    | (Some(_), None, Some(_))
1081                    | (Some(_), Some(_), Some(_))
1082                    | (None, None, Some(_)) => {
1083                        unreachable!("Invalid load address")
1084                    }
1085                };
1086
1087                if let Some(trap_code) = flags.trap_code() {
1088                    // Register the offset at which the actual load instruction starts.
1089                    sink.add_trap(trap_code);
1090                }
1091
1092                sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
1093            }
1094            &Inst::Store {
1095                op: StoreOP::Fsh,
1096                src,
1097                flags,
1098                to,
1099            } if !emit_info.isa_flags.has_zfhmin() => {
1100                // fsh unavailable, use an integer store instead
1101                Inst::FpuRR {
1102                    alu_op: FpuOPRR::FmvXFmt,
1103                    width: FpuOPWidth::S,
1104                    frm: FRM::RNE,
1105                    rd: writable_spilltmp_reg(),
1106                    rs: src,
1107                }
1108                .emit(sink, emit_info, state);
1109                Inst::Store {
1110                    to,
1111                    op: StoreOP::Sh,
1112                    flags,
1113                    src: spilltmp_reg(),
1114                }
1115                .emit(sink, emit_info, state);
1116            }
1117            &Inst::Store { op, src, flags, to } => {
1118                let base = to.get_base_register();
1119                let offset = to.get_offset_with_state(state);
1120                let offset_imm12 = Imm12::maybe_from_i64(offset);
1121
1122                let (addr, imm12) = match (base, offset_imm12) {
1123                    // If the offset fits into an imm12 we can directly encode it.
1124                    (Some(base), Some(imm12)) => (base, imm12),
1125                    // Otherwise load the address into a reg and store to it.
1126                    _ => {
1127                        let tmp = writable_spilltmp_reg();
1128                        Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
1129                        (tmp.to_reg(), Imm12::ZERO)
1130                    }
1131                };
1132
1133                if let Some(trap_code) = flags.trap_code() {
1134                    // Register the offset at which the actual store instruction starts.
1135                    sink.add_trap(trap_code);
1136                }
1137
1138                sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
1139            }
1140            &Inst::Args { .. } | &Inst::Rets { .. } => {
1141                // Nothing: this is a pseudoinstruction that serves
1142                // only to constrain registers at a certain point.
1143            }
1144            &Inst::Ret {} => {
1145                // RISC-V does not have a dedicated ret instruction, instead we emit the equivalent
1146                // `jalr x0, x1, 0` that jumps to the return address.
1147                Inst::Jalr {
1148                    rd: writable_zero_reg(),
1149                    base: link_reg(),
1150                    offset: Imm12::ZERO,
1151                }
1152                .emit(sink, emit_info, state);
1153            }
1154
1155            &Inst::Extend {
1156                rd,
1157                rn,
1158                signed,
1159                from_bits,
1160                to_bits: _to_bits,
1161            } => {
1162                let mut insts = SmallInstVec::new();
1163                let shift_bits = (64 - from_bits) as i16;
1164                let is_u8 = || from_bits == 8 && signed == false;
1165                if is_u8() {
1166                    // special for u8.
1167                    insts.push(Inst::AluRRImm12 {
1168                        alu_op: AluOPRRI::Andi,
1169                        rd,
1170                        rs: rn,
1171                        imm12: Imm12::from_i16(255),
1172                    });
1173                } else {
1174                    insts.push(Inst::AluRRImm12 {
1175                        alu_op: AluOPRRI::Slli,
1176                        rd,
1177                        rs: rn,
1178                        imm12: Imm12::from_i16(shift_bits),
1179                    });
1180                    insts.push(Inst::AluRRImm12 {
1181                        alu_op: if signed {
1182                            AluOPRRI::Srai
1183                        } else {
1184                            AluOPRRI::Srli
1185                        },
1186                        rd,
1187                        rs: rd.to_reg(),
1188                        imm12: Imm12::from_i16(shift_bits),
1189                    });
1190                }
1191                insts
1192                    .into_iter()
1193                    .for_each(|i| i.emit(sink, emit_info, state));
1194            }
1195
1196            &Inst::Call { ref info } => {
1197                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1198
1199                let start = sink.cur_offset();
1200                Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
1201                    .into_iter()
1202                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1203
1204                if let Some(s) = state.take_stack_map() {
1205                    let offset = sink.cur_offset();
1206                    sink.push_user_stack_map(state, offset, s);
1207                }
1208
1209                if let Some(try_call) = info.try_call_info.as_ref() {
1210                    sink.add_try_call_site(
1211                        Some(state.frame_layout.sp_to_fp()),
1212                        try_call.exception_handlers(&state.frame_layout),
1213                    );
1214                } else {
1215                    sink.add_call_site();
1216                }
1217
1218                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1219                if callee_pop_size > 0 {
1220                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1221                        inst.emit(sink, emit_info, state);
1222                    }
1223                }
1224
1225                if info.patchable {
1226                    sink.add_patchable_call_site(sink.cur_offset() - start);
1227                } else {
1228                    // Load any stack-carried return values.
1229                    info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1230                        state.frame_layout().stackslots_size,
1231                        |inst| inst.emit(sink, emit_info, state),
1232                        |needed_space| Some(Inst::EmitIsland { needed_space }),
1233                    );
1234                }
1235
1236                // If this is a try-call, jump to the continuation
1237                // (normal-return) block.
1238                if let Some(try_call) = info.try_call_info.as_ref() {
1239                    let jmp = Inst::Jal {
1240                        label: try_call.continuation,
1241                    };
1242                    jmp.emit(sink, emit_info, state);
1243                }
1244
1245                *start_off = sink.cur_offset();
1246            }
1247            &Inst::CallInd { ref info } => {
1248                Inst::Jalr {
1249                    rd: writable_link_reg(),
1250                    base: info.dest,
1251                    offset: Imm12::ZERO,
1252                }
1253                .emit(sink, emit_info, state);
1254
1255                if let Some(s) = state.take_stack_map() {
1256                    let offset = sink.cur_offset();
1257                    sink.push_user_stack_map(state, offset, s);
1258                }
1259
1260                if let Some(try_call) = info.try_call_info.as_ref() {
1261                    sink.add_try_call_site(
1262                        Some(state.frame_layout.sp_to_fp()),
1263                        try_call.exception_handlers(&state.frame_layout),
1264                    );
1265                } else {
1266                    sink.add_call_site();
1267                }
1268
1269                let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1270                if callee_pop_size > 0 {
1271                    for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1272                        inst.emit(sink, emit_info, state);
1273                    }
1274                }
1275
1276                // Load any stack-carried return values.
1277                info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1278                    state.frame_layout().stackslots_size,
1279                    |inst| inst.emit(sink, emit_info, state),
1280                    |needed_space| Some(Inst::EmitIsland { needed_space }),
1281                );
1282
1283                // If this is a try-call, jump to the continuation
1284                // (normal-return) block.
1285                if let Some(try_call) = info.try_call_info.as_ref() {
1286                    let jmp = Inst::Jal {
1287                        label: try_call.continuation,
1288                    };
1289                    jmp.emit(sink, emit_info, state);
1290                }
1291
1292                *start_off = sink.cur_offset();
1293            }
1294
1295            &Inst::ReturnCall { ref info } => {
1296                emit_return_call_common_sequence(sink, emit_info, state, info);
1297
1298                sink.add_call_site();
1299                sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1300                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
1301                    .into_iter()
1302                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1303            }
1304
1305            &Inst::ReturnCallInd { ref info } => {
1306                emit_return_call_common_sequence(sink, emit_info, state, &info);
1307
1308                Inst::Jalr {
1309                    rd: writable_zero_reg(),
1310                    base: info.dest,
1311                    offset: Imm12::ZERO,
1312                }
1313                .emit(sink, emit_info, state);
1314            }
1315            &Inst::Jal { label } => {
1316                sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
1317                sink.add_uncond_branch(*start_off, *start_off + 4, label);
1318                sink.put4(0b1101111);
1319                state.clobber_vstate();
1320            }
1321            &Inst::CondBr {
1322                taken,
1323                not_taken,
1324                kind,
1325            } => {
1326                match taken {
1327                    CondBrTarget::Label(label) => {
1328                        let code = kind.emit();
1329                        let code_inverse = kind.inverse().emit().to_le_bytes();
1330                        sink.use_label_at_offset(*start_off, label, LabelUse::B12);
1331                        sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
1332                        sink.put4(code);
1333                    }
1334                    CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
1335                }
1336
1337                match not_taken {
1338                    CondBrTarget::Label(label) => {
1339                        Inst::gen_jump(label).emit(sink, emit_info, state)
1340                    }
1341                    CondBrTarget::Fallthrough => {}
1342                };
1343            }
1344
1345            &Inst::Mov { rd, rm, ty } => {
1346                debug_assert_eq!(rd.to_reg().class(), rm.class());
1347                if rd.to_reg() == rm {
1348                    return;
1349                }
1350
1351                match rm.class() {
1352                    RegClass::Int => Inst::AluRRImm12 {
1353                        alu_op: AluOPRRI::Addi,
1354                        rd,
1355                        rs: rm,
1356                        imm12: Imm12::ZERO,
1357                    },
1358                    RegClass::Float => Inst::FpuRRR {
1359                        alu_op: FpuOPRRR::Fsgnj,
1360                        width: FpuOPWidth::try_from(ty).unwrap(),
1361                        frm: FRM::RNE,
1362                        rd,
1363                        rs1: rm,
1364                        rs2: rm,
1365                    },
1366                    RegClass::Vector => Inst::VecAluRRImm5 {
1367                        op: VecAluOpRRImm5::VmvrV,
1368                        vd: rd,
1369                        vs2: rm,
1370                        // Imm 0 means copy 1 register.
1371                        imm: Imm5::maybe_from_i8(0).unwrap(),
1372                        mask: VecOpMasking::Disabled,
1373                        // Vstate for this instruction is ignored.
1374                        vstate: VState::from_type(ty),
1375                    },
1376                }
1377                .emit(sink, emit_info, state);
1378            }
1379
1380            &Inst::MovFromPReg { rd, rm } => {
1381                Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
1382            }
1383
1384            &Inst::BrTable {
1385                index,
1386                tmp1,
1387                tmp2,
1388                ref targets,
1389            } => {
1390                let ext_index = writable_spilltmp_reg();
1391
1392                let label_compute_target = sink.get_label();
1393
1394                // The default target is passed in as the 0th element of `targets`
1395                // separate it here for clarity.
1396                let default_target = targets[0];
1397                let targets = &targets[1..];
1398
1399                // We are going to potentially emit a large amount of instructions, so ensure that we emit an island
1400                // now if we need one.
1401                //
1402                // The worst case PC calculations are 12 instructions, and each entry in the jump table is 2 instructions.
1403                // Check if we need to emit an island here to support that jump.
1404                let inst_count = 12 + (targets.len() * 2);
1405                let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1406                if sink.island_needed(distance) {
1407                    let jump_around_label = sink.get_label();
1408                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1409                    sink.emit_island(distance + 4, &mut state.ctrl_plane);
1410                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1411                }
1412
1413                // We emit a bounds check on the index: if the index is greater than or equal to the
1414                // number of jump table entries, we jump to the default block.  Otherwise we compute a jump
1415                // offset by multiplying the index by 8 (the size of each entry) and then jump to
1416                // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
1417                //
1418                // Build the following sequence:
1419                //
1420                // extend_index:
1421                //     zext.w  ext_index, index
1422                // bounds_check:
1423                //     li      tmp, n_labels
1424                //     bltu    ext_index, tmp, compute_target
1425                // jump_to_default_block:
1426                //     auipc   pc, 0
1427                //     jalr    zero, pc, default_block
1428                // compute_target:
1429                //     auipc   pc, 0
1430                //     slli    tmp, ext_index, 3
1431                //     add     pc, pc, tmp
1432                //     jalr    zero, pc, 0x10
1433                // jump_table:
1434                //     ; This repeats for each entry in the jumptable
1435                //     auipc   pc, 0
1436                //     jalr    zero, pc, block_target
1437
1438                // Extend the index to 64 bits.
1439                //
1440                // This prevents us branching on the top 32 bits of the index, which
1441                // are undefined.
1442                Inst::Extend {
1443                    rd: ext_index,
1444                    rn: index,
1445                    signed: false,
1446                    from_bits: 32,
1447                    to_bits: 64,
1448                }
1449                .emit(sink, emit_info, state);
1450
1451                // Bounds check.
1452                //
1453                // Check if the index passed in is greater than or equal to the number of
1454                // jumptable entries that we have. If it is, we fall through to a jump into
1455                // the default block.
1456                Inst::load_constant_u32(tmp2, targets.len() as u64)
1457                    .iter()
1458                    .for_each(|i| i.emit(sink, emit_info, state));
1459                Inst::CondBr {
1460                    taken: CondBrTarget::Label(label_compute_target),
1461                    not_taken: CondBrTarget::Fallthrough,
1462                    kind: IntegerCompare {
1463                        kind: IntCC::UnsignedLessThan,
1464                        rs1: ext_index.to_reg(),
1465                        rs2: tmp2.to_reg(),
1466                    },
1467                }
1468                .emit(sink, emit_info, state);
1469
1470                sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1471                Inst::construct_auipc_and_jalr(None, tmp2, 0)
1472                    .iter()
1473                    .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1474
1475                // Compute the jump table offset.
1476                // We need to emit a PC relative offset,
1477                sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1478
1479                // Get the current PC.
1480                Inst::Auipc {
1481                    rd: tmp1,
1482                    imm: Imm20::ZERO,
1483                }
1484                .emit_uncompressed(sink, emit_info, state, start_off);
1485
1486                // These instructions must be emitted as uncompressed since we
1487                // are manually computing the offset from the PC.
1488
1489                // Multiply the index by 8, since that is the size in
1490                // bytes of each jump table entry
1491                Inst::AluRRImm12 {
1492                    alu_op: AluOPRRI::Slli,
1493                    rd: tmp2,
1494                    rs: ext_index.to_reg(),
1495                    imm12: Imm12::from_i16(3),
1496                }
1497                .emit_uncompressed(sink, emit_info, state, start_off);
1498
1499                // Calculate the base of the jump, PC + the offset from above.
1500                Inst::AluRRR {
1501                    alu_op: AluOPRRR::Add,
1502                    rd: tmp1,
1503                    rs1: tmp1.to_reg(),
1504                    rs2: tmp2.to_reg(),
1505                }
1506                .emit_uncompressed(sink, emit_info, state, start_off);
1507
1508                // Jump to the middle of the jump table.
1509                // We add a 16 byte offset here, because 4 instructions are emitted
1510                // starting at the AUIPC that was used to get the PC.
1511                Inst::Jalr {
1512                    rd: writable_zero_reg(),
1513                    base: tmp1.to_reg(),
1514                    offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1515                }
1516                .emit_uncompressed(sink, emit_info, state, start_off);
1517
1518                // Emit the jump table.
1519                //
1520                // Each entry is an auipc + jalr to the target block. We also start with an island
1521                // if necessary.
1522
1523                // Emit the jumps back to back
1524                for target in targets.iter() {
1525                    sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1526
1527                    Inst::construct_auipc_and_jalr(None, tmp2, 0)
1528                        .iter()
1529                        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1530                }
1531
1532                // We've just emitted an island that is safe up to *here*.
1533                // Mark it as such so that we don't needlessly emit additional islands.
1534                *start_off = sink.cur_offset();
1535            }
1536
1537            &Inst::Atomic {
1538                op,
1539                rd,
1540                addr,
1541                src,
1542                amo,
1543            } => {
                    // Encode the atomic operation `op` as one 32-bit instruction word
                    // and append it to the sink.
                    //
1544                // TODO: get flags from original CLIF atomic instruction
1545                let flags = MemFlagsData::new();
                    // Register a trap with the sink only if the (freshly constructed)
                    // flags report a trap code.
1546                if let Some(trap_code) = flags.trap_code() {
1547                    sink.add_trap(trap_code);
1548                }
                    // Field layout of the word: the opcode occupies the low bits, `rd`
                    // starts at bit 7, funct3 at bit 12, the address register at bit 15,
                    // the source register at bit 20, and funct7 (derived from `amo`) at
                    // bit 25. `<<` binds tighter than `|`, so every field is shifted
                    // before being OR-ed in.
1549                let x = op.op_code()
1550                    | reg_to_gpr_num(rd.to_reg()) << 7
1551                    | op.funct3() << 12
1552                    | reg_to_gpr_num(addr) << 15
1553                    | reg_to_gpr_num(src) << 20
1554                    | op.funct7(amo) << 25;
1555
1556                sink.put4(x);
1557            }
1558            &Inst::Fence { pred, succ } => {
                    // Build the 32-bit fence word: opcode 0b0001111 in the low bits, the
                    // fields starting at bits 7, 12 and 15 are all zero, the successor
                    // set starts at bit 20 and the predecessor set at bit 24.
1559                let x = 0b0001111
1560                    | 0b00000 << 7
1561                    | 0b000 << 12
1562                    | 0b00000 << 15
1563                    | (succ as u32) << 20
1564                    | (pred as u32) << 24;
1565
1566                sink.put4(x);
1567            }
1568            &Inst::Auipc { rd, imm } => {
                    // Emit the 32-bit word that `enc_auipc` produces for `rd` and `imm`.
1569                sink.put4(enc_auipc(rd, imm));
1570            }
1571
1572            &Inst::LoadAddr { rd, mem } => {
                    // Compute the address described by `mem` into `rd`. The sequence
                    // chosen depends on whether `mem` has a base register and whether
                    // its offset fits in an `Imm12`.
1573                let base = mem.get_base_register();
1574                let offset = mem.get_offset_with_state(state);
                    // `None` when the offset cannot be represented as an `Imm12`.
1575                let offset_imm12 = Imm12::maybe_from_i64(offset);
1576
1577                match (mem, base, offset_imm12) {
                        // Base register plus a small offset: a single `addi`.
1578                    (_, Some(rs), Some(imm12)) => {
1579                        Inst::AluRRImm12 {
1580                            alu_op: AluOPRRI::Addi,
1581                            rd,
1582                            rs,
1583                            imm12,
1584                        }
1585                        .emit(sink, emit_info, state);
1586                    }
                        // Base register plus an offset that does not fit in an `Imm12`:
                        // load the offset as a constant into `rd`, then add the base.
1587                    (_, Some(rs), None) => {
1588                        let mut insts = Inst::load_constant_u64(rd, offset as u64);
1589                        insts.push(Inst::AluRRR {
1590                            alu_op: AluOPRRR::Add,
1591                            rd,
1592                            rs1: rd.to_reg(),
1593                            rs2: rs,
1594                        });
1595                        insts
1596                            .into_iter()
1597                            .for_each(|inst| inst.emit(sink, emit_info, state));
1598                    }
1599                    (AMode::Const(addr), None, _) => {
1600                        // Get an address label for the constant and recurse.
1601                        let label = sink.get_label_for_constant(addr);
1602                        Inst::LoadAddr {
1603                            rd,
1604                            mem: AMode::Label(label),
1605                        }
1606                        .emit(sink, emit_info, state);
1607                    }
                        // Label without a base register: an uncompressed auipc + addi
                        // pair, each registered as a use of `label` so that it can be
                        // patched once the label's offset is known.
1608                    (AMode::Label(label), None, _) => {
1609                        // Get the current PC.
1610                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1611                        let inst = Inst::Auipc {
1612                            rd,
1613                            imm: Imm20::ZERO,
1614                        };
1615                        inst.emit_uncompressed(sink, emit_info, state, start_off);
1616
1617                        // Emit an add to the address with a relocation.
1618                        // This later gets patched up with the correct offset.
1619                        sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1620                        Inst::AluRRImm12 {
1621                            alu_op: AluOPRRI::Addi,
1622                            rd,
1623                            rs: rd.to_reg(),
1624                            imm12: Imm12::ZERO,
1625                        }
1626                        .emit_uncompressed(sink, emit_info, state, start_off);
1627                    }
                        // Any other addressing mode without a base register is not
                        // supported here and panics.
1628                    (amode, _, _) => {
1629                        unimplemented!("LoadAddr: {:?}", amode);
1630                    }
1631                }
1632            }
1633
1634            &Inst::Select {
1635                ref dst,
1636                condition,
1637                ref x,
1638                ref y,
1639            } => {
                    // Select between `x` and `y` into `dst`: `dst` ends up holding `x`
                    // when `condition` holds and `y` otherwise.
                    //
1640                // The general form for this select is the following:
1641                //
1642                //     mv rd, x
1643                //     b{cond} rcond, label_end
1644                //     mv rd, y
1645                // label_end:
1646                //     ... etc
1647                //
1648                // This is built on the assumption that moves are cheap, but branches and jumps
1649                // are not. So with this format we always avoid one jump instruction at the expense
1650                // of an unconditional move.
1651                //
1652                // We also perform another optimization here. If the destination register is the same
1653                // as one of the input registers, we can avoid emitting the first unconditional move
1654                // and emit just the branch and the second move.
1655                //
1656                // To make sure that this happens as often as possible, we also try to invert the
1657                // condition, so that if either of the input registers are the same as the destination
1658                // we avoid that move.
1659
1660                let label_end = sink.get_label();
1661
1662                let xregs = x.regs();
1663                let yregs = y.regs();
1664                let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1665                let condregs = condition.regs();
1666
1667                // We are going to write to the destination register before evaluating
1668                // the condition, so we need to make sure that the destination register
1669                // is not one of the condition registers.
1670                //
1671                // This should never happen, since hopefully the regalloc constraints
1672                // for this register are set up correctly.
                    //
                    // NOTE(review): this compares the two register lists as a whole; it
                    // does not check each destination register against each condition
                    // register individually.
1673                debug_assert_ne!(dstregs, condregs);
1674
1675                // Check if we can invert the condition and avoid moving the y registers into
1676                // the destination. This allows us to only emit the branch and one of the moves.
1677                let (uncond_move, cond_move, condition) = if yregs == dstregs {
1678                    (yregs, xregs, condition.inverse())
1679                } else {
1680                    (xregs, yregs, condition)
1681                };
1682
1683                // Unconditionally move one of the values to the destination register.
1684                //
1685                // These moves may not end up being emitted if the source and
1686                // destination registers are the same. That logic is built into
1687                // the emit function for `Inst::Mov`.
1688                for i in gen_moves(dst.regs(), uncond_move) {
1689                    i.emit(sink, emit_info, state);
1690                }
1691
1692                // If the condition passes we skip over the conditional move
1693                Inst::CondBr {
1694                    taken: CondBrTarget::Label(label_end),
1695                    not_taken: CondBrTarget::Fallthrough,
1696                    kind: condition,
1697                }
1698                .emit(sink, emit_info, state);
1699
1700                // Move the conditional value to the destination register.
1701                for i in gen_moves(dst.regs(), cond_move) {
1702                    i.emit(sink, emit_info, state);
1703                }
1704
1705                sink.bind_label(label_end, &mut state.ctrl_plane);
1706            }
1707            &Inst::Jalr { rd, base, offset } => {
                    // Emit the 32-bit word produced by `enc_jalr`, then clobber the
                    // vector state tracked in `state`.
                    // NOTE(review): presumably because the vector configuration at the
                    // jump target is not known to the emitter - confirm.
1708                sink.put4(enc_jalr(rd, base, offset));
1709                state.clobber_vstate();
1710            }
1711            &Inst::EBreak => {
                    // The instruction has no operands; emit its fixed 32-bit encoding.
1712                sink.put4(0x00100073);
1713            }
1714            &Inst::AtomicCas {
1715                offset,
1716                t0,
1717                dst,
1718                e,
1719                addr,
1720                v,
1721                ty,
1722            } => {
                    // Compare-and-swap implemented as a load-reserved /
                    // store-conditional retry loop.
                    //
1723                //     # addr holds address of memory location
1724                //     # e holds expected value
1725                //     # v holds desired value
1726                //     # dst holds return value
1727                // cas:
1728                //     lr.w dst, (addr)       # Load original value.
1729                //     bne dst, e, fail       # Doesn't match, so fail.
1730                //     sc.w t0, v, (addr)     # Try to update.
1731                //     bnez t0, cas           # If the store failed, retry.
1732                // fail:
                    //
                    // For types narrower than 32 bits the loaded word is first copied
                    // to `t0`, the compared value is extracted from it into `dst`, and
                    // `v` is merged into the copy in `t0` before the store. The word
                    // that is stored is therefore derived from the same load that was
                    // compared against `e`; no second load is issued between the
                    // load-reserved and the store-conditional.
1733                let fail_label = sink.get_label();
1734                let cas_label = sink.get_label();
1735                sink.bind_label(cas_label, &mut state.ctrl_plane);
1736                Inst::Atomic {
1737                    op: AtomicOP::load_op(ty),
1738                    rd: dst,
1739                    addr,
1740                    src: zero_reg(),
1741                    amo: AMO::SeqCst,
1742                }
1743                .emit(sink, emit_info, state);
1744                if ty.bits() < 32 {
                        // Stash the full loaded word before `extract` overwrites `dst`;
                        // the merge below needs it.
                        // NOTE(review): this writes `t0` before `e` is read by the
                        // compare, so `t0` must never alias `e` - confirm that the
                        // operand constraints for this instruction guarantee that.
                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1745                    AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1746                        .iter()
1747                        .for_each(|i| i.emit(sink, emit_info, state));
1748                } else if ty.bits() == 32 {
                        // Zero-extend the loaded 32-bit value to 64 bits before the
                        // comparison.
1749                    Inst::Extend {
1750                        rd: dst,
1751                        rn: dst.to_reg(),
1752                        signed: false,
1753                        from_bits: 32,
1754                        to_bits: 64,
1755                    }
1756                    .emit(sink, emit_info, state);
1757                }
                    // Leave the loop if the loaded value is not the expected one.
1758                Inst::CondBr {
1759                    taken: CondBrTarget::Label(fail_label),
1760                    not_taken: CondBrTarget::Fallthrough,
1761                    kind: IntegerCompare {
1762                        kind: IntCC::NotEqual,
1763                        rs1: e,
1764                        rs2: dst.to_reg(),
1765                    },
1766                }
1767                .emit(sink, emit_info, state);
1768                let store_value = if ty.bits() < 32 {
                        // Merge `v` into the stashed word in `t0`, using the spill
                        // temporary as scratch.
1779                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1780                        .iter()
1781                        .for_each(|i| i.emit(sink, emit_info, state));
1782                    t0.to_reg()
1783                } else {
1784                    v
1785                };
                    // Store-conditional; the status result is written to `t0`.
1786                Inst::Atomic {
1787                    op: AtomicOP::store_op(ty),
1788                    rd: t0,
1789                    addr,
1790                    src: store_value,
1791                    amo: AMO::SeqCst,
1792                }
1793                .emit(sink, emit_info, state);
1794                // A non-zero status means the store did not succeed: retry.
1795                Inst::CondBr {
1796                    taken: CondBrTarget::Label(cas_label),
1797                    not_taken: CondBrTarget::Fallthrough,
1798                    kind: IntegerCompare {
1799                        kind: IntCC::NotEqual,
1800                        rs1: t0.to_reg(),
1801                        rs2: zero_reg(),
1802                    },
1803                }
1804                .emit(sink, emit_info, state);
1805                sink.bind_label(fail_label, &mut state.ctrl_plane);
1806            }
1807            &Inst::AtomicRmwLoop {
1808                offset,
1809                op,
1810                dst,
1811                ty,
1812                p,
1813                x,
1814                t0,
1815            } => {
                    // Atomic read-modify-write implemented as a load-reserved /
                    // store-conditional retry loop: load the old value into `dst`,
                    // compute the new value according to `op`, try to store it, and
                    // jump back to `retry` if the store reports failure.
1816                let retry = sink.get_label();
1817                sink.bind_label(retry, &mut state.ctrl_plane);
1818                // load old value.
1819                Inst::Atomic {
1820                    op: AtomicOP::load_op(ty),
1821                    rd: dst,
1822                    addr: p,
1823                    src: zero_reg(),
1824                    amo: AMO::SeqCst,
1825                }
1826                .emit(sink, emit_info, state);
1827                // For sub-word ops the merge step needs the original full word.
1828                // Stash it in spilltmp2 before `extract` clobbers `dst`; reusing
1829                // the value avoids issuing a second LR before the SC.
1830
                    // Each arm leaves the value to store in the register it returns.
1831                let store_value: Reg = match op {
1832                    crate::ir::AtomicRmwOp::Add
1833                    | crate::ir::AtomicRmwOp::Sub
1834                    | crate::ir::AtomicRmwOp::And
1835                    | crate::ir::AtomicRmwOp::Or
1836                    | crate::ir::AtomicRmwOp::Xor => {
                            // Stash the loaded word, extract the old value into `dst`,
                            // compute `t0 = dst <op> x`, then merge `t0` back into the
                            // stashed word.
1837                        Inst::gen_move(writable_spilltmp_reg2(), dst.to_reg(), I64)
1838                            .emit(sink, emit_info, state);
1839                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1840                            .iter()
1841                            .for_each(|i| i.emit(sink, emit_info, state));
1842                        Inst::AluRRR {
1843                            alu_op: match op {
1844                                crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1845                                crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1846                                crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1847                                crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1848                                crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1849                                _ => unreachable!(),
1850                            },
1851                            rd: t0,
1852                            rs1: dst.to_reg(),
1853                            rs2: x,
1854                        }
1855                        .emit(sink, emit_info, state);
1856                        AtomicOP::merge(
1857                            writable_spilltmp_reg2(),
1858                            writable_spilltmp_reg(),
1859                            offset,
1860                            t0.to_reg(),
1861                            ty,
1862                        )
1863                        .iter()
1864                        .for_each(|i| i.emit(sink, emit_info, state));
1865                        spilltmp_reg2()
1866                    }
1867                    crate::ir::AtomicRmwOp::Nand => {
                            // Only types narrower than 32 bits need the stash / extract /
                            // merge steps; otherwise the result in `t0` is stored
                            // directly.
1868                        if ty.bits() < 32 {
1869                            Inst::gen_move(writable_spilltmp_reg2(), dst.to_reg(), I64)
1870                                .emit(sink, emit_info, state);
1871                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1872                                .iter()
1873                                .for_each(|i| i.emit(sink, emit_info, state));
1874                        }
                            // t0 = !(x & dst)
1875                        Inst::AluRRR {
1876                            alu_op: AluOPRRR::And,
1877                            rd: t0,
1878                            rs1: x,
1879                            rs2: dst.to_reg(),
1880                        }
1881                        .emit(sink, emit_info, state);
1882                        Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1883                        if ty.bits() < 32 {
1884                            AtomicOP::merge(
1885                                writable_spilltmp_reg2(),
1886                                writable_spilltmp_reg(),
1887                                offset,
1888                                t0.to_reg(),
1889                                ty,
1890                            )
1891                            .iter()
1892                            .for_each(|i| i.emit(sink, emit_info, state));
1893                            spilltmp_reg2()
1894                        } else {
1895                            t0.to_reg()
1896                        }
1897                    }
1898
1899                    crate::ir::AtomicRmwOp::Umin
1900                    | crate::ir::AtomicRmwOp::Umax
1901                    | crate::ir::AtomicRmwOp::Smin
1902                    | crate::ir::AtomicRmwOp::Smax => {
                            // Pick either the old value (`dst`) or `x` into `t0` with a
                            // compare-and-branch, then merge `t0` into the stashed word.
1903                        let label_select_dst = sink.get_label();
1904                        let label_select_done = sink.get_label();
1905                        Inst::gen_move(writable_spilltmp_reg2(), dst.to_reg(), I64)
1906                            .emit(sink, emit_info, state);
                            // The unsigned variants use `extract`, the signed variants
                            // use `extract_sext`.
1907                        if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1908                        {
1909                            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1910                        } else {
1911                            AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1912                        }
1913                        .iter()
1914                        .for_each(|i| i.emit(sink, emit_info, state));
1915
                            // Branch to `label_select_dst` when the old value already
                            // satisfies the min/max relation against `x`.
1916                        Inst::CondBr {
1917                            taken: CondBrTarget::Label(label_select_dst),
1918                            not_taken: CondBrTarget::Fallthrough,
1919                            kind: IntegerCompare {
1920                                kind: match op {
1921                                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1922                                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1923                                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1924                                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1925                                    _ => unreachable!(),
1926                                },
1927                                rs1: dst.to_reg(),
1928                                rs2: x,
1929                            },
1930                        }
1931                        .emit(sink, emit_info, state);
1932                        // here we select x.
1933                        Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1934                        Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1935                        sink.bind_label(label_select_dst, &mut state.ctrl_plane);
                            // here we select the old value.
1936                        Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1937                        sink.bind_label(label_select_done, &mut state.ctrl_plane);
1938                        AtomicOP::merge(
1939                            writable_spilltmp_reg2(),
1940                            writable_spilltmp_reg(),
1941                            offset,
1942                            t0.to_reg(),
1943                            ty,
1944                        )
1945                        .iter()
1946                        .for_each(|i| i.emit(sink, emit_info, state));
1947                        spilltmp_reg2()
1948                    }
1949                    crate::ir::AtomicRmwOp::Xchg => {
                            // Exchange: extract the old value into `dst` and merge `x`
                            // itself into the stashed word.
1950                        Inst::gen_move(writable_spilltmp_reg2(), dst.to_reg(), I64)
1951                            .emit(sink, emit_info, state);
1952                        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1953                            .iter()
1954                            .for_each(|i| i.emit(sink, emit_info, state));
1955                        AtomicOP::merge(
1956                            writable_spilltmp_reg2(),
1957                            writable_spilltmp_reg(),
1958                            offset,
1959                            x,
1960                            ty,
1961                        )
1962                        .iter()
1963                        .for_each(|i| i.emit(sink, emit_info, state));
1964                        spilltmp_reg2()
1965                    }
1966                };
1967
                    // Store-conditional of the new value; the status result is written
                    // to `t0`.
1968                Inst::Atomic {
1969                    op: AtomicOP::store_op(ty),
1970                    rd: t0,
1971                    addr: p,
1972                    src: store_value,
1973                    amo: AMO::SeqCst,
1974                }
1975                .emit(sink, emit_info, state);
1976
1977                // If the store did not succeed (non-zero status), retry.
1978                Inst::CondBr {
1979                    taken: CondBrTarget::Label(retry),
1980                    not_taken: CondBrTarget::Fallthrough,
1981                    kind: IntegerCompare {
1982                        kind: IntCC::NotEqual,
1983                        rs1: t0.to_reg(),
1984                        rs2: zero_reg(),
1985                    },
1986                }
1987                .emit(sink, emit_info, state);
1988            }
1989
1990            &Inst::LoadExtNameGot { rd, ref name } => {
1991                // Load a PC-relative address into a register.
1992                // RISC-V does this slightly differently from other arches. We emit a relocation
1993                // with a label, instead of the symbol itself.
1994                //
1995                // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
1996                //
1997                // Emit the following code:
1998                // label:
1999                //   auipc rd, 0              # R_RISCV_GOT_HI20 (symbol_name)
2000                //   ld    rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
                    //
                    // Both instructions are emitted uncompressed.
2001
2002                // Create the label that is going to be published to the final binary object.
2003                let auipc_label = sink.get_label();
2004                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2005
2006                // Get the current PC.
2007                sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2008                Inst::Auipc {
2009                    rd,
2010                    imm: Imm20::from_i32(0),
2011                }
2012                .emit_uncompressed(sink, emit_info, state, start_off);
2013
2014                // The `ld` here, points to the `auipc` label instead of directly to the symbol.
2015                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2016                Inst::Load {
2017                    rd,
2018                    op: LoadOP::Ld,
2019                    flags: MemFlagsData::trusted(),
2020                    from: AMode::RegOffset(rd.to_reg(), 0),
2021                }
2022                .emit_uncompressed(sink, emit_info, state, start_off);
2023            }
2024
2025            &Inst::LoadExtNameFar {
2026                rd,
2027                ref name,
2028                offset,
2029            } => {
2030                // In the non PIC sequence we relocate the absolute address into
2031                // a preallocated space, load it into a register and jump over
2032                // it.
2033                //
2034                // Emit the following code:
2035                //   ld rd, label_data
2036                //   j label_end
2037                // label_data:
2038                //   <8 byte space>           # ABS8
2039                // label_end:
2040
2041                let label_data = sink.get_label();
2042                let label_end = sink.get_label();
2043
2044                // Load the value from a label
2045                Inst::Load {
2046                    rd,
2047                    op: LoadOP::Ld,
2048                    flags: MemFlagsData::trusted(),
2049                    from: AMode::Label(label_data),
2050                }
2051                .emit(sink, emit_info, state);
2052
2053                // Jump over the data
2054                Inst::gen_jump(label_end).emit(sink, emit_info, state);
2055
                    // Reserve the 8-byte slot: it is emitted as zero and carries an
                    // `Abs8` relocation against `name` with addend `offset`.
2056                sink.bind_label(label_data, &mut state.ctrl_plane);
2057                sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2058                sink.put8(0);
2059
2060                sink.bind_label(label_end, &mut state.ctrl_plane);
2061            }
2062
2063            &Inst::LoadExtNameNear {
2064                rd,
2065                ref name,
2066                offset,
2067            } => {
2068                // Emit the following code:
2069                // label:
2070                //   auipc rd, 0              # R_RISCV_PCREL_HI20 (symbol_name)
2071                //   addi  rd, rd, 0          # R_RISCV_PCREL_LO12_I (label)
                    //
                    // Both instructions are emitted uncompressed.
2072
2073                let auipc_label = sink.get_label();
2074                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2075
2076                // Get the current PC.
2077                sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2078                Inst::Auipc {
2079                    rd,
2080                    imm: Imm20::from_i32(0),
2081                }
2082                .emit_uncompressed(sink, emit_info, state, start_off);
2083
                    // The `addi` relocation refers to the `auipc` label rather than to
                    // the symbol.
2084                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2085                Inst::AluRRImm12 {
2086                    alu_op: AluOPRRI::Addi,
2087                    rd,
2088                    rs: rd.to_reg(),
2089                    imm12: Imm12::ZERO,
2090                }
2091                .emit_uncompressed(sink, emit_info, state, start_off);
2092            }
2093
2094            &Inst::LabelAddress { dst, label } => {
                    // Put the address of `label` into `dst` with an uncompressed
                    // auipc + addi pair. Each instruction is emitted with a zero
                    // immediate and registered as a use of `label` at the offset where
                    // that instruction starts.
2095                let offset = sink.cur_offset();
2096                Inst::Auipc {
2097                    rd: dst,
2098                    imm: Imm20::from_i32(0),
2099                }
2100                .emit_uncompressed(sink, emit_info, state, start_off);
2101                sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2102
                    // `offset` is re-sampled so that it now points at the `addi`.
2103                let offset = sink.cur_offset();
2104                Inst::AluRRImm12 {
2105                    alu_op: AluOPRRI::Addi,
2106                    rd: dst,
2107                    rs: dst.to_reg(),
2108                    imm12: Imm12::ZERO,
2109                }
2110                .emit_uncompressed(sink, emit_info, state, start_off);
2111                sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2112            }
2113
2114            &Inst::ElfTlsGetAddr { rd, ref name } => {
2115                // RISC-V's TLS GD model is slightly different from other arches.
2116                //
2117                // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2118                // of the address relative to the GOT entry. This relocation points to
2119                // the symbol as usual.
2120                //
2121                // However when loading the bottom 12bits of the address, we need to
2122                // use a label that points to the previous AUIPC instruction.
2123                //
2124                // label:
2125                //    auipc a0,0                    # R_RISCV_TLS_GD_HI20 (symbol)
2126                //    addi  a0,a0,0                 # R_RISCV_PCREL_LO12_I (label)
2127                //
2128                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2129
2130                // Create the label that is going to be published to the final binary object.
2131                let auipc_label = sink.get_label();
2132                sink.bind_label(auipc_label, &mut state.ctrl_plane);
2133
2134                // Get the current PC.
2135                sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2136                Inst::Auipc {
2137                    rd,
2138                    imm: Imm20::from_i32(0),
2139                }
2140                .emit_uncompressed(sink, emit_info, state, start_off);
2141
2142                // The `addi` here, points to the `auipc` label instead of directly to the symbol.
2143                sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2144                Inst::AluRRImm12 {
2145                    alu_op: AluOPRRI::Addi,
2146                    rd,
2147                    rs: rd.to_reg(),
2148                    imm12: Imm12::from_i16(0),
2149                }
2150                .emit_uncompressed(sink, emit_info, state, start_off);
2151
                    // Finally, call the `ElfTlsGetAddr` libcall using the SystemV
                    // calling convention.
2152                Inst::Call {
2153                    info: Box::new(CallInfo::empty(
2154                        ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2155                        CallConv::SystemV,
2156                    )),
2157                }
2158                .emit_uncompressed(sink, emit_info, state, start_off);
2159            }
2160
2161            &Inst::TrapIf { cmp, trap_code } => {
                    // Conditional trap: a branch on the inverted comparison skips over
                    // an unconditional trap instruction.
2162                let label_end = sink.get_label();
2163
2164                // Jump over the trap if the condition is false.
2165                Inst::CondBr {
2166                    taken: CondBrTarget::Label(label_end),
2167                    not_taken: CondBrTarget::Fallthrough,
2168                    kind: cmp.inverse(),
2169                }
2170                .emit(sink, emit_info, state);
2171                Inst::Udf { trap_code }.emit(sink, emit_info, state);
2172
2173                sink.bind_label(label_end, &mut state.ctrl_plane);
2174            }
2175            &Inst::Udf { trap_code } => {
                    // Register `trap_code` with the sink, then emit the bytes of
                    // `Inst::TRAP_OPCODE`.
2176                sink.add_trap(trap_code);
2177                sink.put_data(Inst::TRAP_OPCODE);
2178            }
2179            &Inst::AtomicLoad { rd, ty, p } => {
                    // Atomic load emitted as: fence, plain load, fence.
                    //
2180                // Leading fence: predecessor set R|W, successor set R|W.
2181                Inst::Fence {
2182                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2183                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2184                }
2185                .emit(sink, emit_info, state);
2186                // Load from address `p` with offset 0; the load op is chosen from `ty`.
2187                Inst::Load {
2188                    rd,
2189                    op: LoadOP::from_type(ty),
2190                    flags: MemFlagsData::new(),
2191                    from: AMode::RegOffset(p, 0),
2192                }
2193                .emit(sink, emit_info, state);
                    // Trailing fence: predecessor set R, successor set R|W.
2194                Inst::Fence {
2195                    pred: Inst::FENCE_REQ_R,
2196                    succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2197                }
2198                .emit(sink, emit_info, state);
2199            }
2200            &Inst::AtomicStore { src, ty, p } => {
                    // Atomic store emitted as: fence, plain store. No fence is emitted
                    // after the store.
                    //
                    // Leading fence: predecessor set R|W, successor set W.
2201                Inst::Fence {
2202                    pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2203                    succ: Inst::FENCE_REQ_W,
2204                }
2205                .emit(sink, emit_info, state);
                    // Store `src` to address `p` with offset 0; the store op is chosen
                    // from `ty`.
2206                Inst::Store {
2207                    to: AMode::RegOffset(p, 0),
2208                    op: StoreOP::from_type(ty),
2209                    flags: MemFlagsData::new(),
2210                    src,
2211                }
2212                .emit(sink, emit_info, state);
2213            }
2214
2215            &Inst::Popcnt {
2216                sum,
2217                tmp,
2218                step,
2219                rs,
2220                ty,
2221            } => {
2222                // load 0 to sum , init.
2223                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2224                // load
2225                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2226                    .emit(sink, emit_info, state);
2227                //
2228                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2229                Inst::AluRRImm12 {
2230                    alu_op: AluOPRRI::Slli,
2231                    rd: tmp,
2232                    rs: tmp.to_reg(),
2233                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2234                }
2235                .emit(sink, emit_info, state);
2236                let label_done = sink.get_label();
2237                let label_loop = sink.get_label();
2238                sink.bind_label(label_loop, &mut state.ctrl_plane);
2239                Inst::CondBr {
2240                    taken: CondBrTarget::Label(label_done),
2241                    not_taken: CondBrTarget::Fallthrough,
2242                    kind: IntegerCompare {
2243                        kind: IntCC::SignedLessThanOrEqual,
2244                        rs1: step.to_reg(),
2245                        rs2: zero_reg(),
2246                    },
2247                }
2248                .emit(sink, emit_info, state);
2249                // test and add sum.
2250                {
2251                    Inst::AluRRR {
2252                        alu_op: AluOPRRR::And,
2253                        rd: writable_spilltmp_reg2(),
2254                        rs1: tmp.to_reg(),
2255                        rs2: rs,
2256                    }
2257                    .emit(sink, emit_info, state);
2258                    let label_over = sink.get_label();
2259                    Inst::CondBr {
2260                        taken: CondBrTarget::Label(label_over),
2261                        not_taken: CondBrTarget::Fallthrough,
2262                        kind: IntegerCompare {
2263                            kind: IntCC::Equal,
2264                            rs1: zero_reg(),
2265                            rs2: spilltmp_reg2(),
2266                        },
2267                    }
2268                    .emit(sink, emit_info, state);
2269                    Inst::AluRRImm12 {
2270                        alu_op: AluOPRRI::Addi,
2271                        rd: sum,
2272                        rs: sum.to_reg(),
2273                        imm12: Imm12::ONE,
2274                    }
2275                    .emit(sink, emit_info, state);
2276                    sink.bind_label(label_over, &mut state.ctrl_plane);
2277                }
2278                // set step and tmp.
2279                {
2280                    Inst::AluRRImm12 {
2281                        alu_op: AluOPRRI::Addi,
2282                        rd: step,
2283                        rs: step.to_reg(),
2284                        imm12: Imm12::from_i16(-1),
2285                    }
2286                    .emit(sink, emit_info, state);
2287                    Inst::AluRRImm12 {
2288                        alu_op: AluOPRRI::Srli,
2289                        rd: tmp,
2290                        rs: tmp.to_reg(),
2291                        imm12: Imm12::ONE,
2292                    }
2293                    .emit(sink, emit_info, state);
2294                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2295                }
2296                sink.bind_label(label_done, &mut state.ctrl_plane);
2297            }
2298            &Inst::Cltz {
2299                sum,
2300                tmp,
2301                step,
2302                rs,
2303                leading,
2304                ty,
2305            } => {
2306                // load 0 to sum , init.
2307                Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2308                // load
2309                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2310                    .emit(sink, emit_info, state);
2311                //
2312                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2313                if leading {
2314                    Inst::AluRRImm12 {
2315                        alu_op: AluOPRRI::Slli,
2316                        rd: tmp,
2317                        rs: tmp.to_reg(),
2318                        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2319                    }
2320                    .emit(sink, emit_info, state);
2321                }
2322                let label_done = sink.get_label();
2323                let label_loop = sink.get_label();
2324                sink.bind_label(label_loop, &mut state.ctrl_plane);
2325                Inst::CondBr {
2326                    taken: CondBrTarget::Label(label_done),
2327                    not_taken: CondBrTarget::Fallthrough,
2328                    kind: IntegerCompare {
2329                        kind: IntCC::SignedLessThanOrEqual,
2330                        rs1: step.to_reg(),
2331                        rs2: zero_reg(),
2332                    },
2333                }
2334                .emit(sink, emit_info, state);
2335                // test and add sum.
2336                {
2337                    Inst::AluRRR {
2338                        alu_op: AluOPRRR::And,
2339                        rd: writable_spilltmp_reg2(),
2340                        rs1: tmp.to_reg(),
2341                        rs2: rs,
2342                    }
2343                    .emit(sink, emit_info, state);
2344                    Inst::CondBr {
2345                        taken: CondBrTarget::Label(label_done),
2346                        not_taken: CondBrTarget::Fallthrough,
2347                        kind: IntegerCompare {
2348                            kind: IntCC::NotEqual,
2349                            rs1: zero_reg(),
2350                            rs2: spilltmp_reg2(),
2351                        },
2352                    }
2353                    .emit(sink, emit_info, state);
2354                    Inst::AluRRImm12 {
2355                        alu_op: AluOPRRI::Addi,
2356                        rd: sum,
2357                        rs: sum.to_reg(),
2358                        imm12: Imm12::ONE,
2359                    }
2360                    .emit(sink, emit_info, state);
2361                }
2362                // set step and tmp.
2363                {
2364                    Inst::AluRRImm12 {
2365                        alu_op: AluOPRRI::Addi,
2366                        rd: step,
2367                        rs: step.to_reg(),
2368                        imm12: Imm12::from_i16(-1),
2369                    }
2370                    .emit(sink, emit_info, state);
2371                    Inst::AluRRImm12 {
2372                        alu_op: if leading {
2373                            AluOPRRI::Srli
2374                        } else {
2375                            AluOPRRI::Slli
2376                        },
2377                        rd: tmp,
2378                        rs: tmp.to_reg(),
2379                        imm12: Imm12::ONE,
2380                    }
2381                    .emit(sink, emit_info, state);
2382                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2383                }
2384                sink.bind_label(label_done, &mut state.ctrl_plane);
2385            }
2386            &Inst::Brev8 {
2387                rs,
2388                ty,
2389                step,
2390                tmp,
2391                tmp2,
2392                rd,
2393            } => {
2394                Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2395                Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2396                    .emit(sink, emit_info, state);
2397                //
2398                Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2399                Inst::AluRRImm12 {
2400                    alu_op: AluOPRRI::Slli,
2401                    rd: tmp,
2402                    rs: tmp.to_reg(),
2403                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2404                }
2405                .emit(sink, emit_info, state);
2406                Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2407                Inst::AluRRImm12 {
2408                    alu_op: AluOPRRI::Slli,
2409                    rd: tmp2,
2410                    rs: tmp2.to_reg(),
2411                    imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2412                }
2413                .emit(sink, emit_info, state);
2414
2415                let label_done = sink.get_label();
2416                let label_loop = sink.get_label();
2417                sink.bind_label(label_loop, &mut state.ctrl_plane);
2418                Inst::CondBr {
2419                    taken: CondBrTarget::Label(label_done),
2420                    not_taken: CondBrTarget::Fallthrough,
2421                    kind: IntegerCompare {
2422                        kind: IntCC::SignedLessThanOrEqual,
2423                        rs1: step.to_reg(),
2424                        rs2: zero_reg(),
2425                    },
2426                }
2427                .emit(sink, emit_info, state);
2428                // test and set bit.
2429                {
2430                    Inst::AluRRR {
2431                        alu_op: AluOPRRR::And,
2432                        rd: writable_spilltmp_reg2(),
2433                        rs1: tmp.to_reg(),
2434                        rs2: rs,
2435                    }
2436                    .emit(sink, emit_info, state);
2437                    let label_over = sink.get_label();
2438                    Inst::CondBr {
2439                        taken: CondBrTarget::Label(label_over),
2440                        not_taken: CondBrTarget::Fallthrough,
2441                        kind: IntegerCompare {
2442                            kind: IntCC::Equal,
2443                            rs1: zero_reg(),
2444                            rs2: spilltmp_reg2(),
2445                        },
2446                    }
2447                    .emit(sink, emit_info, state);
2448                    Inst::AluRRR {
2449                        alu_op: AluOPRRR::Or,
2450                        rd,
2451                        rs1: rd.to_reg(),
2452                        rs2: tmp2.to_reg(),
2453                    }
2454                    .emit(sink, emit_info, state);
2455                    sink.bind_label(label_over, &mut state.ctrl_plane);
2456                }
2457                // set step and tmp.
2458                {
2459                    Inst::AluRRImm12 {
2460                        alu_op: AluOPRRI::Addi,
2461                        rd: step,
2462                        rs: step.to_reg(),
2463                        imm12: Imm12::from_i16(-1),
2464                    }
2465                    .emit(sink, emit_info, state);
2466                    Inst::AluRRImm12 {
2467                        alu_op: AluOPRRI::Srli,
2468                        rd: tmp,
2469                        rs: tmp.to_reg(),
2470                        imm12: Imm12::ONE,
2471                    }
2472                    .emit(sink, emit_info, state);
2473                    {
2474                        // reset tmp2
2475                        // if (step %=8 == 0) then tmp2 = tmp2 >> 15
2476                        // if (step %=8 != 0) then tmp2 = tmp2 << 1
2477                        let label_over = sink.get_label();
2478                        let label_sll_1 = sink.get_label();
2479                        Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2480                            .emit(sink, emit_info, state);
2481                        Inst::AluRRR {
2482                            alu_op: AluOPRRR::Rem,
2483                            rd: writable_spilltmp_reg2(),
2484                            rs1: step.to_reg(),
2485                            rs2: spilltmp_reg2(),
2486                        }
2487                        .emit(sink, emit_info, state);
2488                        Inst::CondBr {
2489                            taken: CondBrTarget::Label(label_sll_1),
2490                            not_taken: CondBrTarget::Fallthrough,
2491                            kind: IntegerCompare {
2492                                kind: IntCC::NotEqual,
2493                                rs1: spilltmp_reg2(),
2494                                rs2: zero_reg(),
2495                            },
2496                        }
2497                        .emit(sink, emit_info, state);
2498                        Inst::AluRRImm12 {
2499                            alu_op: AluOPRRI::Srli,
2500                            rd: tmp2,
2501                            rs: tmp2.to_reg(),
2502                            imm12: Imm12::from_i16(15),
2503                        }
2504                        .emit(sink, emit_info, state);
2505                        Inst::gen_jump(label_over).emit(sink, emit_info, state);
2506                        sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2507                        Inst::AluRRImm12 {
2508                            alu_op: AluOPRRI::Slli,
2509                            rd: tmp2,
2510                            rs: tmp2.to_reg(),
2511                            imm12: Imm12::ONE,
2512                        }
2513                        .emit(sink, emit_info, state);
2514                        sink.bind_label(label_over, &mut state.ctrl_plane);
2515                    }
2516                    Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2517                }
2518                sink.bind_label(label_done, &mut state.ctrl_plane);
2519            }
2520            &Inst::StackProbeLoop {
2521                guard_size,
2522                probe_count,
2523                tmp: guard_size_tmp,
2524            } => {
2525                let step = writable_spilltmp_reg();
2526                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2527                    .iter()
2528                    .for_each(|i| i.emit(sink, emit_info, state));
2529                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2530                    .iter()
2531                    .for_each(|i| i.emit(sink, emit_info, state));
2532
2533                let loop_start = sink.get_label();
2534                let label_done = sink.get_label();
2535                sink.bind_label(loop_start, &mut state.ctrl_plane);
2536                Inst::CondBr {
2537                    taken: CondBrTarget::Label(label_done),
2538                    not_taken: CondBrTarget::Fallthrough,
2539                    kind: IntegerCompare {
2540                        kind: IntCC::UnsignedLessThanOrEqual,
2541                        rs1: step.to_reg(),
2542                        rs2: guard_size_tmp.to_reg(),
2543                    },
2544                }
2545                .emit(sink, emit_info, state);
2546                // compute address.
2547                Inst::AluRRR {
2548                    alu_op: AluOPRRR::Sub,
2549                    rd: writable_spilltmp_reg2(),
2550                    rs1: stack_reg(),
2551                    rs2: step.to_reg(),
2552                }
2553                .emit(sink, emit_info, state);
2554                Inst::Store {
2555                    to: AMode::RegOffset(spilltmp_reg2(), 0),
2556                    op: StoreOP::Sb,
2557                    flags: MemFlagsData::new(),
2558                    src: zero_reg(),
2559                }
2560                .emit(sink, emit_info, state);
2561                // reset step.
2562                Inst::AluRRR {
2563                    alu_op: AluOPRRR::Sub,
2564                    rd: step,
2565                    rs1: step.to_reg(),
2566                    rs2: guard_size_tmp.to_reg(),
2567                }
2568                .emit(sink, emit_info, state);
2569                Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2570                sink.bind_label(label_done, &mut state.ctrl_plane);
2571            }
2572            &Inst::VecAluRRRImm5 {
2573                op,
2574                vd,
2575                vd_src,
2576                imm,
2577                vs2,
2578                ref mask,
2579                ..
2580            } => {
2581                debug_assert_eq!(vd.to_reg(), vd_src);
2582
2583                sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2584            }
2585            &Inst::VecAluRRRR {
2586                op,
2587                vd,
2588                vd_src,
2589                vs1,
2590                vs2,
2591                ref mask,
2592                ..
2593            } => {
2594                debug_assert_eq!(vd.to_reg(), vd_src);
2595
2596                sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2597            }
2598            &Inst::VecAluRRR {
2599                op,
2600                vd,
2601                vs1,
2602                vs2,
2603                ref mask,
2604                ..
2605            } => {
2606                sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2607            }
2608            &Inst::VecAluRRImm5 {
2609                op,
2610                vd,
2611                imm,
2612                vs2,
2613                ref mask,
2614                ..
2615            } => {
2616                sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2617            }
2618            &Inst::VecAluRR {
2619                op,
2620                vd,
2621                vs,
2622                ref mask,
2623                ..
2624            } => {
2625                sink.put4(encode_valu_rr(op, vd, vs, *mask));
2626            }
2627            &Inst::VecAluRImm5 {
2628                op,
2629                vd,
2630                imm,
2631                ref mask,
2632                ..
2633            } => {
2634                sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2635            }
2636            &Inst::VecSetState { rd, ref vstate } => {
2637                sink.put4(encode_vcfg_imm(
2638                    0x57,
2639                    rd.to_reg(),
2640                    vstate.avl.unwrap_static(),
2641                    &vstate.vtype,
2642                ));
2643
2644                // Update the current vector emit state.
2645                state.vstate = EmitVState::Known(*vstate);
2646            }
2647
2648            &Inst::VecLoad {
2649                eew,
2650                to,
2651                ref from,
2652                ref mask,
2653                flags,
2654                ..
2655            } => {
2656                // Vector Loads don't support immediate offsets, so we need to load it into a register.
2657                let addr = match from {
2658                    VecAMode::UnitStride { base } => {
2659                        let base_reg = base.get_base_register();
2660                        let offset = base.get_offset_with_state(state);
2661
2662                        // Reg+0 Offset can be directly encoded
2663                        if let (Some(base_reg), 0) = (base_reg, offset) {
2664                            base_reg
2665                        } else {
2666                            // Otherwise load the address it into a reg and load from it.
2667                            let tmp = writable_spilltmp_reg();
2668                            Inst::LoadAddr {
2669                                rd: tmp,
2670                                mem: *base,
2671                            }
2672                            .emit(sink, emit_info, state);
2673                            tmp.to_reg()
2674                        }
2675                    }
2676                };
2677
2678                if let Some(trap_code) = flags.trap_code() {
2679                    // Register the offset at which the actual load instruction starts.
2680                    sink.add_trap(trap_code);
2681                }
2682
2683                sink.put4(encode_vmem_load(
2684                    0x07,
2685                    to.to_reg(),
2686                    eew,
2687                    addr,
2688                    from.lumop(),
2689                    *mask,
2690                    from.mop(),
2691                    from.nf(),
2692                ));
2693            }
2694
2695            &Inst::VecStore {
2696                eew,
2697                ref to,
2698                from,
2699                ref mask,
2700                flags,
2701                ..
2702            } => {
2703                // Vector Stores don't support immediate offsets, so we need to load it into a register.
2704                let addr = match to {
2705                    VecAMode::UnitStride { base } => {
2706                        let base_reg = base.get_base_register();
2707                        let offset = base.get_offset_with_state(state);
2708
2709                        // Reg+0 Offset can be directly encoded
2710                        if let (Some(base_reg), 0) = (base_reg, offset) {
2711                            base_reg
2712                        } else {
2713                            // Otherwise load the address it into a reg and load from it.
2714                            let tmp = writable_spilltmp_reg();
2715                            Inst::LoadAddr {
2716                                rd: tmp,
2717                                mem: *base,
2718                            }
2719                            .emit(sink, emit_info, state);
2720                            tmp.to_reg()
2721                        }
2722                    }
2723                };
2724
2725                if let Some(trap_code) = flags.trap_code() {
2726                    // Register the offset at which the actual load instruction starts.
2727                    sink.add_trap(trap_code);
2728                }
2729
2730                sink.put4(encode_vmem_store(
2731                    0x27,
2732                    from,
2733                    eew,
2734                    addr,
2735                    to.sumop(),
2736                    *mask,
2737                    to.mop(),
2738                    to.nf(),
2739                ));
2740            }
2741
2742            Inst::EmitIsland { needed_space } => {
2743                if sink.island_needed(*needed_space) {
2744                    let jump_around_label = sink.get_label();
2745                    Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2746                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
2747                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2748                }
2749            }
2750
2751            Inst::SequencePoint { .. } => {
2752                // Nothing.
2753            }
2754        }
2755    }
2756}
2757
2758fn emit_return_call_common_sequence<T>(
2759    sink: &mut MachBuffer<Inst>,
2760    emit_info: &EmitInfo,
2761    state: &mut EmitState,
2762    info: &ReturnCallInfo<T>,
2763) {
2764    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
2765    // So lets emit an island here if we need it.
2766    //
2767    // It is difficult to calculate exactly how many instructions are going to be emitted, so
2768    // we calculate it by emitting it into a disposable buffer, and then checking how many instructions
2769    // were actually emitted.
2770    let mut buffer = MachBuffer::new();
2771    let mut fake_emit_state = state.clone();
2772
2773    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);
2774
2775    // Finalize the buffer and get the number of bytes emitted.
2776    let buffer = buffer.finish(&Default::default(), &mut Default::default());
2777    let length = buffer.data().len() as u32;
2778
2779    // And now emit the island inline with this instruction.
2780    if sink.island_needed(length) {
2781        let jump_around_label = sink.get_label();
2782        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2783        sink.emit_island(length + 4, &mut state.ctrl_plane);
2784        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2785    }
2786
2787    // Now that we're done, emit the *actual* return sequence.
2788    return_call_emit_impl(sink, emit_info, state, info);
2789}
2790
2791/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence].
2792fn return_call_emit_impl<T>(
2793    sink: &mut MachBuffer<Inst>,
2794    emit_info: &EmitInfo,
2795    state: &mut EmitState,
2796    info: &ReturnCallInfo<T>,
2797) {
2798    let sp_to_fp_offset = {
2799        let frame_layout = state.frame_layout();
2800        i64::from(
2801            frame_layout.clobber_size
2802                + frame_layout.fixed_frame_storage_size
2803                + frame_layout.outgoing_args_size,
2804        )
2805    };
2806
2807    let mut clobber_offset = sp_to_fp_offset - 8;
2808    for reg in state.frame_layout().clobbered_callee_saves.clone() {
2809        let rreg = reg.to_reg();
2810        let ty = match rreg.class() {
2811            RegClass::Int => I64,
2812            RegClass::Float => F64,
2813            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
2814        };
2815
2816        Inst::gen_load(
2817            reg.map(Reg::from),
2818            AMode::SPOffset(clobber_offset),
2819            ty,
2820            MemFlagsData::trusted(),
2821        )
2822        .emit(sink, emit_info, state);
2823
2824        clobber_offset -= 8
2825    }
2826
2827    // Restore the link register and frame pointer
2828    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
2829    if setup_area_size > 0 {
2830        Inst::gen_load(
2831            writable_link_reg(),
2832            AMode::SPOffset(sp_to_fp_offset + 8),
2833            I64,
2834            MemFlagsData::trusted(),
2835        )
2836        .emit(sink, emit_info, state);
2837
2838        Inst::gen_load(
2839            writable_fp_reg(),
2840            AMode::SPOffset(sp_to_fp_offset),
2841            I64,
2842            MemFlagsData::trusted(),
2843        )
2844        .emit(sink, emit_info, state);
2845    }
2846
2847    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
2848    // is expecting.
2849    let incoming_args_diff =
2850        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
2851
2852    // Increment SP all at once
2853    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
2854    if sp_increment > 0 {
2855        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
2856            inst.emit(sink, emit_info, state);
2857        }
2858    }
2859}