cranelift_codegen/isa/riscv64/inst/
mod.rs

1//! This module defines riscv64-specific machine instruction types.
2
3use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking};
4use crate::binemit::{Addend, CodeOffset, Reloc};
5pub use crate::ir::condcodes::IntCC;
6use crate::ir::types::{self, F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16};
7
8pub use crate::ir::{ExternalName, MemFlags, Type};
9use crate::isa::{CallConv, FunctionAlignment};
10use crate::machinst::*;
11use crate::{settings, CodegenError, CodegenResult};
12
13pub use crate::ir::condcodes::FloatCC;
14
15use alloc::vec::Vec;
16use regalloc2::RegClass;
17use smallvec::{smallvec, SmallVec};
18use std::boxed::Box;
19use std::fmt::Write;
20use std::string::{String, ToString};
21
22pub mod regs;
23pub use self::regs::*;
24pub mod imms;
25pub use self::imms::*;
26pub mod args;
27pub use self::args::*;
28pub mod emit;
29pub use self::emit::*;
30pub mod vector;
31pub use self::vector::*;
32pub mod encode;
33pub use self::encode::*;
34pub mod unwind;
35
36use crate::isa::riscv64::abi::Riscv64MachineDeps;
37
38#[cfg(test)]
39mod emit_tests;
40
41use std::fmt::{Display, Formatter};
42
43pub(crate) type VecU8 = Vec<u8>;
44
45//=============================================================================
46// Instructions (top level): definition
47
48pub use crate::isa::riscv64::lower::isle::generated_code::{
49    AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FpuOPRR,
50    FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM,
51};
52use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR};
53
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    // Call destination; `T` is a label/name for direct calls or a register
    // for the indirect form.
    pub dest: T,
    // Argument registers the tail call reads.
    pub uses: CallArgList,
    // Size in bytes of the outgoing stack-argument area of the new frame.
    pub new_stack_arg_size: u32,
}
62
/// A conditional branch target: either a label to jump to, or an implicit
/// fall-through to the next instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CondBrTarget {
    /// An unresolved reference to a Label, as passed into
    /// `lower_branch_group()`.
    Label(MachLabel),
    /// No jump; fall through to the next instruction.
    Fallthrough,
}
72
73impl CondBrTarget {
74    /// Return the target's label, if it is a label-based target.
75    pub(crate) fn as_label(self) -> Option<MachLabel> {
76        match self {
77            CondBrTarget::Label(l) => Some(l),
78            _ => None,
79        }
80    }
81
82    pub(crate) fn is_fallthrouh(&self) -> bool {
83        self == &CondBrTarget::Fallthrough
84    }
85}
86
87impl Display for CondBrTarget {
88    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
89        match self {
90            CondBrTarget::Label(l) => write!(f, "{}", l.to_string()),
91            CondBrTarget::Fallthrough => write!(f, "0"),
92        }
93    }
94}
95
96pub(crate) fn enc_auipc(rd: Writable<Reg>, imm: Imm20) -> u32 {
97    let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12;
98    x
99}
100
101pub(crate) fn enc_jalr(rd: Writable<Reg>, base: Reg, offset: Imm12) -> u32 {
102    let x = 0b1100111
103        | reg_to_gpr_num(rd.to_reg()) << 7
104        | 0b000 << 12
105        | reg_to_gpr_num(base) << 15
106        | offset.bits() << 20;
107    x
108}
109
110/// rd and src must have the same length.
111pub(crate) fn gen_moves(rd: &[Writable<Reg>], src: &[Reg]) -> SmallInstVec<Inst> {
112    assert!(rd.len() == src.len());
113    assert!(rd.len() > 0);
114    let mut insts = SmallInstVec::new();
115    for (dst, src) in rd.iter().zip(src.iter()) {
116        let ty = Inst::canonical_type_for_rc(dst.to_reg().class());
117        insts.push(Inst::gen_move(*dst, *src, ty));
118    }
119    insts
120}
121
impl Inst {
    /// RISC-V can have multiple instruction sizes. 2 bytes for compressed
    /// instructions, 4 for regular instructions, 6 and 8 byte instructions
    /// are also being considered.
    const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4;

    /// Load a 12-bit immediate into `rd` via `addi rd, zero, imm`.
    #[inline]
    pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd,
            rs: zero_reg(),
            imm12: imm,
        }
    }

    /// Immediates can be loaded using lui and addi instructions.
    ///
    /// Returns `None` when `generate_imm` cannot split `value` into a
    /// (imm20, imm12) pair; callers fall back to an inline-constant load.
    fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {
        Inst::generate_imm(value).map(|(imm20, imm12)| {
            let mut insts = SmallVec::new();

            let imm20_is_zero = imm20.as_i32() == 0;
            let imm12_is_zero = imm12.as_i16() == 0;

            // Only emit the `lui` when its immediate contributes something;
            // otherwise the `addi` below reads directly from `zero`.
            let rs = if !imm20_is_zero {
                insts.push(Inst::Lui { rd, imm: imm20 });
                rd.to_reg()
            } else {
                zero_reg()
            };

            // We also need to emit the addi if the value is 0, otherwise we just
            // won't produce any instructions.
            if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) {
                insts.push(Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd,
                    rs,
                    imm12,
                })
            }

            insts
        })
    }

    /// Load a 32-bit constant into `rd`, preferring a lui/addi pair and
    /// falling back to an inline constant-pool load.
    pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
        let insts = Inst::load_const_imm(rd, value);
        insts.unwrap_or_else(|| {
            smallvec![Inst::LoadInlineConst {
                rd,
                ty: I32,
                imm: value
            }]
        })
    }

    /// Load a 64-bit constant into `rd`, preferring a lui/addi pair and
    /// falling back to an inline constant-pool load.
    pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
        let insts = Inst::load_const_imm(rd, value);
        insts.unwrap_or_else(|| {
            smallvec![Inst::LoadInlineConst {
                rd,
                ty: I64,
                imm: value
            }]
        })
    }

    /// Build an `auipc` + `jalr` pair spanning `offset`.
    ///
    /// `tmp` holds the intermediate `auipc` result; `link`, when present,
    /// receives the return address (otherwise `zero` is used, i.e. no link).
    ///
    /// Panics if `offset` cannot be split into a (imm20, imm12) pair.
    pub(crate) fn construct_auipc_and_jalr(
        link: Option<Writable<Reg>>,
        tmp: Writable<Reg>,
        offset: i64,
    ) -> [Inst; 2] {
        Inst::generate_imm(offset as u64)
            .map(|(imm20, imm12)| {
                let a = Inst::Auipc {
                    rd: tmp,
                    imm: imm20,
                };
                let b = Inst::Jalr {
                    rd: link.unwrap_or(writable_zero_reg()),
                    base: tmp.to_reg(),
                    offset: imm12,
                };
                [a, b]
            })
            .expect("code range is too big.")
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        // Vector loads use the V-extension unit-stride form with masking
        // disabled; scalar loads pick the width/extension from `ty`.
        if ty.is_vector() {
            Inst::VecLoad {
                eew: VecElementWidth::from_type(ty),
                to: into_reg,
                from: VecAMode::UnitStride { base: mem },
                flags,
                mask: VecOpMasking::Disabled,
                vstate: VState::from_type(ty),
            }
        } else {
            Inst::Load {
                rd: into_reg,
                op: LoadOP::from_type(ty),
                from: mem,
                flags,
            }
        }
    }

    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        // Mirror of `gen_load`: unit-stride vector store or scalar store.
        if ty.is_vector() {
            Inst::VecStore {
                eew: VecElementWidth::from_type(ty),
                to: VecAMode::UnitStride { base: mem },
                from: from_reg,
                flags,
                mask: VecOpMasking::Disabled,
                vstate: VState::from_type(ty),
            }
        } else {
            Inst::Store {
                src: from_reg,
                op: StoreOP::from_type(ty),
                to: mem,
                flags,
            }
        }
    }
}
253
254//=============================================================================
255
256fn vec_mask_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {
257    match mask {
258        VecOpMasking::Enabled { reg } => {
259            collector.reg_fixed_use(reg, pv_reg(0).into());
260        }
261        VecOpMasking::Disabled => {}
262    }
263}
264fn vec_mask_late_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {
265    match mask {
266        VecOpMasking::Enabled { reg } => {
267            collector.reg_fixed_late_use(reg, pv_reg(0).into());
268        }
269        VecOpMasking::Disabled => {}
270    }
271}
272
/// Report every register operand of `inst` — uses, defs, and their fixed /
/// early / late / reuse constraints — to the register allocator via
/// `collector`. The order and constraint kind of each call is load-bearing:
/// emission relies on the allocation satisfying exactly these constraints.
fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    match inst {
        Inst::Nop0 | Inst::Nop4 => {}
        Inst::BrTable {
            index, tmp1, tmp2, ..
        } => {
            collector.reg_use(index);
            collector.reg_early_def(tmp1);
            collector.reg_early_def(tmp2);
        }
        Inst::Auipc { rd, .. } => collector.reg_def(rd),
        Inst::Lui { rd, .. } => collector.reg_def(rd),
        Inst::Fli { rd, .. } => collector.reg_def(rd),
        Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
        Inst::AluRRR { rd, rs1, rs2, .. } => {
            collector.reg_use(rs1);
            collector.reg_use(rs2);
            collector.reg_def(rd);
        }
        Inst::FpuRRR { rd, rs1, rs2, .. } => {
            collector.reg_use(rs1);
            collector.reg_use(rs2);
            collector.reg_def(rd);
        }
        Inst::AluRRImm12 { rd, rs, .. } => {
            collector.reg_use(rs);
            collector.reg_def(rd);
        }
        Inst::CsrReg { rd, rs, .. } => {
            collector.reg_use(rs);
            collector.reg_def(rd);
        }
        Inst::CsrImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::Load { rd, from, .. } => {
            from.get_operands(collector);
            collector.reg_def(rd);
        }
        Inst::Store { to, src, .. } => {
            to.get_operands(collector);
            collector.reg_use(src);
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }
        Inst::Rets { rets } => {
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::Ret { .. } => {}

        Inst::Extend { rd, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_def(rd);
        }
        Inst::Call { info, .. } => {
            // Arguments are fixed uses, returns fixed defs, per the ABI.
            let CallInfo { uses, defs, .. } = &mut **info;
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, preg } in defs {
                collector.reg_fixed_def(vreg, *preg);
            }
            collector.reg_clobbers(info.clobbers);
        }
        Inst::CallInd { info } => {
            let CallInfo {
                dest, uses, defs, ..
            } = &mut **info;
            collector.reg_use(dest);
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, preg } in defs {
                collector.reg_fixed_def(vreg, *preg);
            }
            collector.reg_clobbers(info.clobbers);
        }
        Inst::ReturnCall { info } => {
            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::ReturnCallInd { info } => {
            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint.
            collector.reg_fixed_use(&mut info.dest, x_reg(5));

            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::Jal { .. } => {
            // JAL technically has a rd register, but we currently always
            // hardcode it to x0.
        }
        Inst::CondBr {
            kind: IntegerCompare { rs1, rs2, .. },
            ..
        } => {
            collector.reg_use(rs1);
            collector.reg_use(rs2);
        }
        Inst::LoadExtName { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::ElfTlsGetAddr { rd, .. } => {
            // x10 is a0 which is both the first argument and the first return value.
            collector.reg_fixed_def(rd, a0());
            let mut clobbers = Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV);
            clobbers.remove(px_reg(10));
            collector.reg_clobbers(clobbers);
        }
        Inst::LoadAddr { rd, mem } => {
            mem.get_operands(collector);
            collector.reg_early_def(rd);
        }

        Inst::Mov { rd, rm, .. } => {
            collector.reg_use(rm);
            collector.reg_def(rd);
        }
        Inst::MovFromPReg { rd, rm } => {
            // Only sp (x2) and fp (x8) are expected as sources here.
            debug_assert!([px_reg(2), px_reg(8)].contains(rm));
            collector.reg_def(rd);
        }
        Inst::Fence { .. } => {}
        Inst::EBreak => {}
        Inst::Udf { .. } => {}
        Inst::FpuRR { rd, rs, .. } => {
            collector.reg_use(rs);
            collector.reg_def(rd);
        }
        Inst::FpuRRRR {
            rd, rs1, rs2, rs3, ..
        } => {
            collector.reg_use(rs1);
            collector.reg_use(rs2);
            collector.reg_use(rs3);
            collector.reg_def(rd);
        }

        Inst::Jalr { rd, base, .. } => {
            collector.reg_use(base);
            collector.reg_def(rd);
        }
        Inst::Atomic { rd, addr, src, .. } => {
            collector.reg_use(addr);
            collector.reg_use(src);
            collector.reg_def(rd);
        }
        Inst::Select {
            dst,
            condition: IntegerCompare { rs1, rs2, .. },
            x,
            y,
            ..
        } => {
            // Mark the condition registers as late use so that they don't overlap with the destination
            // register. We may potentially write to the destination register before evaluating the
            // condition.
            collector.reg_late_use(rs1);
            collector.reg_late_use(rs2);

            for reg in x.regs_mut() {
                collector.reg_use(reg);
            }
            for reg in y.regs_mut() {
                collector.reg_use(reg);
            }

            // If there's more than one destination register then use
            // `reg_early_def` to prevent destination registers from overlapping
            // with any operands. This ensures that the lowering doesn't have to
            // deal with a situation such as when the input registers need to be
            // swapped when moved to the destination.
            //
            // When there's only one destination register though don't use an
            // early def because once the register is written no other inputs
            // are read so it's ok for the destination to overlap the sources.
            // The condition registers are already marked as late use so they
            // won't overlap with the destination.
            match dst.regs_mut() {
                [reg] => collector.reg_def(reg),
                regs => {
                    for d in regs {
                        collector.reg_early_def(d);
                    }
                }
            }
        }
        Inst::AtomicCas {
            offset,
            t0,
            dst,
            e,
            addr,
            v,
            ..
        } => {
            collector.reg_use(offset);
            collector.reg_use(e);
            collector.reg_use(addr);
            collector.reg_use(v);
            // `t0` and `dst` are written before all inputs are consumed, so
            // they must not share registers with them.
            collector.reg_early_def(t0);
            collector.reg_early_def(dst);
        }

        Inst::RawData { .. } => {}
        Inst::AtomicStore { src, p, .. } => {
            collector.reg_use(src);
            collector.reg_use(p);
        }
        Inst::AtomicLoad { rd, p, .. } => {
            collector.reg_use(p);
            collector.reg_def(rd);
        }
        Inst::AtomicRmwLoop {
            offset,
            dst,
            p,
            x,
            t0,
            ..
        } => {
            collector.reg_use(offset);
            collector.reg_use(p);
            collector.reg_use(x);
            collector.reg_early_def(t0);
            collector.reg_early_def(dst);
        }
        Inst::TrapIf { rs1, rs2, .. } => {
            collector.reg_use(rs1);
            collector.reg_use(rs2);
        }
        Inst::Unwind { .. } => {}
        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }
        Inst::Popcnt {
            sum, step, rs, tmp, ..
        } => {
            collector.reg_use(rs);
            collector.reg_early_def(tmp);
            collector.reg_early_def(step);
            collector.reg_early_def(sum);
        }
        Inst::Cltz {
            sum, step, tmp, rs, ..
        } => {
            collector.reg_use(rs);
            collector.reg_early_def(tmp);
            collector.reg_early_def(step);
            collector.reg_early_def(sum);
        }
        Inst::Brev8 {
            rs,
            rd,
            step,
            tmp,
            tmp2,
            ..
        } => {
            collector.reg_use(rs);
            collector.reg_early_def(step);
            collector.reg_early_def(tmp);
            collector.reg_early_def(tmp2);
            collector.reg_early_def(rd);
        }
        Inst::StackProbeLoop { .. } => {
            // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue.
            // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg)
            // gen_prologue is called at emit stage.
            // no need let reg alloc know.
        }
        Inst::VecAluRRRR {
            op,
            vd,
            vd_src,
            vs1,
            vs2,
            mask,
            ..
        } => {
            debug_assert_eq!(vd_src.class(), RegClass::Vector);
            debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
            debug_assert_eq!(vs2.class(), RegClass::Vector);
            debug_assert_eq!(vs1.class(), op.vs1_regclass());

            collector.reg_late_use(vs1);
            collector.reg_late_use(vs2);
            collector.reg_use(vd_src);
            collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`.
            vec_mask_late_operands(mask, collector);
        }
        Inst::VecAluRRRImm5 {
            op,
            vd,
            vd_src,
            vs2,
            mask,
            ..
        } => {
            debug_assert_eq!(vd_src.class(), RegClass::Vector);
            debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
            debug_assert_eq!(vs2.class(), RegClass::Vector);

            // If the operation forbids source/destination overlap we need to
            // ensure that the source and destination registers are different.
            if op.forbids_overlaps(mask) {
                collector.reg_late_use(vs2);
                collector.reg_use(vd_src);
                collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
                vec_mask_late_operands(mask, collector);
            } else {
                collector.reg_use(vs2);
                collector.reg_use(vd_src);
                collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
                vec_mask_operands(mask, collector);
            }
        }
        Inst::VecAluRRR {
            op,
            vd,
            vs1,
            vs2,
            mask,
            ..
        } => {
            debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
            debug_assert_eq!(vs2.class(), RegClass::Vector);
            debug_assert_eq!(vs1.class(), op.vs1_regclass());

            collector.reg_use(vs1);
            collector.reg_use(vs2);

            // If the operation forbids source/destination overlap, then we must
            // register it as an early_def. This encodes the constraint that
            // these must not overlap.
            if op.forbids_overlaps(mask) {
                collector.reg_early_def(vd);
            } else {
                collector.reg_def(vd);
            }

            vec_mask_operands(mask, collector);
        }
        Inst::VecAluRRImm5 {
            op, vd, vs2, mask, ..
        } => {
            debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
            debug_assert_eq!(vs2.class(), RegClass::Vector);

            collector.reg_use(vs2);

            // If the operation forbids source/destination overlap, then we must
            // register it as an early_def. This encodes the constraint that
            // these must not overlap.
            if op.forbids_overlaps(mask) {
                collector.reg_early_def(vd);
            } else {
                collector.reg_def(vd);
            }

            vec_mask_operands(mask, collector);
        }
        Inst::VecAluRR {
            op, vd, vs, mask, ..
        } => {
            debug_assert_eq!(vd.to_reg().class(), op.dst_regclass());
            debug_assert_eq!(vs.class(), op.src_regclass());

            collector.reg_use(vs);

            // If the operation forbids source/destination overlap, then we must
            // register it as an early_def. This encodes the constraint that
            // these must not overlap.
            if op.forbids_overlaps(mask) {
                collector.reg_early_def(vd);
            } else {
                collector.reg_def(vd);
            }

            vec_mask_operands(mask, collector);
        }
        Inst::VecAluRImm5 { op, vd, mask, .. } => {
            debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
            debug_assert!(!op.forbids_overlaps(mask));

            collector.reg_def(vd);
            vec_mask_operands(mask, collector);
        }
        Inst::VecSetState { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::VecLoad { to, from, mask, .. } => {
            from.get_operands(collector);
            collector.reg_def(to);
            vec_mask_operands(mask, collector);
        }
        Inst::VecStore { to, from, mask, .. } => {
            to.get_operands(collector);
            collector.reg_use(from);
            vec_mask_operands(mask, collector);
        }
    }
}
685
686impl MachInst for Inst {
687    type LabelUse = LabelUse;
688    type ABIMachineSpec = Riscv64MachineDeps;
689
690    // https://github.com/riscv/riscv-isa-manual/issues/850
691    // all zero will cause invalid opcode.
692    const TRAP_OPCODE: &'static [u8] = &[0; 4];
693
694    fn gen_dummy_use(reg: Reg) -> Self {
695        Inst::DummyUse { reg }
696    }
697
698    fn canonical_type_for_rc(rc: RegClass) -> Type {
699        match rc {
700            regalloc2::RegClass::Int => I64,
701            regalloc2::RegClass::Float => F64,
702            regalloc2::RegClass::Vector => I8X16,
703        }
704    }
705
706    fn is_safepoint(&self) -> bool {
707        match self {
708            Inst::Call { .. } | Inst::CallInd { .. } => true,
709            _ => false,
710        }
711    }
712
713    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
714        riscv64_get_operands(self, collector);
715    }
716
717    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
718        match self {
719            Inst::Mov { rd, rm, .. } => Some((*rd, *rm)),
720            _ => None,
721        }
722    }
723
724    fn is_included_in_clobbers(&self) -> bool {
725        match self {
726            &Inst::Args { .. } => false,
727            _ => true,
728        }
729    }
730
731    fn is_trap(&self) -> bool {
732        match self {
733            Self::Udf { .. } => true,
734            _ => false,
735        }
736    }
737
738    fn is_args(&self) -> bool {
739        match self {
740            Self::Args { .. } => true,
741            _ => false,
742        }
743    }
744
745    fn is_term(&self) -> MachTerminator {
746        match self {
747            &Inst::Jal { .. } => MachTerminator::Uncond,
748            &Inst::CondBr { .. } => MachTerminator::Cond,
749            &Inst::Jalr { .. } => MachTerminator::Uncond,
750            &Inst::Rets { .. } => MachTerminator::Ret,
751            &Inst::BrTable { .. } => MachTerminator::Indirect,
752            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
753            _ => MachTerminator::None,
754        }
755    }
756
757    fn is_mem_access(&self) -> bool {
758        panic!("TODO FILL ME OUT")
759    }
760
761    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
762        let x = Inst::Mov {
763            rd: to_reg,
764            rm: from_reg,
765            ty,
766        };
767        x
768    }
769
770    fn gen_nop(preferred_size: usize) -> Inst {
771        if preferred_size == 0 {
772            return Inst::Nop0;
773        }
774        // We can't give a NOP (or any insn) < 4 bytes.
775        assert!(preferred_size >= 4);
776        Inst::Nop4
777    }
778
779    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
780        match ty {
781            I8 => Ok((&[RegClass::Int], &[I8])),
782            I16 => Ok((&[RegClass::Int], &[I16])),
783            I32 => Ok((&[RegClass::Int], &[I32])),
784            I64 => Ok((&[RegClass::Int], &[I64])),
785            F16 => Ok((&[RegClass::Float], &[F16])),
786            F32 => Ok((&[RegClass::Float], &[F32])),
787            F64 => Ok((&[RegClass::Float], &[F64])),
788            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
789            _ if ty.is_vector() => {
790                debug_assert!(ty.bits() <= 512);
791
792                // Here we only need to return a SIMD type with the same size as `ty`.
793                // We use these types for spills and reloads, so prefer types with lanes <= 31
794                // since that fits in the immediate field of `vsetivli`.
795                const SIMD_TYPES: [[Type; 1]; 6] = [
796                    [types::I8X2],
797                    [types::I8X4],
798                    [types::I8X8],
799                    [types::I8X16],
800                    [types::I16X16],
801                    [types::I32X16],
802                ];
803                let idx = (ty.bytes().ilog2() - 1) as usize;
804                let ty = &SIMD_TYPES[idx][..];
805
806                Ok((&[RegClass::Vector], ty))
807            }
808            _ => Err(CodegenError::Unsupported(format!(
809                "Unexpected SSA-value type: {ty}"
810            ))),
811        }
812    }
813
814    fn gen_jump(target: MachLabel) -> Inst {
815        Inst::Jal { label: target }
816    }
817
818    fn worst_case_size() -> CodeOffset {
819        // Our worst case size is determined by the riscv64_worst_case_instruction_size test
820        84
821    }
822
823    fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
824        RegClass::Int
825    }
826
827    fn function_alignment() -> FunctionAlignment {
828        FunctionAlignment {
829            minimum: 2,
830            preferred: 4,
831        }
832    }
833}
834
835//=============================================================================
836// Pretty-printing of instructions.
837pub fn reg_name(reg: Reg) -> String {
838    match reg.to_real_reg() {
839        Some(real) => match real.class() {
840            RegClass::Int => match real.hw_enc() {
841                0 => "zero".into(),
842                1 => "ra".into(),
843                2 => "sp".into(),
844                3 => "gp".into(),
845                4 => "tp".into(),
846                5..=7 => format!("t{}", real.hw_enc() - 5),
847                8 => "fp".into(),
848                9 => "s1".into(),
849                10..=17 => format!("a{}", real.hw_enc() - 10),
850                18..=27 => format!("s{}", real.hw_enc() - 16),
851                28..=31 => format!("t{}", real.hw_enc() - 25),
852                _ => unreachable!(),
853            },
854            RegClass::Float => match real.hw_enc() {
855                0..=7 => format!("ft{}", real.hw_enc() - 0),
856                8..=9 => format!("fs{}", real.hw_enc() - 8),
857                10..=17 => format!("fa{}", real.hw_enc() - 10),
858                18..=27 => format!("fs{}", real.hw_enc() - 16),
859                28..=31 => format!("ft{}", real.hw_enc() - 20),
860                _ => unreachable!(),
861            },
862            RegClass::Vector => format!("v{}", real.hw_enc()),
863        },
864        None => {
865            format!("{reg:?}")
866        }
867    }
868}
869
impl Inst {
    /// Pretty-print this instruction in an assembly-like textual form.
    ///
    /// The output is consumed by disassembly listings and the emit tests; it
    /// follows GNU-as operand ordering where practical and appends
    /// `##key=value` annotations for pseudo-instructions that expand into
    /// multi-instruction sequences. `_state` is unused here but required by
    /// the shared pretty-printing interface.
    fn print_with_state(&self, _state: &mut EmitState) -> String {
        // ABI name for a (virtual or real) register.
        let format_reg = |reg: Reg| -> String { reg_name(reg) };

        // Vector addressing mode; only unit-stride addressing exists today.
        let format_vec_amode = |amode: &VecAMode| -> String {
            match amode {
                VecAMode::UnitStride { base } => base.to_string(),
            }
        };

        // Optional `,vN.t` suffix for masked vector instructions; empty when
        // masking is disabled.
        let format_mask = |mask: &VecOpMasking| -> String {
            match mask {
                VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg)),
                VecOpMasking::Disabled => format!(""),
            }
        };

        // A register group: a bare name for a single register, or
        // `[a,b,...]` when there is more than one.
        let format_regs = |regs: &[Reg]| -> String {
            let mut x = if regs.len() > 1 {
                String::from("[")
            } else {
                String::default()
            };
            regs.iter().for_each(|i| {
                x.push_str(format_reg(*i).as_str());
                if *i != *regs.last().unwrap() {
                    x.push_str(",");
                }
            });
            if regs.len() > 1 {
                x.push_str("]");
            }
            x
        };
        // Branch-table targets as `[label0,label1,...]`; `[_]` when empty.
        let format_labels = |labels: &[MachLabel]| -> String {
            if labels.len() == 0 {
                return String::from("[_]");
            }
            let mut x = String::from("[");
            labels.iter().for_each(|l| {
                x.push_str(
                    format!(
                        "{:?}{}",
                        l,
                        if l != labels.last().unwrap() { "," } else { "" },
                    )
                    .as_str(),
                );
            });
            x.push_str("]");
            x
        };

        // `,<frm>` suffix for FP instructions that carry an explicit
        // rounding mode.
        fn format_frm(rounding_mode: FRM) -> String {
            format!(",{}", rounding_mode.to_static_str())
        }

        // One arm per instruction variant.
        match self {
            &Inst::Nop0 => {
                format!("##zero length nop")
            }
            &Inst::Nop4 => {
                format!("##fixed 4-size nop")
            }
            &Inst::StackProbeLoop {
                guard_size,
                probe_count,
                tmp,
            } => {
                let tmp = format_reg(tmp.to_reg());
                format!(
                    "inline_stack_probe##guard_size={guard_size} probe_count={probe_count} tmp={tmp}"
                )
            }
            &Inst::AtomicStore { src, ty, p } => {
                let src = format_reg(src);
                let p = format_reg(p);
                format!("atomic_store.{ty} {src},({p})")
            }
            &Inst::DummyUse { reg } => {
                let reg = format_reg(reg);
                format!("dummy_use {reg}")
            }

            &Inst::AtomicLoad { rd, ty, p } => {
                let p = format_reg(p);
                let rd = format_reg(rd.to_reg());
                format!("atomic_load.{ty} {rd},({p})")
            }
            &Inst::AtomicRmwLoop {
                offset,
                op,
                dst,
                ty,
                p,
                x,
                t0,
            } => {
                let offset = format_reg(offset);
                let p = format_reg(p);
                let x = format_reg(x);
                let t0 = format_reg(t0.to_reg());
                let dst = format_reg(dst.to_reg());
                format!("atomic_rmw.{ty} {op} {dst},{x},({p})##t0={t0} offset={offset}")
            }

            // Raw constant data embedded in the instruction stream; 4- and
            // 8-byte payloads are shown as assembler data directives.
            &Inst::RawData { ref data } => match data.len() {
                4 => {
                    let mut bytes = [0; 4];
                    for i in 0..bytes.len() {
                        bytes[i] = data[i];
                    }
                    format!(".4byte 0x{:x}", u32::from_le_bytes(bytes))
                }
                8 => {
                    let mut bytes = [0; 8];
                    for i in 0..bytes.len() {
                        bytes[i] = data[i];
                    }
                    format!(".8byte 0x{:x}", u64::from_le_bytes(bytes))
                }
                _ => {
                    format!(".data {data:?}")
                }
            },
            &Inst::Unwind { ref inst } => {
                format!("unwind {inst:?}")
            }
            &Inst::Brev8 {
                rs,
                ty,
                step,
                tmp,
                tmp2,
                rd,
            } => {
                let rs = format_reg(rs);
                let step = format_reg(step.to_reg());
                let tmp = format_reg(tmp.to_reg());
                let tmp2 = format_reg(tmp2.to_reg());
                let rd = format_reg(rd.to_reg());
                format!("brev8 {rd},{rs}##tmp={tmp} tmp2={tmp2} step={step} ty={ty}")
            }
            &Inst::Popcnt {
                sum,
                step,
                rs,
                tmp,
                ty,
            } => {
                let rs = format_reg(rs);
                let tmp = format_reg(tmp.to_reg());
                let step = format_reg(step.to_reg());
                let sum = format_reg(sum.to_reg());
                format!("popcnt {sum},{rs}##ty={ty} tmp={tmp} step={step}")
            }
            &Inst::Cltz {
                sum,
                step,
                rs,
                tmp,
                ty,
                leading,
            } => {
                let rs = format_reg(rs);
                let tmp = format_reg(tmp.to_reg());
                let step = format_reg(step.to_reg());
                let sum = format_reg(sum.to_reg());
                format!(
                    "{} {},{}##ty={} tmp={} step={}",
                    if leading { "clz" } else { "ctz" },
                    sum,
                    rs,
                    ty,
                    tmp,
                    step
                )
            }
            &Inst::AtomicCas {
                offset,
                t0,
                dst,
                e,
                addr,
                v,
                ty,
            } => {
                let offset = format_reg(offset);
                let e = format_reg(e);
                let addr = format_reg(addr);
                let v = format_reg(v);
                let t0 = format_reg(t0.to_reg());
                let dst = format_reg(dst.to_reg());
                format!("atomic_cas.{ty} {dst},{e},{v},({addr})##t0={t0} offset={offset}",)
            }
            &Inst::BrTable {
                index,
                tmp1,
                tmp2,
                ref targets,
            } => {
                format!(
                    "{} {},{}##tmp1={},tmp2={}",
                    "br_table",
                    format_reg(index),
                    format_labels(&targets[..]),
                    format_reg(tmp1.to_reg()),
                    format_reg(tmp2.to_reg()),
                )
            }
            &Inst::Auipc { rd, imm } => {
                format!("{} {},{}", "auipc", format_reg(rd.to_reg()), imm.as_i32(),)
            }
            &Inst::Jalr { rd, base, offset } => {
                let base = format_reg(base);
                let rd = format_reg(rd.to_reg());
                format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base)
            }
            &Inst::Lui { rd, ref imm } => {
                format!("{} {},{}", "lui", format_reg(rd.to_reg()), imm.as_i32())
            }
            &Inst::Fli { rd, ty, imm } => {
                let rd_s = format_reg(rd.to_reg());
                let imm_s = imm.format();
                let suffix = match ty {
                    F32 => "s",
                    F64 => "d",
                    _ => unreachable!(),
                };

                format!("fli.{suffix} {rd_s},{imm_s}")
            }
            // Shown as the auipc/ld/j/.8byte sequence this pseudo-instruction
            // expands into.
            &Inst::LoadInlineConst { rd, imm, .. } => {
                let rd = format_reg(rd.to_reg());
                let mut buf = String::new();
                write!(&mut buf, "auipc {rd},0; ").unwrap();
                write!(&mut buf, "ld {rd},12({rd}); ").unwrap();
                write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap();
                write!(&mut buf, ".8byte 0x{imm:x}").unwrap();
                buf
            }
            &Inst::AluRRR {
                alu_op,
                rd,
                rs1,
                rs2,
            } => {
                let rs1_s = format_reg(rs1);
                let rs2_s = format_reg(rs2);
                let rd_s = format_reg(rd.to_reg());
                match alu_op {
                    // `add.uw rd,rs1,zero` is better known as `zext.w`.
                    AluOPRRR::Adduw if rs2 == zero_reg() => {
                        format!("zext.w {rd_s},{rs1_s}")
                    }
                    _ => {
                        format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)
                    }
                }
            }
            &Inst::FpuRR {
                alu_op,
                width,
                frm,
                rd,
                rs,
            } => {
                let rs = format_reg(rs);
                let rd = format_reg(rd.to_reg());
                let frm = if alu_op.has_frm() {
                    format_frm(frm)
                } else {
                    String::new()
                };
                format!("{} {rd},{rs}{frm}", alu_op.op_name(width))
            }
            &Inst::FpuRRR {
                alu_op,
                width,
                rd,
                rs1,
                rs2,
                frm,
            } => {
                let rs1 = format_reg(rs1);
                let rs2 = format_reg(rs2);
                let rd = format_reg(rd.to_reg());
                let frm = if alu_op.has_frm() {
                    format_frm(frm)
                } else {
                    String::new()
                };

                // Sign-injection ops with identical sources are the standard
                // fmv/fneg/fabs pseudo-instructions.
                let rs1_is_rs2 = rs1 == rs2;
                match alu_op {
                    FpuOPRRR::Fsgnj if rs1_is_rs2 => format!("fmv.{width} {rd},{rs1}"),
                    FpuOPRRR::Fsgnjn if rs1_is_rs2 => format!("fneg.{width} {rd},{rs1}"),
                    FpuOPRRR::Fsgnjx if rs1_is_rs2 => format!("fabs.{width} {rd},{rs1}"),
                    _ => format!("{} {rd},{rs1},{rs2}{frm}", alu_op.op_name(width)),
                }
            }
            &Inst::FpuRRRR {
                alu_op,
                rd,
                rs1,
                rs2,
                rs3,
                frm,
                width,
            } => {
                let rs1 = format_reg(rs1);
                let rs2 = format_reg(rs2);
                let rs3 = format_reg(rs3);
                let rd = format_reg(rd.to_reg());
                let frm = format_frm(frm);
                let op_name = alu_op.op_name(width);
                format!("{op_name} {rd},{rs1},{rs2},{rs3}{frm}")
            }
            &Inst::AluRRImm12 {
                alu_op,
                rd,
                rs,
                ref imm12,
            } => {
                let rs_s = format_reg(rs);
                let rd = format_reg(rd.to_reg());

                // Some of these special cases are better known as
                // their pseudo-instruction version, so prefer printing those.
                match (alu_op, rs, imm12) {
                    (AluOPRRI::Addi, rs, _) if rs == zero_reg() => {
                        return format!("li {},{}", rd, imm12.as_i16());
                    }
                    (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => {
                        return format!("sext.w {rd},{rs_s}");
                    }
                    (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => {
                        return format!("not {rd},{rs_s}");
                    }
                    (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => {
                        return format!("seqz {rd},{rs_s}");
                    }
                    // Ops whose funct12 field is fixed take no immediate
                    // operand in assembly.
                    (alu_op, _, _) if alu_op.option_funct12().is_some() => {
                        format!("{} {},{}", alu_op.op_name(), rd, rs_s)
                    }
                    (alu_op, _, imm12) => {
                        format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16())
                    }
                }
            }
            &Inst::CsrReg { op, rd, rs, csr } => {
                let rs_s = format_reg(rs);
                let rd_s = format_reg(rd.to_reg());

                match (op, csr, rd) {
                    (CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => {
                        format!("fsrm {rs_s}")
                    }
                    _ => {
                        format!("{op} {rd_s},{csr},{rs_s}")
                    }
                }
            }
            &Inst::CsrImm { op, rd, csr, imm } => {
                let rd_s = format_reg(rd.to_reg());

                match (op, csr, rd) {
                    (CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => {
                        format!("fsrmi {rd_s},{imm}")
                    }
                    _ => {
                        format!("{op} {rd_s},{csr},{imm}")
                    }
                }
            }
            &Inst::Load {
                rd,
                op,
                from,
                flags: _flags,
            } => {
                let base = from.to_string();
                let rd = format_reg(rd.to_reg());
                format!("{} {},{}", op.op_name(), rd, base,)
            }
            &Inst::Store {
                to,
                src,
                op,
                flags: _flags,
            } => {
                let base = to.to_string();
                let src = format_reg(src);
                format!("{} {},{}", op.op_name(), src, base,)
            }
            &Inst::Args { ref args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = format_reg(arg.preg);
                    let def = format_reg(arg.vreg.to_reg());
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }
            &Inst::Rets { ref rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = format_reg(ret.preg);
                    let vreg = format_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &Inst::Ret {} => "ret".to_string(),

            &MInst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                ..
            } => {
                let rn = format_reg(rn);
                let rd = format_reg(rd.to_reg());
                // NOTE(review): the zero-extend-from-8 case prints `andi`
                // without its immediate operand — confirm against the
                // emitted encoding (presumably `andi rd,rn,255`).
                return if signed == false && from_bits == 8 {
                    format!("andi {rd},{rn}")
                } else {
                    let op = if signed { "srai" } else { "srli" };
                    let shift_bits = (64 - from_bits) as i16;
                    format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}")
                };
            }
            &MInst::Call { ref info } => format!("call {}", info.dest.display(None)),
            &MInst::CallInd { ref info } => {
                let rd = format_reg(info.dest);
                format!("callind {rd}")
            }
            &MInst::ReturnCall { ref info } => {
                let mut s = format!(
                    "return_call {:?} new_stack_arg_size:{}",
                    info.dest, info.new_stack_arg_size
                );
                for ret in &info.uses {
                    let preg = format_reg(ret.preg);
                    let vreg = format_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &MInst::ReturnCallInd { ref info } => {
                let callee = format_reg(info.dest);
                let mut s = format!(
                    "return_call_ind {callee} new_stack_arg_size:{}",
                    info.new_stack_arg_size
                );
                for ret in &info.uses {
                    let preg = format_reg(ret.preg);
                    let vreg = format_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &MInst::TrapIf {
                rs1,
                rs2,
                cc,
                trap_code,
            } => {
                let rs1 = format_reg(rs1);
                let rs2 = format_reg(rs2);
                format!("trap_if {trap_code}##({rs1} {cc} {rs2})")
            }
            &MInst::Jal { label } => {
                format!("j {}", label.to_string())
            }
            &MInst::CondBr {
                taken,
                not_taken,
                kind,
                ..
            } => {
                let rs1 = format_reg(kind.rs1);
                let rs2 = format_reg(kind.rs2);
                if not_taken.is_fallthrouh() && taken.as_label().is_none() {
                    format!("{} {},{},0", kind.op_name(), rs1, rs2)
                } else {
                    let x = format!(
                        "{} {},{},taken({}),not_taken({})",
                        kind.op_name(),
                        rs1,
                        rs2,
                        taken,
                        not_taken
                    );
                    x
                }
            }
            &MInst::Atomic {
                op,
                rd,
                addr,
                src,
                amo,
            } => {
                let op_name = op.op_name(amo);
                let addr = format_reg(addr);
                let src = format_reg(src);
                let rd = format_reg(rd.to_reg());
                // AMO loads (lr.*) take no source operand.
                if op.is_load() {
                    format!("{op_name} {rd},({addr})")
                } else {
                    format!("{op_name} {rd},{src},({addr})")
                }
            }
            &MInst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
                let rd = format_reg(rd.to_reg());
                format!("load_sym {},{}{:+}", rd, name.display(None), offset)
            }
            &Inst::ElfTlsGetAddr { rd, ref name } => {
                let rd = format_reg(rd.to_reg());
                format!("elf_tls_get_addr {rd},{}", name.display(None))
            }
            &MInst::LoadAddr { ref rd, ref mem } => {
                let rs = mem.to_string();
                let rd = format_reg(rd.to_reg());
                format!("load_addr {rd},{rs}")
            }
            &MInst::Mov { rd, rm, ty } => {
                let rm = format_reg(rm);
                let rd = format_reg(rd.to_reg());

                // Pick the move mnemonic from the value type being moved.
                let op = match ty {
                    F16 => "fmv.h",
                    F32 => "fmv.s",
                    F64 => "fmv.d",
                    ty if ty.is_vector() => "vmv1r.v",
                    _ => "mv",
                };

                format!("{op} {rd},{rm}")
            }
            &MInst::MovFromPReg { rd, rm } => {
                let rd = format_reg(rd.to_reg());
                // Only sp (x2) and fp (x8) are expected as sources here.
                debug_assert!([px_reg(2), px_reg(8)].contains(&rm));
                let rm = reg_name(Reg::from(rm));
                format!("mv {rd},{rm}")
            }
            &MInst::Fence { pred, succ } => {
                format!(
                    "fence {},{}",
                    Inst::fence_req_to_string(pred),
                    Inst::fence_req_to_string(succ),
                )
            }
            &MInst::Select {
                ref dst,
                condition,
                ref x,
                ref y,
            } => {
                let c_rs1 = format_reg(condition.rs1);
                let c_rs2 = format_reg(condition.rs2);
                let x = format_regs(x.regs());
                let y = format_regs(y.regs());
                let dst = dst.map(|r| r.to_reg());
                let dst = format_regs(dst.regs());
                format!(
                    "select {},{},{}##condition=({} {} {})",
                    dst,
                    x,
                    y,
                    c_rs1,
                    condition.kind.to_static_str(),
                    c_rs2
                )
            }
            &MInst::Udf { trap_code } => format!("udf##trap_code={trap_code}"),
            &MInst::EBreak {} => String::from("ebreak"),
            &Inst::VecAluRRRR {
                op,
                vd,
                vd_src,
                vs1,
                vs2,
                ref mask,
                ref vstate,
            } => {
                let vs1_s = format_reg(vs1);
                let vs2_s = format_reg(vs2);
                let vd_src_s = format_reg(vd_src);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                // Only show the separate destination source when it differs
                // from the destination itself.
                let vd_fmt = if vd_s != vd_src_s {
                    format!("{vd_s},{vd_src_s}")
                } else {
                    vd_s
                };

                // Note: vs2 and vs1 here are opposite to the standard scalar ordering.
                // This is noted in Section 10.1 of the RISC-V Vector spec.
                format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}")
            }
            &Inst::VecAluRRRImm5 {
                op,
                vd,
                imm,
                vs2,
                ref mask,
                ref vstate,
                ..
            } => {
                let vs2_s = format_reg(vs2);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                // Some opcodes interpret the immediate as unsigned, lets show the
                // correct number here.
                let imm_s = if op.imm_is_unsigned() {
                    format!("{}", imm.bits())
                } else {
                    format!("{imm}")
                };

                format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")
            }
            &Inst::VecAluRRR {
                op,
                vd,
                vs1,
                vs2,
                ref mask,
                ref vstate,
            } => {
                let vs1_s = format_reg(vs1);
                let vs2_s = format_reg(vs2);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                // Note: vs2 and vs1 here are opposite to the standard scalar ordering.
                // This is noted in Section 10.1 of the RISC-V Vector spec.
                match (op, vs2, vs1) {
                    (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {
                        format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}")
                    }
                    (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {
                        format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}")
                    }
                    (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => {
                        format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}")
                    }
                    (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => {
                        format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}")
                    }
                    _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"),
                }
            }
            &Inst::VecAluRRImm5 {
                op,
                vd,
                imm,
                vs2,
                ref mask,
                ref vstate,
            } => {
                let vs2_s = format_reg(vs2);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                // Some opcodes interpret the immediate as unsigned, lets show the
                // correct number here.
                let imm_s = if op.imm_is_unsigned() {
                    format!("{}", imm.bits())
                } else {
                    format!("{imm}")
                };

                match (op, imm) {
                    (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => {
                        format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}")
                    }
                    _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"),
                }
            }
            &Inst::VecAluRR {
                op,
                vd,
                vs,
                ref mask,
                ref vstate,
            } => {
                let vs_s = format_reg(vs);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{vs_s}{mask} {vstate}")
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ref vstate,
            } => {
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{imm}{mask} {vstate}")
            }
            &Inst::VecSetState { rd, ref vstate } => {
                let rd_s = format_reg(rd.to_reg());
                // `vsetivli` only encodes a static (immediate) AVL.
                assert!(vstate.avl.is_static());
                format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
            }
            Inst::VecLoad {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let base = format_vec_amode(from);
                let vd = format_reg(to.to_reg());
                let mask = format_mask(mask);

                format!("vl{eew}.v {vd},{base}{mask} {vstate}")
            }
            Inst::VecStore {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let dst = format_vec_amode(to);
                let vs3 = format_reg(*from);
                let mask = format_mask(mask);

                format!("vs{eew}.v {vs3},{dst}{mask} {vstate}")
            }
        }
    }
}
1618
/// Different forms of label references for different instruction formats.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Used in `Jal` instructions.
    Jal20,

    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    PCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    B12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation; allows setting
    /// the immediate field of an `auipc` instruction.
    PCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I-type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    PCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation.
    RVCJump,
}
1657
1658impl MachInstLabelUse for LabelUse {
1659    /// Alignment for veneer code. Every Riscv64 instruction must be
1660    /// 4-byte-aligned.
1661    const ALIGN: CodeOffset = 4;
1662
1663    /// Maximum PC-relative range (positive), inclusive.
1664    fn max_pos_range(self) -> CodeOffset {
1665        match self {
1666            LabelUse::Jal20 => ((1 << 19) - 1) * 2,
1667            LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => {
1668                Inst::imm_max() as CodeOffset
1669            }
1670            LabelUse::B12 => ((1 << 11) - 1) * 2,
1671            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
1672        }
1673    }
1674
1675    /// Maximum PC-relative range (negative).
1676    fn max_neg_range(self) -> CodeOffset {
1677        match self {
1678            LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset,
1679            _ => self.max_pos_range() + 2,
1680        }
1681    }
1682
1683    /// Size of window into code needed to do the patch.
1684    fn patch_size(self) -> CodeOffset {
1685        match self {
1686            LabelUse::RVCJump => 2,
1687            LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4,
1688            LabelUse::PCRel32 => 8,
1689        }
1690    }
1691
1692    /// Perform the patch.
1693    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
1694        assert!(use_offset % 2 == 0);
1695        assert!(label_offset % 2 == 0);
1696        let offset = (label_offset as i64) - (use_offset as i64);
1697
1698        // re-check range
1699        assert!(
1700            offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64),
1701            "{self:?} offset '{offset}' use_offset:'{use_offset}' label_offset:'{label_offset}'  must not exceed max range.",
1702        );
1703        self.patch_raw_offset(buffer, offset);
1704    }
1705
1706    /// Is a veneer supported for this label reference type?
1707    fn supports_veneer(self) -> bool {
1708        match self {
1709            Self::Jal20 | Self::B12 | Self::RVCJump => true,
1710            _ => false,
1711        }
1712    }
1713
1714    /// How large is the veneer, if supported?
1715    fn veneer_size(self) -> CodeOffset {
1716        match self {
1717            Self::B12 | Self::Jal20 | Self::RVCJump => 8,
1718            _ => unreachable!(),
1719        }
1720    }
1721
    /// Worst-case veneer size over all label-use kinds: the 8-byte
    /// `auipc` + `jalr` pair written by `generate_veneer`.
    fn worst_case_veneer_size() -> CodeOffset {
        8
    }
1725
1726    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
1727    /// an offset and label-use for the veneer's use of the original label.
1728    fn generate_veneer(
1729        self,
1730        buffer: &mut [u8],
1731        veneer_offset: CodeOffset,
1732    ) -> (CodeOffset, LabelUse) {
1733        let base = writable_spilltmp_reg();
1734        {
1735            let x = enc_auipc(base, Imm20::ZERO).to_le_bytes();
1736            buffer[0] = x[0];
1737            buffer[1] = x[1];
1738            buffer[2] = x[2];
1739            buffer[3] = x[3];
1740        }
1741        {
1742            let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes();
1743            buffer[4] = x[0];
1744            buffer[5] = x[1];
1745            buffer[6] = x[2];
1746            buffer[7] = x[3];
1747        }
1748        (veneer_offset, Self::PCRel32)
1749    }
1750
1751    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
1752        match (reloc, addend) {
1753            (Reloc::RiscvCallPlt, _) => Some(Self::PCRel32),
1754            _ => None,
1755        }
1756    }
1757}
1758
impl LabelUse {
    /// Returns whether `offset` (target address minus use address, in
    /// bytes) fits in this label-use kind's reachable range.
    #[allow(dead_code)] // in case it's needed in the future
    fn offset_in_range(self, offset: i64) -> bool {
        let min = -(self.max_neg_range() as i64);
        let max = self.max_pos_range() as i64;
        offset >= min && offset <= max
    }

    /// Splice a (pre-validated) PC-relative `offset` into the instruction
    /// bytes at the start of `buffer`. Callers (see `patch`) assert the
    /// offset is in range for `self` before calling this.
    fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
        // Read the existing instruction word so its non-immediate fields
        // (opcode, registers) are preserved when the offset bits are OR-ed
        // in below. RVC instructions are 16 bits wide; all others are 32.
        let insn = match self {
            LabelUse::RVCJump => u16::from_le_bytes(buffer[..2].try_into().unwrap()) as u32,
            _ => u32::from_le_bytes(buffer[..4].try_into().unwrap()),
        };

        match self {
            LabelUse::Jal20 => {
                // JAL's J-type immediate is scattered across the word:
                // offset[19:12] -> insn[19:12], offset[11] -> insn[20],
                // offset[10:1] -> insn[30:21], offset[20] -> insn[31].
                // OR-ing assumes the immediate field of the pre-encoded
                // instruction is zero.
                let offset = offset as u32;
                let v = ((offset >> 12 & 0b1111_1111) << 12)
                    | ((offset >> 11 & 0b1) << 20)
                    | ((offset >> 1 & 0b11_1111_1111) << 21)
                    | ((offset >> 20 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }
            LabelUse::PCRel32 => {
                // Patch an `auipc` + `jalr` pair: the offset is split into a
                // high 20-bit and low 12-bit part by `Inst::generate_imm`.
                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
                Inst::generate_imm(offset as u64)
                    .map(|(imm20, imm12)| {
                        // Encode the OR-ed-in value with zero_reg(). The
                        // register parameter must be in the original
                        // encoded instruction and or'ing in zeroes does not
                        // change it.
                        buffer[0..4].clone_from_slice(&u32::to_le_bytes(
                            insn | enc_auipc(writable_zero_reg(), imm20),
                        ));
                        buffer[4..8].clone_from_slice(&u32::to_le_bytes(
                            insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
                        ));
                    })
                    // The range was checked before patching, so a failure to
                    // split the immediate here is a compiler bug.
                    .expect("we have check the range before,this is a compiler error.");
            }

            LabelUse::B12 => {
                // Conditional-branch B-type immediate layout:
                // offset[11] -> insn[7], offset[4:1] -> insn[11:8],
                // offset[10:5] -> insn[30:25], offset[12] -> insn[31].
                let offset = offset as u32;
                let v = ((offset >> 11 & 0b1) << 7)
                    | ((offset >> 1 & 0b1111) << 8)
                    | ((offset >> 5 & 0b11_1111) << 25)
                    | ((offset >> 12 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }

            LabelUse::PCRelHi20 => {
                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                //
                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
                // offset of 2048, we need to land at the next page and subtract instead.
                let offset = offset as u32;
                let hi20 = offset.wrapping_add(0x800) >> 12;
                // Keep the low 12 bits (opcode + rd) and replace the U-type
                // immediate in insn[31:12].
                let insn = (insn & 0xFFF) | (hi20 << 12);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
            }

            LabelUse::PCRelLo12I => {
                // `offset` is the offset from the current instruction to the target address.
                //
                // However we are trying to compute the offset to the target address from the previous instruction.
                // The previous instruction should be the one that contains the PCRelHi20 relocation and
                // stores/references the program counter (`auipc` usually).
                //
                // Since we are trying to compute the offset from the previous instruction, we can
                // represent it as offset = target_address - (current_instruction_address - 4)
                // which is equivalent to offset = target_address - current_instruction_address + 4.
                //
                // Thus we need to add 4 to the offset here.
                let lo12 = (offset + 4) as u32 & 0xFFF;
                // Keep insn[19:0] (opcode, rd, funct3, rs1) and replace the
                // I-type immediate in insn[31:20].
                let insn = (insn & 0xFFFFF) | (lo12 << 20);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
            }
            LabelUse::RVCJump => {
                debug_assert!(offset & 1 == 0);

                // We currently only support this for the C.J operation, so assert that is the opcode in
                // the buffer.
                debug_assert_eq!(insn & 0xFFFF, 0xA001);

                // Re-encode the whole compressed instruction with the
                // resolved offset rather than OR-ing bits in.
                buffer[0..2].clone_from_slice(&u16::to_le_bytes(encode_cj_type(
                    CjOp::CJ,
                    Imm12::from_i16(i16::try_from(offset).unwrap()),
                )));
            }
        }
    }
}
1853
#[cfg(test)]
mod test {
    use super::*;

    /// Sanity-check the range computations for the label-use kinds:
    /// sign-extended forms reach 2 bytes further negative than positive,
    /// and `PCRel32` spans exactly the `Inst` immediate range.
    #[test]
    fn label_use_max_range() {
        // `assert_eq!` (instead of `assert!(a == b)`) prints both sides on
        // failure, making a broken range computation easy to diagnose.
        assert_eq!(LabelUse::B12.max_neg_range(), LabelUse::B12.max_pos_range() + 2);
        assert_eq!(LabelUse::Jal20.max_neg_range(), LabelUse::Jal20.max_pos_range() + 2);
        assert_eq!(LabelUse::PCRel32.max_pos_range(), Inst::imm_max() as CodeOffset);
        assert_eq!(LabelUse::PCRel32.max_neg_range(), Inst::imm_min().abs() as CodeOffset);
        assert_eq!(LabelUse::B12.max_pos_range(), ((1 << 11) - 1) * 2);
    }
}