cranelift_codegen/isa/aarch64/inst/
mod.rs

1//! This module defines aarch64-specific machine instruction types.
2
3use crate::binemit::{Addend, CodeOffset, Reloc};
4use crate::ir::types::{F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16};
5use crate::ir::{types, MemFlags, Type};
6use crate::isa::{CallConv, FunctionAlignment};
7use crate::machinst::*;
8use crate::{settings, CodegenError, CodegenResult};
9
10use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
11
12use alloc::vec::Vec;
13use core::slice;
14use smallvec::{smallvec, SmallVec};
15use std::fmt::Write;
16use std::string::{String, ToString};
17
18pub(crate) mod regs;
19pub(crate) use self::regs::*;
20pub mod imms;
21pub use self::imms::*;
22pub mod args;
23pub use self::args::*;
24pub mod emit;
25pub(crate) use self::emit::*;
26use crate::isa::aarch64::abi::AArch64MachineDeps;
27
28pub(crate) mod unwind;
29
30#[cfg(test)]
31mod emit_tests;
32
33//=============================================================================
34// Instructions (top level): definition
35
36pub use crate::isa::aarch64::lower::isle::generated_code::{
37    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
38    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
39    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
40    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
41};
42
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift, 32-bit. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift, 64-bit. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}
51
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert, 32-bit. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert, 64-bit. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
62
impl BitOp {
    /// Get the assembly mnemonic for this opcode, e.g. `"clz"` for
    /// count-leading-zeros. The returned string is the literal AArch64
    /// instruction name used when pretty-printing.
    pub fn op_str(&self) -> &'static str {
        match self {
            BitOp::RBit => "rbit",
            BitOp::Clz => "clz",
            BitOp::Cls => "cls",
            BitOp::Rev16 => "rev16",
            BitOp::Rev32 => "rev32",
            BitOp::Rev64 => "rev64",
        }
    }
}
76
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
///
/// `T` is the type of the call destination (e.g. a register for
/// `return_call_ind` — see its fixed-register use in operand collection).
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
}
92
/// Count how many of the low `num_half_words` 16-bit chunks of `value` are
/// zero.
///
/// Used by `load_constant` to decide whether a MOVZ-based sequence (which can
/// skip 0x0000 half-words) or a MOVN-based sequence (which can skip 0xffff
/// half-words) needs fewer instructions.
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
    (0..num_half_words)
        .filter(|_| {
            let halfword_is_zero = value & 0xffff == 0;
            value >>= 16;
            halfword_is_zero
        })
        .count()
}
104
impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    ///
    /// `alloc_tmp` is called to allocate a fresh intermediate register when
    /// the constant needs more than one instruction to materialize; the final
    /// instruction in the returned sequence always writes `rd`.
    pub fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        value: u64,
        alloc_tmp: &mut F,
    ) -> SmallVec<[Inst; 4]> {
        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
        // if modifications are made here before this is deleted after moving to
        // ISLE then those locations should be updated as well.

        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovZ,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            // (MOVN writes the bitwise NOT of its immediate).
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovN,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr,
                size: OperandSize::Size64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            // General case: build the constant 16 bits at a time with a
            // MOVZ/MOVN followed by MOVKs.
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            // `negated` is `!value` truncated to the chosen operand width.
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };

            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);

            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };

            // Collect the half-words that actually need an instruction,
            // paired with their position `i` (so the shift is `16 * i`).
            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
                .filter_map(|i| {
                    let imm16 = (value >> (16 * i)) & 0xffff;
                    if imm16 == ignored_halfword {
                        None
                    } else {
                        Some((i, imm16))
                    }
                })
                .collect();

            let mut prev_result = None;
            // `halfwords` is non-empty here: a value whose half-words are all
            // 0x0000 or all 0xffff would have been handled by the MOVZ/MOVN
            // fast paths above, so `unwrap()` cannot fail.
            let last_index = halfwords.last().unwrap().0;
            for (i, imm16) in halfwords {
                let shift = i * 16;
                // Intermediate results go to a fresh temp; only the final
                // instruction writes the caller's destination register.
                let rd = if i == last_index { rd } else { alloc_tmp(I16) };

                if let Some(rn) = prev_result {
                    // Merge subsequent half-words into the running result
                    // with MOVK (keeps the other bits of `rn`).
                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                    insts.push(Inst::MovK { rd, rn, imm, size });
                } else {
                    if first_is_inverted {
                        // MOVN writes the NOT of its (shifted) immediate, so
                        // invert the half-word before encoding it.
                        let imm =
                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
                                .unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovN,
                            rd,
                            imm,
                            size,
                        });
                    } else {
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovZ,
                            rd,
                            imm,
                            size,
                        });
                    }
                }

                prev_result = Some(rd.to_reg());
            }

            assert!(prev_result.is_some());

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    ///
    /// Panics (via `unimplemented!`) for types with no matching load form.
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            I32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            I64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                // Float and vector loads are selected by total bit width.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    match bits {
                        128 => Inst::FpuLoad128 { rd, mem, flags },
                        64 => Inst::FpuLoad64 { rd, mem, flags },
                        32 => Inst::FpuLoad32 { rd, mem, flags },
                        16 => Inst::FpuLoad16 { rd, mem, flags },
                        _ => unimplemented!("gen_load({})", ty),
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    ///
    /// Panics (via `unimplemented!`) for types with no matching store form.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            I32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            I64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                // Float and vector stores are selected by total bit width.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    match bits {
                        128 => Inst::FpuStore128 { rd, mem, flags },
                        64 => Inst::FpuStore64 { rd, mem, flags },
                        32 => Inst::FpuStore32 { rd, mem, flags },
                        16 => Inst::FpuStore16 { rd, mem, flags },
                        _ => unimplemented!("gen_store({})", ty),
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// What type does this load or store instruction access in memory? When
    /// uimm12 encoding is used, the size of this type is the amount that
    /// immediate offsets are scaled by.
    ///
    /// Returns `None` for instructions that do not access memory.
    pub fn mem_type(&self) -> Option<Type> {
        match self {
            Inst::ULoad8 { .. } => Some(I8),
            Inst::SLoad8 { .. } => Some(I8),
            Inst::ULoad16 { .. } => Some(I16),
            Inst::SLoad16 { .. } => Some(I16),
            Inst::ULoad32 { .. } => Some(I32),
            Inst::SLoad32 { .. } => Some(I32),
            Inst::ULoad64 { .. } => Some(I64),
            Inst::FpuLoad16 { .. } => Some(F16),
            Inst::FpuLoad32 { .. } => Some(F32),
            Inst::FpuLoad64 { .. } => Some(F64),
            Inst::FpuLoad128 { .. } => Some(I8X16),
            Inst::Store8 { .. } => Some(I8),
            Inst::Store16 { .. } => Some(I16),
            Inst::Store32 { .. } => Some(I32),
            Inst::Store64 { .. } => Some(I64),
            Inst::FpuStore16 { .. } => Some(F16),
            Inst::FpuStore32 { .. } => Some(F32),
            Inst::FpuStore64 { .. } => Some(F64),
            Inst::FpuStore128 { .. } => Some(I8X16),
            _ => None,
        }
    }
}
324
325//=============================================================================
326// Instructions: get_regs
327
328fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
329    match memarg {
330        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
331            collector.reg_use(rn);
332        }
333        AMode::RegReg { rn, rm, .. }
334        | AMode::RegScaled { rn, rm, .. }
335        | AMode::RegScaledExtended { rn, rm, .. }
336        | AMode::RegExtended { rn, rm, .. } => {
337            collector.reg_use(rn);
338            collector.reg_use(rm);
339        }
340        AMode::Label { .. } => {}
341        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
342        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
343        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
344        AMode::RegOffset { rn, .. } => {
345            collector.reg_use(rn);
346        }
347        AMode::Const { .. } => {}
348    }
349}
350
351fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
352    match pairmemarg {
353        PairAMode::SignedOffset { reg, .. } => {
354            collector.reg_use(reg);
355        }
356        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
357    }
358}
359
360fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
361    match inst {
362        Inst::AluRRR { rd, rn, rm, .. } => {
363            collector.reg_def(rd);
364            collector.reg_use(rn);
365            collector.reg_use(rm);
366        }
367        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
368            collector.reg_def(rd);
369            collector.reg_use(rn);
370            collector.reg_use(rm);
371            collector.reg_use(ra);
372        }
373        Inst::AluRRImm12 { rd, rn, .. } => {
374            collector.reg_def(rd);
375            collector.reg_use(rn);
376        }
377        Inst::AluRRImmLogic { rd, rn, .. } => {
378            collector.reg_def(rd);
379            collector.reg_use(rn);
380        }
381        Inst::AluRRImmShift { rd, rn, .. } => {
382            collector.reg_def(rd);
383            collector.reg_use(rn);
384        }
385        Inst::AluRRRShift { rd, rn, rm, .. } => {
386            collector.reg_def(rd);
387            collector.reg_use(rn);
388            collector.reg_use(rm);
389        }
390        Inst::AluRRRExtend { rd, rn, rm, .. } => {
391            collector.reg_def(rd);
392            collector.reg_use(rn);
393            collector.reg_use(rm);
394        }
395        Inst::BitRR { rd, rn, .. } => {
396            collector.reg_def(rd);
397            collector.reg_use(rn);
398        }
399        Inst::ULoad8 { rd, mem, .. }
400        | Inst::SLoad8 { rd, mem, .. }
401        | Inst::ULoad16 { rd, mem, .. }
402        | Inst::SLoad16 { rd, mem, .. }
403        | Inst::ULoad32 { rd, mem, .. }
404        | Inst::SLoad32 { rd, mem, .. }
405        | Inst::ULoad64 { rd, mem, .. } => {
406            collector.reg_def(rd);
407            memarg_operands(mem, collector);
408        }
409        Inst::Store8 { rd, mem, .. }
410        | Inst::Store16 { rd, mem, .. }
411        | Inst::Store32 { rd, mem, .. }
412        | Inst::Store64 { rd, mem, .. } => {
413            collector.reg_use(rd);
414            memarg_operands(mem, collector);
415        }
416        Inst::StoreP64 { rt, rt2, mem, .. } => {
417            collector.reg_use(rt);
418            collector.reg_use(rt2);
419            pairmemarg_operands(mem, collector);
420        }
421        Inst::LoadP64 { rt, rt2, mem, .. } => {
422            collector.reg_def(rt);
423            collector.reg_def(rt2);
424            pairmemarg_operands(mem, collector);
425        }
426        Inst::Mov { rd, rm, .. } => {
427            collector.reg_def(rd);
428            collector.reg_use(rm);
429        }
430        Inst::MovFromPReg { rd, rm } => {
431            debug_assert!(rd.to_reg().is_virtual());
432            collector.reg_def(rd);
433            collector.reg_fixed_nonallocatable(*rm);
434        }
435        Inst::MovToPReg { rd, rm } => {
436            debug_assert!(rm.is_virtual());
437            collector.reg_fixed_nonallocatable(*rd);
438            collector.reg_use(rm);
439        }
440        Inst::MovK { rd, rn, .. } => {
441            collector.reg_use(rn);
442            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
443        }
444        Inst::MovWide { rd, .. } => {
445            collector.reg_def(rd);
446        }
447        Inst::CSel { rd, rn, rm, .. } => {
448            collector.reg_def(rd);
449            collector.reg_use(rn);
450            collector.reg_use(rm);
451        }
452        Inst::CSNeg { rd, rn, rm, .. } => {
453            collector.reg_def(rd);
454            collector.reg_use(rn);
455            collector.reg_use(rm);
456        }
457        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
458            collector.reg_def(rd);
459        }
460        Inst::CCmp { rn, rm, .. } => {
461            collector.reg_use(rn);
462            collector.reg_use(rm);
463        }
464        Inst::CCmpImm { rn, .. } => {
465            collector.reg_use(rn);
466        }
467        Inst::AtomicRMWLoop {
468            op,
469            addr,
470            operand,
471            oldval,
472            scratch1,
473            scratch2,
474            ..
475        } => {
476            collector.reg_fixed_use(addr, xreg(25));
477            collector.reg_fixed_use(operand, xreg(26));
478            collector.reg_fixed_def(oldval, xreg(27));
479            collector.reg_fixed_def(scratch1, xreg(24));
480            if *op != AtomicRMWLoopOp::Xchg {
481                collector.reg_fixed_def(scratch2, xreg(28));
482            }
483        }
484        Inst::AtomicRMW { rs, rt, rn, .. } => {
485            collector.reg_use(rs);
486            collector.reg_def(rt);
487            collector.reg_use(rn);
488        }
489        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
490            collector.reg_reuse_def(rd, 1); // reuse `rs`.
491            collector.reg_use(rs);
492            collector.reg_use(rt);
493            collector.reg_use(rn);
494        }
495        Inst::AtomicCASLoop {
496            addr,
497            expected,
498            replacement,
499            oldval,
500            scratch,
501            ..
502        } => {
503            collector.reg_fixed_use(addr, xreg(25));
504            collector.reg_fixed_use(expected, xreg(26));
505            collector.reg_fixed_use(replacement, xreg(28));
506            collector.reg_fixed_def(oldval, xreg(27));
507            collector.reg_fixed_def(scratch, xreg(24));
508        }
509        Inst::LoadAcquire { rt, rn, .. } => {
510            collector.reg_use(rn);
511            collector.reg_def(rt);
512        }
513        Inst::StoreRelease { rt, rn, .. } => {
514            collector.reg_use(rn);
515            collector.reg_use(rt);
516        }
517        Inst::Fence {} | Inst::Csdb {} => {}
518        Inst::FpuMove32 { rd, rn } => {
519            collector.reg_def(rd);
520            collector.reg_use(rn);
521        }
522        Inst::FpuMove64 { rd, rn } => {
523            collector.reg_def(rd);
524            collector.reg_use(rn);
525        }
526        Inst::FpuMove128 { rd, rn } => {
527            collector.reg_def(rd);
528            collector.reg_use(rn);
529        }
530        Inst::FpuMoveFromVec { rd, rn, .. } => {
531            collector.reg_def(rd);
532            collector.reg_use(rn);
533        }
534        Inst::FpuExtend { rd, rn, .. } => {
535            collector.reg_def(rd);
536            collector.reg_use(rn);
537        }
538        Inst::FpuRR { rd, rn, .. } => {
539            collector.reg_def(rd);
540            collector.reg_use(rn);
541        }
542        Inst::FpuRRR { rd, rn, rm, .. } => {
543            collector.reg_def(rd);
544            collector.reg_use(rn);
545            collector.reg_use(rm);
546        }
547        Inst::FpuRRI { rd, rn, .. } => {
548            collector.reg_def(rd);
549            collector.reg_use(rn);
550        }
551        Inst::FpuRRIMod { rd, ri, rn, .. } => {
552            collector.reg_reuse_def(rd, 1); // reuse `ri`.
553            collector.reg_use(ri);
554            collector.reg_use(rn);
555        }
556        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
557            collector.reg_def(rd);
558            collector.reg_use(rn);
559            collector.reg_use(rm);
560            collector.reg_use(ra);
561        }
562        Inst::VecMisc { rd, rn, .. } => {
563            collector.reg_def(rd);
564            collector.reg_use(rn);
565        }
566
567        Inst::VecLanes { rd, rn, .. } => {
568            collector.reg_def(rd);
569            collector.reg_use(rn);
570        }
571        Inst::VecShiftImm { rd, rn, .. } => {
572            collector.reg_def(rd);
573            collector.reg_use(rn);
574        }
575        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
576            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
577            collector.reg_use(ri);
578            collector.reg_use(rn);
579        }
580        Inst::VecExtract { rd, rn, rm, .. } => {
581            collector.reg_def(rd);
582            collector.reg_use(rn);
583            collector.reg_use(rm);
584        }
585        Inst::VecTbl { rd, rn, rm } => {
586            collector.reg_use(rn);
587            collector.reg_use(rm);
588            collector.reg_def(rd);
589        }
590        Inst::VecTblExt { rd, ri, rn, rm } => {
591            collector.reg_use(rn);
592            collector.reg_use(rm);
593            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
594            collector.reg_use(ri);
595        }
596
597        Inst::VecTbl2 { rd, rn, rn2, rm } => {
598            // Constrain to v30 / v31 so that we satisfy the "adjacent
599            // registers" constraint without use of pinned vregs in
600            // lowering.
601            collector.reg_fixed_use(rn, vreg(30));
602            collector.reg_fixed_use(rn2, vreg(31));
603            collector.reg_use(rm);
604            collector.reg_def(rd);
605        }
606        Inst::VecTbl2Ext {
607            rd,
608            ri,
609            rn,
610            rn2,
611            rm,
612        } => {
613            // Constrain to v30 / v31 so that we satisfy the "adjacent
614            // registers" constraint without use of pinned vregs in
615            // lowering.
616            collector.reg_fixed_use(rn, vreg(30));
617            collector.reg_fixed_use(rn2, vreg(31));
618            collector.reg_use(rm);
619            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
620            collector.reg_use(ri);
621        }
622        Inst::VecLoadReplicate { rd, rn, .. } => {
623            collector.reg_def(rd);
624            collector.reg_use(rn);
625        }
626        Inst::VecCSel { rd, rn, rm, .. } => {
627            collector.reg_def(rd);
628            collector.reg_use(rn);
629            collector.reg_use(rm);
630        }
631        Inst::FpuCmp { rn, rm, .. } => {
632            collector.reg_use(rn);
633            collector.reg_use(rm);
634        }
635        Inst::FpuLoad16 { rd, mem, .. } => {
636            collector.reg_def(rd);
637            memarg_operands(mem, collector);
638        }
639        Inst::FpuLoad32 { rd, mem, .. } => {
640            collector.reg_def(rd);
641            memarg_operands(mem, collector);
642        }
643        Inst::FpuLoad64 { rd, mem, .. } => {
644            collector.reg_def(rd);
645            memarg_operands(mem, collector);
646        }
647        Inst::FpuLoad128 { rd, mem, .. } => {
648            collector.reg_def(rd);
649            memarg_operands(mem, collector);
650        }
651        Inst::FpuStore16 { rd, mem, .. } => {
652            collector.reg_use(rd);
653            memarg_operands(mem, collector);
654        }
655        Inst::FpuStore32 { rd, mem, .. } => {
656            collector.reg_use(rd);
657            memarg_operands(mem, collector);
658        }
659        Inst::FpuStore64 { rd, mem, .. } => {
660            collector.reg_use(rd);
661            memarg_operands(mem, collector);
662        }
663        Inst::FpuStore128 { rd, mem, .. } => {
664            collector.reg_use(rd);
665            memarg_operands(mem, collector);
666        }
667        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
668            collector.reg_def(rt);
669            collector.reg_def(rt2);
670            pairmemarg_operands(mem, collector);
671        }
672        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
673            collector.reg_use(rt);
674            collector.reg_use(rt2);
675            pairmemarg_operands(mem, collector);
676        }
677        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
678            collector.reg_def(rt);
679            collector.reg_def(rt2);
680            pairmemarg_operands(mem, collector);
681        }
682        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
683            collector.reg_use(rt);
684            collector.reg_use(rt2);
685            pairmemarg_operands(mem, collector);
686        }
687        Inst::FpuToInt { rd, rn, .. } => {
688            collector.reg_def(rd);
689            collector.reg_use(rn);
690        }
691        Inst::IntToFpu { rd, rn, .. } => {
692            collector.reg_def(rd);
693            collector.reg_use(rn);
694        }
695        Inst::FpuCSel16 { rd, rn, rm, .. }
696        | Inst::FpuCSel32 { rd, rn, rm, .. }
697        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
698            collector.reg_def(rd);
699            collector.reg_use(rn);
700            collector.reg_use(rm);
701        }
702        Inst::FpuRound { rd, rn, .. } => {
703            collector.reg_def(rd);
704            collector.reg_use(rn);
705        }
706        Inst::MovToFpu { rd, rn, .. } => {
707            collector.reg_def(rd);
708            collector.reg_use(rn);
709        }
710        Inst::FpuMoveFPImm { rd, .. } => {
711            collector.reg_def(rd);
712        }
713        Inst::MovToVec { rd, ri, rn, .. } => {
714            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
715            collector.reg_use(ri);
716            collector.reg_use(rn);
717        }
718        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
719            collector.reg_def(rd);
720            collector.reg_use(rn);
721        }
722        Inst::VecDup { rd, rn, .. } => {
723            collector.reg_def(rd);
724            collector.reg_use(rn);
725        }
726        Inst::VecDupFromFpu { rd, rn, .. } => {
727            collector.reg_def(rd);
728            collector.reg_use(rn);
729        }
730        Inst::VecDupFPImm { rd, .. } => {
731            collector.reg_def(rd);
732        }
733        Inst::VecDupImm { rd, .. } => {
734            collector.reg_def(rd);
735        }
736        Inst::VecExtend { rd, rn, .. } => {
737            collector.reg_def(rd);
738            collector.reg_use(rn);
739        }
740        Inst::VecMovElement { rd, ri, rn, .. } => {
741            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
742            collector.reg_use(ri);
743            collector.reg_use(rn);
744        }
745        Inst::VecRRLong { rd, rn, .. } => {
746            collector.reg_def(rd);
747            collector.reg_use(rn);
748        }
749        Inst::VecRRNarrowLow { rd, rn, .. } => {
750            collector.reg_use(rn);
751            collector.reg_def(rd);
752        }
753        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
754            collector.reg_use(rn);
755            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
756            collector.reg_use(ri);
757        }
758        Inst::VecRRPair { rd, rn, .. } => {
759            collector.reg_def(rd);
760            collector.reg_use(rn);
761        }
762        Inst::VecRRRLong { rd, rn, rm, .. } => {
763            collector.reg_def(rd);
764            collector.reg_use(rn);
765            collector.reg_use(rm);
766        }
767        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
768            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
769            collector.reg_use(ri);
770            collector.reg_use(rn);
771            collector.reg_use(rm);
772        }
773        Inst::VecRRPairLong { rd, rn, .. } => {
774            collector.reg_def(rd);
775            collector.reg_use(rn);
776        }
777        Inst::VecRRR { rd, rn, rm, .. } => {
778            collector.reg_def(rd);
779            collector.reg_use(rn);
780            collector.reg_use(rm);
781        }
782        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
783            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
784            collector.reg_use(ri);
785            collector.reg_use(rn);
786            collector.reg_use(rm);
787        }
788        Inst::MovToNZCV { rn } => {
789            collector.reg_use(rn);
790        }
791        Inst::MovFromNZCV { rd } => {
792            collector.reg_def(rd);
793        }
794        Inst::Extend { rd, rn, .. } => {
795            collector.reg_def(rd);
796            collector.reg_use(rn);
797        }
798        Inst::Args { args } => {
799            for ArgPair { vreg, preg } in args {
800                collector.reg_fixed_def(vreg, *preg);
801            }
802        }
803        Inst::Rets { rets } => {
804            for RetPair { vreg, preg } in rets {
805                collector.reg_fixed_use(vreg, *preg);
806            }
807        }
808        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
809        Inst::Jump { .. } => {}
810        Inst::Call { info, .. } => {
811            let CallInfo { uses, defs, .. } = &mut **info;
812            for CallArgPair { vreg, preg } in uses {
813                collector.reg_fixed_use(vreg, *preg);
814            }
815            for CallRetPair { vreg, location } in defs {
816                match location {
817                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
818                    RetLocation::Stack(..) => collector.any_def(vreg),
819                }
820            }
821            collector.reg_clobbers(info.clobbers);
822        }
823        Inst::CallInd { info, .. } => {
824            let CallInfo {
825                dest, uses, defs, ..
826            } = &mut **info;
827            collector.reg_use(dest);
828            for CallArgPair { vreg, preg } in uses {
829                collector.reg_fixed_use(vreg, *preg);
830            }
831            for CallRetPair { vreg, location } in defs {
832                match location {
833                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
834                    RetLocation::Stack(..) => collector.any_def(vreg),
835                }
836            }
837            collector.reg_clobbers(info.clobbers);
838        }
839        Inst::ReturnCall { info } => {
840            for CallArgPair { vreg, preg } in &mut info.uses {
841                collector.reg_fixed_use(vreg, *preg);
842            }
843        }
844        Inst::ReturnCallInd { info } => {
845            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
846            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
847            // register that won't be clobbered by the callee-save restore code emitted with a
848            // return_call_indirect.
849            collector.reg_fixed_use(&mut info.dest, xreg(1));
850            for CallArgPair { vreg, preg } in &mut info.uses {
851                collector.reg_fixed_use(vreg, *preg);
852            }
853        }
854        Inst::CondBr { kind, .. } => match kind {
855            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
856            CondBrKind::Cond(_) => {}
857        },
858        Inst::TestBitAndBranch { rn, .. } => {
859            collector.reg_use(rn);
860        }
861        Inst::IndirectBr { rn, .. } => {
862            collector.reg_use(rn);
863        }
864        Inst::Nop0 | Inst::Nop4 => {}
865        Inst::Brk => {}
866        Inst::Udf { .. } => {}
867        Inst::TrapIf { kind, .. } => match kind {
868            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
869            CondBrKind::Cond(_) => {}
870        },
871        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
872            collector.reg_def(rd);
873        }
874        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
875        Inst::JTSequence {
876            ridx, rtmp1, rtmp2, ..
877        } => {
878            collector.reg_use(ridx);
879            collector.reg_early_def(rtmp1);
880            collector.reg_early_def(rtmp2);
881        }
882        Inst::LoadExtName { rd, .. } => {
883            collector.reg_def(rd);
884        }
885        Inst::LoadAddr { rd, mem } => {
886            collector.reg_def(rd);
887            memarg_operands(mem, collector);
888        }
889        Inst::Paci { .. } | Inst::Xpaclri => {
890            // Neither LR nor SP is an allocatable register, so there is no need
891            // to do anything.
892        }
893        Inst::Bti { .. } => {}
894
895        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
896            // TLSDESC has a very neat calling convention. It is required to preserve
897            // all registers except x0 and x30. X30 is non allocatable in cranelift since
898            // its the link register.
899            //
900            // Additionally we need a second register as a temporary register for the
901            // TLSDESC sequence. This register can be any register other than x0 (and x30).
902            collector.reg_fixed_def(rd, regs::xreg(0));
903            collector.reg_early_def(tmp);
904        }
905        Inst::MachOTlsGetAddr { rd, .. } => {
906            collector.reg_fixed_def(rd, regs::xreg(0));
907            let mut clobbers =
908                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
909            clobbers.remove(regs::xreg_preg(0));
910            collector.reg_clobbers(clobbers);
911        }
912        Inst::Unwind { .. } => {}
913        Inst::EmitIsland { .. } => {}
914        Inst::DummyUse { reg } => {
915            collector.reg_use(reg);
916        }
917        Inst::StackProbeLoop { start, end, .. } => {
918            collector.reg_early_def(start);
919            collector.reg_use(end);
920        }
921    }
922}
923
924//=============================================================================
925// Instructions: misc functions and external interface
926
927impl MachInst for Inst {
928    type ABIMachineSpec = AArch64MachineDeps;
929    type LabelUse = LabelUse;
930
931    // "CLIF" in hex, to make the trap recognizable during
932    // debugging.
933    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();
934
935    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
936        aarch64_get_operands(self, collector);
937    }
938
939    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
940        match self {
941            &Inst::Mov {
942                size: OperandSize::Size64,
943                rd,
944                rm,
945            } => Some((rd, rm)),
946            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
947            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
948            _ => None,
949        }
950    }
951
952    fn is_included_in_clobbers(&self) -> bool {
953        let (caller, callee, is_exception) = match self {
954            Inst::Args { .. } => return false,
955            Inst::Call { info } => (
956                info.caller_conv,
957                info.callee_conv,
958                info.try_call_info.is_some(),
959            ),
960            Inst::CallInd { info } => (
961                info.caller_conv,
962                info.callee_conv,
963                info.try_call_info.is_some(),
964            ),
965            _ => return true,
966        };
967
968        // We exclude call instructions from the clobber-set when they are calls
969        // from caller to callee that both clobber the same register (such as
970        // using the same or similar ABIs). Such calls cannot possibly force any
971        // new registers to be saved in the prologue, because anything that the
972        // callee clobbers, the caller is also allowed to clobber. This both
973        // saves work and enables us to more precisely follow the
974        // half-caller-save, half-callee-save SysV ABI for some vector
975        // registers.
976        //
977        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
978        // more information on this ABI-implementation hack.
979        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, is_exception);
980        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);
981
982        let mut all_clobbers = caller_clobbers;
983        all_clobbers.union_from(callee_clobbers);
984        all_clobbers != caller_clobbers
985    }
986
987    fn is_trap(&self) -> bool {
988        match self {
989            Self::Udf { .. } => true,
990            _ => false,
991        }
992    }
993
994    fn is_args(&self) -> bool {
995        match self {
996            Self::Args { .. } => true,
997            _ => false,
998        }
999    }
1000
1001    fn is_term(&self) -> MachTerminator {
1002        match self {
1003            &Inst::Rets { .. } => MachTerminator::Ret,
1004            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
1005            &Inst::Jump { .. } => MachTerminator::Branch,
1006            &Inst::CondBr { .. } => MachTerminator::Branch,
1007            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
1008            &Inst::IndirectBr { .. } => MachTerminator::Branch,
1009            &Inst::JTSequence { .. } => MachTerminator::Branch,
1010            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1011            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1012            _ => MachTerminator::None,
1013        }
1014    }
1015
1016    fn is_mem_access(&self) -> bool {
1017        match self {
1018            &Inst::ULoad8 { .. }
1019            | &Inst::SLoad8 { .. }
1020            | &Inst::ULoad16 { .. }
1021            | &Inst::SLoad16 { .. }
1022            | &Inst::ULoad32 { .. }
1023            | &Inst::SLoad32 { .. }
1024            | &Inst::ULoad64 { .. }
1025            | &Inst::LoadP64 { .. }
1026            | &Inst::FpuLoad16 { .. }
1027            | &Inst::FpuLoad32 { .. }
1028            | &Inst::FpuLoad64 { .. }
1029            | &Inst::FpuLoad128 { .. }
1030            | &Inst::FpuLoadP64 { .. }
1031            | &Inst::FpuLoadP128 { .. }
1032            | &Inst::Store8 { .. }
1033            | &Inst::Store16 { .. }
1034            | &Inst::Store32 { .. }
1035            | &Inst::Store64 { .. }
1036            | &Inst::StoreP64 { .. }
1037            | &Inst::FpuStore16 { .. }
1038            | &Inst::FpuStore32 { .. }
1039            | &Inst::FpuStore64 { .. }
1040            | &Inst::FpuStore128 { .. } => true,
1041            // TODO: verify this carefully
1042            _ => false,
1043        }
1044    }
1045
1046    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
1047        let bits = ty.bits();
1048
1049        assert!(bits <= 128);
1050        assert!(to_reg.to_reg().class() == from_reg.class());
1051        match from_reg.class() {
1052            RegClass::Int => Inst::Mov {
1053                size: OperandSize::Size64,
1054                rd: to_reg,
1055                rm: from_reg,
1056            },
1057            RegClass::Float => {
1058                if bits > 64 {
1059                    Inst::FpuMove128 {
1060                        rd: to_reg,
1061                        rn: from_reg,
1062                    }
1063                } else {
1064                    Inst::FpuMove64 {
1065                        rd: to_reg,
1066                        rn: from_reg,
1067                    }
1068                }
1069            }
1070            RegClass::Vector => unreachable!(),
1071        }
1072    }
1073
1074    fn is_safepoint(&self) -> bool {
1075        match self {
1076            Inst::Call { .. } | Inst::CallInd { .. } => true,
1077            _ => false,
1078        }
1079    }
1080
1081    fn gen_dummy_use(reg: Reg) -> Inst {
1082        Inst::DummyUse { reg }
1083    }
1084
1085    fn gen_nop(preferred_size: usize) -> Inst {
1086        if preferred_size == 0 {
1087            return Inst::Nop0;
1088        }
1089        // We can't give a NOP (or any insn) < 4 bytes.
1090        assert!(preferred_size >= 4);
1091        Inst::Nop4
1092    }
1093
1094    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1095        match ty {
1096            I8 => Ok((&[RegClass::Int], &[I8])),
1097            I16 => Ok((&[RegClass::Int], &[I16])),
1098            I32 => Ok((&[RegClass::Int], &[I32])),
1099            I64 => Ok((&[RegClass::Int], &[I64])),
1100            F16 => Ok((&[RegClass::Float], &[F16])),
1101            F32 => Ok((&[RegClass::Float], &[F32])),
1102            F64 => Ok((&[RegClass::Float], &[F64])),
1103            F128 => Ok((&[RegClass::Float], &[F128])),
1104            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1105            _ if ty.is_vector() && ty.bits() <= 128 => {
1106                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
1107                Ok((
1108                    &[RegClass::Float],
1109                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1110                ))
1111            }
1112            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
1113            _ => Err(CodegenError::Unsupported(format!(
1114                "Unexpected SSA-value type: {ty}"
1115            ))),
1116        }
1117    }
1118
1119    fn canonical_type_for_rc(rc: RegClass) -> Type {
1120        match rc {
1121            RegClass::Float => types::I8X16,
1122            RegClass::Int => types::I64,
1123            RegClass::Vector => unreachable!(),
1124        }
1125    }
1126
1127    fn gen_jump(target: MachLabel) -> Inst {
1128        Inst::Jump {
1129            dest: BranchTarget::Label(target),
1130        }
1131    }
1132
1133    fn worst_case_size() -> CodeOffset {
1134        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
1135        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
1136        // 64-bit f64 constants.
1137        //
1138        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
1139        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
1140        // feasible for other reasons).
1141        44
1142    }
1143
1144    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1145        RegClass::Int
1146    }
1147
1148    fn gen_block_start(
1149        is_indirect_branch_target: bool,
1150        is_forward_edge_cfi_enabled: bool,
1151    ) -> Option<Self> {
1152        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
1153            Some(Inst::Bti {
1154                targets: BranchTargetType::J,
1155            })
1156        } else {
1157            None
1158        }
1159    }
1160
1161    fn function_alignment() -> FunctionAlignment {
1162        // We use 32-byte alignment for performance reasons, but for correctness
1163        // we would only need 4-byte alignment.
1164        FunctionAlignment {
1165            minimum: 4,
1166            preferred: 32,
1167        }
1168    }
1169}
1170
1171//=============================================================================
1172// Pretty-printing of instructions.
1173
1174fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
1175    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
1176    let mut mem_str = mem_insts
1177        .into_iter()
1178        .map(|inst| inst.print_with_state(&mut EmitState::default()))
1179        .collect::<Vec<_>>()
1180        .join(" ; ");
1181    if !mem_str.is_empty() {
1182        mem_str += " ; ";
1183    }
1184
1185    let mem = mem.pretty_print(access_ty.bytes() as u8);
1186    (mem_str, mem)
1187}
1188
1189fn pretty_print_try_call(info: &TryCallInfo) -> String {
1190    let dests = info
1191        .exception_dests
1192        .iter()
1193        .map(|(tag, label)| format!("{tag:?}: {label:?}"))
1194        .collect::<Vec<_>>()
1195        .join(", ");
1196    format!("; b {:?}; catch [{dests}]", info.continuation)
1197}
1198
1199impl Inst {
1200    fn print_with_state(&self, state: &mut EmitState) -> String {
1201        fn op_name(alu_op: ALUOp) -> &'static str {
1202            match alu_op {
1203                ALUOp::Add => "add",
1204                ALUOp::Sub => "sub",
1205                ALUOp::Orr => "orr",
1206                ALUOp::And => "and",
1207                ALUOp::AndS => "ands",
1208                ALUOp::Eor => "eor",
1209                ALUOp::AddS => "adds",
1210                ALUOp::SubS => "subs",
1211                ALUOp::SMulH => "smulh",
1212                ALUOp::UMulH => "umulh",
1213                ALUOp::SDiv => "sdiv",
1214                ALUOp::UDiv => "udiv",
1215                ALUOp::AndNot => "bic",
1216                ALUOp::OrrNot => "orn",
1217                ALUOp::EorNot => "eon",
1218                ALUOp::Extr => "extr",
1219                ALUOp::Lsr => "lsr",
1220                ALUOp::Asr => "asr",
1221                ALUOp::Lsl => "lsl",
1222                ALUOp::Adc => "adc",
1223                ALUOp::AdcS => "adcs",
1224                ALUOp::Sbc => "sbc",
1225                ALUOp::SbcS => "sbcs",
1226            }
1227        }
1228
1229        match self {
1230            &Inst::Nop0 => "nop-zero-len".to_string(),
1231            &Inst::Nop4 => "nop".to_string(),
1232            &Inst::AluRRR {
1233                alu_op,
1234                size,
1235                rd,
1236                rn,
1237                rm,
1238            } => {
1239                let op = op_name(alu_op);
1240                let rd = pretty_print_ireg(rd.to_reg(), size);
1241                let rn = pretty_print_ireg(rn, size);
1242                let rm = pretty_print_ireg(rm, size);
1243                format!("{op} {rd}, {rn}, {rm}")
1244            }
1245            &Inst::AluRRRR {
1246                alu_op,
1247                size,
1248                rd,
1249                rn,
1250                rm,
1251                ra,
1252            } => {
1253                let (op, da_size) = match alu_op {
1254                    ALUOp3::MAdd => ("madd", size),
1255                    ALUOp3::MSub => ("msub", size),
1256                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1257                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1258                };
1259                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1260                let rn = pretty_print_ireg(rn, size);
1261                let rm = pretty_print_ireg(rm, size);
1262                let ra = pretty_print_ireg(ra, da_size);
1263
1264                format!("{op} {rd}, {rn}, {rm}, {ra}")
1265            }
1266            &Inst::AluRRImm12 {
1267                alu_op,
1268                size,
1269                rd,
1270                rn,
1271                ref imm12,
1272            } => {
1273                let op = op_name(alu_op);
1274                let rd = pretty_print_ireg(rd.to_reg(), size);
1275                let rn = pretty_print_ireg(rn, size);
1276
1277                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1278                    // special-case MOV (used for moving into SP).
1279                    format!("mov {rd}, {rn}")
1280                } else {
1281                    let imm12 = imm12.pretty_print(0);
1282                    format!("{op} {rd}, {rn}, {imm12}")
1283                }
1284            }
1285            &Inst::AluRRImmLogic {
1286                alu_op,
1287                size,
1288                rd,
1289                rn,
1290                ref imml,
1291            } => {
1292                let op = op_name(alu_op);
1293                let rd = pretty_print_ireg(rd.to_reg(), size);
1294                let rn = pretty_print_ireg(rn, size);
1295                let imml = imml.pretty_print(0);
1296                format!("{op} {rd}, {rn}, {imml}")
1297            }
1298            &Inst::AluRRImmShift {
1299                alu_op,
1300                size,
1301                rd,
1302                rn,
1303                ref immshift,
1304            } => {
1305                let op = op_name(alu_op);
1306                let rd = pretty_print_ireg(rd.to_reg(), size);
1307                let rn = pretty_print_ireg(rn, size);
1308                let immshift = immshift.pretty_print(0);
1309                format!("{op} {rd}, {rn}, {immshift}")
1310            }
1311            &Inst::AluRRRShift {
1312                alu_op,
1313                size,
1314                rd,
1315                rn,
1316                rm,
1317                ref shiftop,
1318            } => {
1319                let op = op_name(alu_op);
1320                let rd = pretty_print_ireg(rd.to_reg(), size);
1321                let rn = pretty_print_ireg(rn, size);
1322                let rm = pretty_print_ireg(rm, size);
1323                let shiftop = shiftop.pretty_print(0);
1324                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1325            }
1326            &Inst::AluRRRExtend {
1327                alu_op,
1328                size,
1329                rd,
1330                rn,
1331                rm,
1332                ref extendop,
1333            } => {
1334                let op = op_name(alu_op);
1335                let rd = pretty_print_ireg(rd.to_reg(), size);
1336                let rn = pretty_print_ireg(rn, size);
1337                let rm = pretty_print_ireg(rm, size);
1338                let extendop = extendop.pretty_print(0);
1339                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1340            }
1341            &Inst::BitRR { op, size, rd, rn } => {
1342                let op = op.op_str();
1343                let rd = pretty_print_ireg(rd.to_reg(), size);
1344                let rn = pretty_print_ireg(rn, size);
1345                format!("{op} {rd}, {rn}")
1346            }
1347            &Inst::ULoad8 { rd, ref mem, .. }
1348            | &Inst::SLoad8 { rd, ref mem, .. }
1349            | &Inst::ULoad16 { rd, ref mem, .. }
1350            | &Inst::SLoad16 { rd, ref mem, .. }
1351            | &Inst::ULoad32 { rd, ref mem, .. }
1352            | &Inst::SLoad32 { rd, ref mem, .. }
1353            | &Inst::ULoad64 { rd, ref mem, .. } => {
1354                let is_unscaled = match &mem {
1355                    &AMode::Unscaled { .. } => true,
1356                    _ => false,
1357                };
1358                let (op, size) = match (self, is_unscaled) {
1359                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1360                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1361                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1362                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1363                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1364                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1365                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1366                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1367                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1368                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1369                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1370                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1371                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1372                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1373                    _ => unreachable!(),
1374                };
1375
1376                let rd = pretty_print_ireg(rd.to_reg(), size);
1377                let mem = mem.clone();
1378                let access_ty = self.mem_type().unwrap();
1379                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1380
1381                format!("{mem_str}{op} {rd}, {mem}")
1382            }
1383            &Inst::Store8 { rd, ref mem, .. }
1384            | &Inst::Store16 { rd, ref mem, .. }
1385            | &Inst::Store32 { rd, ref mem, .. }
1386            | &Inst::Store64 { rd, ref mem, .. } => {
1387                let is_unscaled = match &mem {
1388                    &AMode::Unscaled { .. } => true,
1389                    _ => false,
1390                };
1391                let (op, size) = match (self, is_unscaled) {
1392                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1393                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1394                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1395                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1396                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1397                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1398                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1399                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1400                    _ => unreachable!(),
1401                };
1402
1403                let rd = pretty_print_ireg(rd, size);
1404                let mem = mem.clone();
1405                let access_ty = self.mem_type().unwrap();
1406                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1407
1408                format!("{mem_str}{op} {rd}, {mem}")
1409            }
1410            &Inst::StoreP64 {
1411                rt, rt2, ref mem, ..
1412            } => {
1413                let rt = pretty_print_ireg(rt, OperandSize::Size64);
1414                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
1415                let mem = mem.clone();
1416                let mem = mem.pretty_print_default();
1417                format!("stp {rt}, {rt2}, {mem}")
1418            }
1419            &Inst::LoadP64 {
1420                rt, rt2, ref mem, ..
1421            } => {
1422                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
1423                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
1424                let mem = mem.clone();
1425                let mem = mem.pretty_print_default();
1426                format!("ldp {rt}, {rt2}, {mem}")
1427            }
1428            &Inst::Mov { size, rd, rm } => {
1429                let rd = pretty_print_ireg(rd.to_reg(), size);
1430                let rm = pretty_print_ireg(rm, size);
1431                format!("mov {rd}, {rm}")
1432            }
1433            &Inst::MovFromPReg { rd, rm } => {
1434                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1435                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
1436                format!("mov {rd}, {rm}")
1437            }
1438            &Inst::MovToPReg { rd, rm } => {
1439                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
1440                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1441                format!("mov {rd}, {rm}")
1442            }
1443            &Inst::MovWide {
1444                op,
1445                rd,
1446                ref imm,
1447                size,
1448            } => {
1449                let op_str = match op {
1450                    MoveWideOp::MovZ => "movz",
1451                    MoveWideOp::MovN => "movn",
1452                };
1453                let rd = pretty_print_ireg(rd.to_reg(), size);
1454                let imm = imm.pretty_print(0);
1455                format!("{op_str} {rd}, {imm}")
1456            }
1457            &Inst::MovK {
1458                rd,
1459                rn,
1460                ref imm,
1461                size,
1462            } => {
1463                let rn = pretty_print_ireg(rn, size);
1464                let rd = pretty_print_ireg(rd.to_reg(), size);
1465                let imm = imm.pretty_print(0);
1466                format!("movk {rd}, {rn}, {imm}")
1467            }
1468            &Inst::CSel { rd, rn, rm, cond } => {
1469                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1470                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1471                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1472                let cond = cond.pretty_print(0);
1473                format!("csel {rd}, {rn}, {rm}, {cond}")
1474            }
1475            &Inst::CSNeg { rd, rn, rm, cond } => {
1476                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1477                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1478                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1479                let cond = cond.pretty_print(0);
1480                format!("csneg {rd}, {rn}, {rm}, {cond}")
1481            }
1482            &Inst::CSet { rd, cond } => {
1483                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1484                let cond = cond.pretty_print(0);
1485                format!("cset {rd}, {cond}")
1486            }
1487            &Inst::CSetm { rd, cond } => {
1488                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1489                let cond = cond.pretty_print(0);
1490                format!("csetm {rd}, {cond}")
1491            }
1492            &Inst::CCmp {
1493                size,
1494                rn,
1495                rm,
1496                nzcv,
1497                cond,
1498            } => {
1499                let rn = pretty_print_ireg(rn, size);
1500                let rm = pretty_print_ireg(rm, size);
1501                let nzcv = nzcv.pretty_print(0);
1502                let cond = cond.pretty_print(0);
1503                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
1504            }
1505            &Inst::CCmpImm {
1506                size,
1507                rn,
1508                imm,
1509                nzcv,
1510                cond,
1511            } => {
1512                let rn = pretty_print_ireg(rn, size);
1513                let imm = imm.pretty_print(0);
1514                let nzcv = nzcv.pretty_print(0);
1515                let cond = cond.pretty_print(0);
1516                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
1517            }
1518            &Inst::AtomicRMW {
1519                rs, rt, rn, ty, op, ..
1520            } => {
1521                let op = match op {
1522                    AtomicRMWOp::Add => "ldaddal",
1523                    AtomicRMWOp::Clr => "ldclral",
1524                    AtomicRMWOp::Eor => "ldeoral",
1525                    AtomicRMWOp::Set => "ldsetal",
1526                    AtomicRMWOp::Smax => "ldsmaxal",
1527                    AtomicRMWOp::Umax => "ldumaxal",
1528                    AtomicRMWOp::Smin => "ldsminal",
1529                    AtomicRMWOp::Umin => "lduminal",
1530                    AtomicRMWOp::Swp => "swpal",
1531                };
1532
1533                let size = OperandSize::from_ty(ty);
1534                let rs = pretty_print_ireg(rs, size);
1535                let rt = pretty_print_ireg(rt.to_reg(), size);
1536                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1537
1538                let ty_suffix = match ty {
1539                    I8 => "b",
1540                    I16 => "h",
1541                    _ => "",
1542                };
1543                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
1544            }
1545            &Inst::AtomicRMWLoop {
1546                ty,
1547                op,
1548                addr,
1549                operand,
1550                oldval,
1551                scratch1,
1552                scratch2,
1553                ..
1554            } => {
1555                let op = match op {
1556                    AtomicRMWLoopOp::Add => "add",
1557                    AtomicRMWLoopOp::Sub => "sub",
1558                    AtomicRMWLoopOp::Eor => "eor",
1559                    AtomicRMWLoopOp::Orr => "orr",
1560                    AtomicRMWLoopOp::And => "and",
1561                    AtomicRMWLoopOp::Nand => "nand",
1562                    AtomicRMWLoopOp::Smin => "smin",
1563                    AtomicRMWLoopOp::Smax => "smax",
1564                    AtomicRMWLoopOp::Umin => "umin",
1565                    AtomicRMWLoopOp::Umax => "umax",
1566                    AtomicRMWLoopOp::Xchg => "xchg",
1567                };
1568                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1569                let operand = pretty_print_ireg(operand, OperandSize::Size64);
1570                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1571                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
1572                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
1573                format!(
1574                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
1575                    op,
1576                    ty.bits(),
1577                    addr,
1578                    operand,
1579                    oldval,
1580                    scratch1,
1581                    scratch2,
1582                )
1583            }
            // Hardware compare-and-swap (Armv8.1 LSE): acquire+release CAS,
            // with sub-word mnemonics casalb/casalh for 8/16-bit accesses.
            &Inst::AtomicCAS {
                rd, rs, rt, rn, ty, ..
            } => {
                let op = match ty {
                    I8 => "casalb",
                    I16 => "casalh",
                    I32 | I64 => "casal",
                    _ => panic!("Unsupported type: {ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rs = pretty_print_ireg(rs, size);
                let rt = pretty_print_ireg(rt, size);
                // The address register is always printed at full 64-bit width.
                let rn = pretty_print_ireg(rn, OperandSize::Size64);

                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
            }
            // Compare-and-swap loop pseudo-instruction; printed with named
            // operands rather than real assembly syntax, since it expands to
            // several machine instructions at emission time.
            &Inst::AtomicCASLoop {
                ty,
                addr,
                expected,
                replacement,
                oldval,
                scratch,
                ..
            } => {
                let addr = pretty_print_ireg(addr, OperandSize::Size64);
                let expected = pretty_print_ireg(expected, OperandSize::Size64);
                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
                format!(
                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
                    ty.bits(),
                    addr,
                    expected,
                    replacement,
                    oldval,
                    scratch,
                )
            }
            // Load-acquire: sub-word accesses (ldarb/ldarh) target a 32-bit
            // destination register; the address register is 64-bit.
            &Inst::LoadAcquire {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("ldarb", I32),
                    I16 => ("ldarh", I32),
                    I32 => ("ldar", I32),
                    I64 => ("ldar", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt.to_reg(), size);
                format!("{op} {rt}, [{rn}]")
            }
            // Store-release: mirrors LoadAcquire above (stlrb/stlrh/stlr).
            &Inst::StoreRelease {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("stlrb", I32),
                    I16 => ("stlrh", I32),
                    I32 => ("stlr", I32),
                    I64 => ("stlr", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt, size);
                format!("{op} {rt}, [{rn}]")
            }
            // Full data memory barrier, inner-shareable domain.
            &Inst::Fence {} => {
                format!("dmb ish")
            }
            // Consumption-of-speculative-data barrier.
            &Inst::Csdb {} => {
                format!("csdb")
            }
            // Scalar FP-to-FP moves at 32/64-bit width print as "fmov".
            &Inst::FpuMove32 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuMove64 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                format!("fmov {rd}, {rn}")
            }
            // A full 128-bit register move is printed as a vector move of all
            // 16 bytes ("mov vD.16b, vN.16b").
            &Inst::FpuMove128 { rd, rn } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rn = pretty_print_reg(rn);
                format!("mov {rd}.16b, {rn}.16b")
            }
            // Move one vector lane (element `idx`) into a scalar FP register.
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("mov {rd}, {rn}")
            }
            &Inst::FpuExtend { rd, rn, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("fmov {rd}, {rn}")
            }
            // One-operand scalar FP ops. The fcvt conversions change the
            // destination width, so `dst_size` is derived separately from
            // the source `size`.
            &Inst::FpuRR {
                fpu_op,
                size,
                rd,
                rn,
            } => {
                let op = match fpu_op {
                    FPUOp1::Abs => "fabs",
                    FPUOp1::Neg => "fneg",
                    FPUOp1::Sqrt => "fsqrt",
                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
                };
                let dst_size = match fpu_op {
                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
                    _ => size,
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{op} {rd}, {rn}")
            }
            // Two-operand scalar FP arithmetic.
            &Inst::FpuRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let op = match fpu_op {
                    FPUOp2::Add => "fadd",
                    FPUOp2::Sub => "fsub",
                    FPUOp2::Mul => "fmul",
                    FPUOp2::Div => "fdiv",
                    FPUOp2::Max => "fmax",
                    FPUOp2::Min => "fmin",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            // FP shift by immediate: the 32-bit form is printed with a 32x2
            // vector arrangement, the 64-bit form as a scalar D register.
            &Inst::FpuRRI { fpu_op, rd, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
                };

                let (rd, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {rn}, {imm}")
            }
            // Like FpuRRI, but the op also reads the previous destination
            // value, which appears as the extra `ri` operand.
            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
                };

                let (rd, ri, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {ri}, {rn}, {imm}")
            }
            // Fused multiply-add family: three source registers, one dest.
            &Inst::FpuRRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let op = match fpu_op {
                    FPUOp3::MAdd => "fmadd",
                    FPUOp3::MSub => "fmsub",
                    FPUOp3::NMAdd => "fnmadd",
                    FPUOp3::NMSub => "fnmsub",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                let ra = pretty_print_vreg_scalar(ra, size);
                format!("{op} {rd}, {rn}, {rm}, {ra}")
            }
            // Scalar FP compare; sets flags only, so there is no destination.
            &Inst::FpuCmp { size, rn, rm } => {
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("fcmp {rn}, {rm}")
            }
            // FP loads/stores. The addressing mode is finalized for display
            // first; `mem_str` carries any extra lines that finalization
            // produced and is printed before the ldr/str itself.
            &Inst::FpuLoad16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            // 128-bit form: rewrite the default register name to its "q"
            // (128-bit) spelling by replacing the leading letter.
            &Inst::FpuLoad128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuStore16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            // 128-bit store: same "q"-name rewrite as FpuLoad128.
            &Inst::FpuStore128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd);
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            // FP register-pair loads/stores (ldp/stp). Unlike the single
            // loads above, the amode is printed directly with its default
            // formatting (no finalization step).
            &Inst::FpuLoadP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuLoadP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
            // FP -> integer conversions: each variant fixes both the FP
            // source width and the integer destination width.
            &Inst::FpuToInt { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
                };
                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
                let rn = pretty_print_vreg_scalar(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            // Integer -> FP conversions: the mirror image of FpuToInt.
            &Inst::IntToFpu { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
                let rn = pretty_print_ireg(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            // FP conditional select at 16/32/64-bit scalar width.
            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            // FP rounding: the mode picks both the frint* variant
            // (minus/plus/zero/nearest) and the scalar width.
            &Inst::FpuRound { op, rd, rn } => {
                let (inst, size) = match op {
                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{inst} {rd}, {rn}")
            }
            // Move a general-purpose register into a scalar FP/vector reg.
            &Inst::MovToFpu { rd, rn, size } => {
                let operand_size = size.operand_size();
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, operand_size);
                format!("fmov {rd}, {rn}")
            }
            // Materialize an FP immediate into a scalar register.
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            // Insert a GPR value into one vector lane. The destination is
            // also an input (`ri`, the prior vector value), so both appear.
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
                let rn = pretty_print_ireg(rn, size.operand_size());
                format!("mov {rd}, {ri}, {rn}")
            }
            // Zero-extending lane extract: "umov" for 8/16-bit lanes,
            // plain "mov" for 32/64-bit lanes.
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let op = match size {
                    ScalarSize::Size8 => "umov",
                    ScalarSize::Size16 => "umov",
                    ScalarSize::Size32 => "mov",
                    ScalarSize::Size64 => "mov",
                    _ => unimplemented!(),
                };
                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size);
                format!("{op} {rd}, {rn}")
            }
            // Sign-extending lane extract ("smov"); the destination GPR
            // width is given separately by `scalar_size`.
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("smov {rd}, {rn}")
            }
            // Broadcast a GPR into every lane of a vector.
            &Inst::VecDup { rd, rn, size } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size.operand_size());
                format!("dup {rd}, {rn}")
            }
            // Broadcast one lane of an FP/vector register into every lane.
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
                format!("dup {rd}, {rn}")
            }
            // Broadcast an FP immediate into every lane.
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            // Broadcast an integer immediate; `invert` selects "mvni"
            // (bitwise-inverted immediate) over "movi".
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let imm = imm.pretty_print(0);
                let op = if invert { "mvni" } else { "movi" };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("{op} {rd}, {imm}")
            }
            // Vector widening: sxtl/uxtl (signed/unsigned extend-long), with
            // the "2" suffix selecting the high half of the source. The
            // source arrangement uses the narrowed lane size; the destination
            // always uses the full 128-bit arrangement of `lane_size`.
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
                let rd_size = VectorSize::from_lane_size(lane_size, true);
                let (op, rn_size) = match (t, high_half) {
                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, rn_size);
                format!("{op} {rd}, {rn}")
            }
            // Move one lane of `rn` into one lane of `rd`; `ri` is the prior
            // destination value (the other lanes are preserved).
            &Inst::VecMovElement {
                rd,
                ri,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let rd =
                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
                format!("mov {rd}, {ri}, {rn}")
            }
            // Widening one-operand vector ops. Each table entry fixes the
            // mnemonic, destination/source arrangements, and (for shll) the
            // shift-amount suffix; "2" variants read the high source half.
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (op, rd_size, size, suffix) = match (op, high_half) {
                    (VecRRLongOp::Fcvtl16, false) => {
                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
                    }
                    (VecRRLongOp::Fcvtl16, true) => {
                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
                    }
                    (VecRRLongOp::Fcvtl32, false) => {
                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
                    }
                    (VecRRLongOp::Fcvtl32, true) => {
                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
                    }
                    (VecRRLongOp::Shll8, false) => {
                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
                    }
                    (VecRRLongOp::Shll8, true) => {
                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
                    }
                    (VecRRLongOp::Shll16, false) => {
                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
                    }
                    (VecRRLongOp::Shll16, true) => {
                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
                    }
                    (VecRRLongOp::Shll32, false) => {
                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
                    }
                    (VecRRLongOp::Shll32, true) => {
                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, size);

                format!("{op} {rd}, {rn}{suffix}")
            }
            // Narrowing one-operand vector ops. One arm handles both the
            // Low and High variants; `self` is re-matched below to recover
            // which one this is (High writes the upper half and so also has
            // an `ri` input, printed before the source).
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
                ..
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size, false);
                let vec128 = VectorSize::from_lane_size(lane_size, true);
                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };
                let (op, rd_size) = match (op, high_half) {
                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
                };
                let rn = pretty_print_vreg_vector(rn, rn_size);
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let ri = match self {
                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
                    &Inst::VecRRNarrowHigh { ri, .. } => {
                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
                    }
                    _ => unreachable!(),
                };

                format!("{op} {rd}, {ri}{rn}")
            }
            // Pairwise reduction to a scalar (addp over a 64x2 vector).
            &Inst::VecRRPair { op, rd, rn } => {
                let op = match op {
                    VecPairOp::Addp => "addp",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);

                format!("{op} {rd}, {rn}")
            }
            // Pairwise widening adds: each entry fixes the destination and
            // source arrangements for saddlp/uaddlp.
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (op, dest, src) = match op {
                    VecRRPairLongOp::Saddlp8 => {
                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Saddlp16 => {
                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    VecRRPairLongOp::Uaddlp8 => {
                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Uaddlp16 => {
                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
                let rn = pretty_print_vreg_vector(rn, src);

                format!("{op} {rd}, {rn}")
            }
            // Two-operand vector ALU ops. Most use the instruction's own
            // `size`; the pure bitwise ops (and/bic/orr/eor) are always
            // printed with the 8x16 byte arrangement.
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUOp::Sqadd => ("sqadd", size),
                    VecALUOp::Uqadd => ("uqadd", size),
                    VecALUOp::Sqsub => ("sqsub", size),
                    VecALUOp::Uqsub => ("uqsub", size),
                    VecALUOp::Cmeq => ("cmeq", size),
                    VecALUOp::Cmge => ("cmge", size),
                    VecALUOp::Cmgt => ("cmgt", size),
                    VecALUOp::Cmhs => ("cmhs", size),
                    VecALUOp::Cmhi => ("cmhi", size),
                    VecALUOp::Fcmeq => ("fcmeq", size),
                    VecALUOp::Fcmgt => ("fcmgt", size),
                    VecALUOp::Fcmge => ("fcmge", size),
                    VecALUOp::And => ("and", VectorSize::Size8x16),
                    VecALUOp::Bic => ("bic", VectorSize::Size8x16),
                    VecALUOp::Orr => ("orr", VectorSize::Size8x16),
                    VecALUOp::Eor => ("eor", VectorSize::Size8x16),
                    VecALUOp::Umaxp => ("umaxp", size),
                    VecALUOp::Add => ("add", size),
                    VecALUOp::Sub => ("sub", size),
                    VecALUOp::Mul => ("mul", size),
                    VecALUOp::Sshl => ("sshl", size),
                    VecALUOp::Ushl => ("ushl", size),
                    VecALUOp::Umin => ("umin", size),
                    VecALUOp::Smin => ("smin", size),
                    VecALUOp::Umax => ("umax", size),
                    VecALUOp::Smax => ("smax", size),
                    VecALUOp::Urhadd => ("urhadd", size),
                    VecALUOp::Fadd => ("fadd", size),
                    VecALUOp::Fsub => ("fsub", size),
                    VecALUOp::Fdiv => ("fdiv", size),
                    VecALUOp::Fmax => ("fmax", size),
                    VecALUOp::Fmin => ("fmin", size),
                    VecALUOp::Fmul => ("fmul", size),
                    VecALUOp::Addp => ("addp", size),
                    VecALUOp::Zip1 => ("zip1", size),
                    VecALUOp::Zip2 => ("zip2", size),
                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
                    VecALUOp::Uzp1 => ("uzp1", size),
                    VecALUOp::Uzp2 => ("uzp2", size),
                    VecALUOp::Trn1 => ("trn1", size),
                    VecALUOp::Trn2 => ("trn2", size),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            // Vector ALU ops that also read the destination (`ri` is the
            // prior value). Bsl is always printed with the 8x16 arrangement.
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
                    VecALUModOp::Fmla => ("fmla", size),
                    VecALUModOp::Fmls => ("fmls", size),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
            // FP multiply-accumulate by a single lane: `rm` is printed as a
            // vector element (indexed by `idx`). Only Fmla/Fmls are valid
            // here, hence the unreachable catch-all.
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                let (op, size) = match alu_op {
                    VecALUModOp::Fmla => ("fmla", size),
                    VecALUModOp::Fmls => ("fmls", size),
                    _ => unreachable!(),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
2277            &Inst::VecRRRLong {
2278                rd,
2279                rn,
2280                rm,
2281                alu_op,
2282                high_half,
2283            } => {
2284                let (op, dest_size, src_size) = match (alu_op, high_half) {
2285                    (VecRRRLongOp::Smull8, false) => {
2286                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
2287                    }
2288                    (VecRRRLongOp::Smull8, true) => {
2289                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
2290                    }
2291                    (VecRRRLongOp::Smull16, false) => {
2292                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
2293                    }
2294                    (VecRRRLongOp::Smull16, true) => {
2295                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
2296                    }
2297                    (VecRRRLongOp::Smull32, false) => {
2298                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
2299                    }
2300                    (VecRRRLongOp::Smull32, true) => {
2301                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
2302                    }
2303                    (VecRRRLongOp::Umull8, false) => {
2304                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
2305                    }
2306                    (VecRRRLongOp::Umull8, true) => {
2307                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
2308                    }
2309                    (VecRRRLongOp::Umull16, false) => {
2310                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
2311                    }
2312                    (VecRRRLongOp::Umull16, true) => {
2313                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
2314                    }
2315                    (VecRRRLongOp::Umull32, false) => {
2316                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
2317                    }
2318                    (VecRRRLongOp::Umull32, true) => {
2319                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
2320                    }
2321                };
2322                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2323                let rn = pretty_print_vreg_vector(rn, src_size);
2324                let rm = pretty_print_vreg_vector(rm, src_size);
2325                format!("{op} {rd}, {rn}, {rm}")
2326            }
2327            &Inst::VecRRRLongMod {
2328                rd,
2329                ri,
2330                rn,
2331                rm,
2332                alu_op,
2333                high_half,
2334            } => {
2335                let (op, dest_size, src_size) = match (alu_op, high_half) {
2336                    (VecRRRLongModOp::Umlal8, false) => {
2337                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
2338                    }
2339                    (VecRRRLongModOp::Umlal8, true) => {
2340                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
2341                    }
2342                    (VecRRRLongModOp::Umlal16, false) => {
2343                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
2344                    }
2345                    (VecRRRLongModOp::Umlal16, true) => {
2346                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
2347                    }
2348                    (VecRRRLongModOp::Umlal32, false) => {
2349                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
2350                    }
2351                    (VecRRRLongModOp::Umlal32, true) => {
2352                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
2353                    }
2354                };
2355                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2356                let ri = pretty_print_vreg_vector(ri, dest_size);
2357                let rn = pretty_print_vreg_vector(rn, src_size);
2358                let rm = pretty_print_vreg_vector(rm, src_size);
2359                format!("{op} {rd}, {ri}, {rn}, {rm}")
2360            }
2361            &Inst::VecMisc { op, rd, rn, size } => {
2362                let (op, size, suffix) = match op {
2363                    VecMisc2::Not => (
2364                        "mvn",
2365                        if size.is_128bits() {
2366                            VectorSize::Size8x16
2367                        } else {
2368                            VectorSize::Size8x8
2369                        },
2370                        "",
2371                    ),
2372                    VecMisc2::Neg => ("neg", size, ""),
2373                    VecMisc2::Abs => ("abs", size, ""),
2374                    VecMisc2::Fabs => ("fabs", size, ""),
2375                    VecMisc2::Fneg => ("fneg", size, ""),
2376                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
2377                    VecMisc2::Rev16 => ("rev16", size, ""),
2378                    VecMisc2::Rev32 => ("rev32", size, ""),
2379                    VecMisc2::Rev64 => ("rev64", size, ""),
2380                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
2381                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
2382                    VecMisc2::Scvtf => ("scvtf", size, ""),
2383                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
2384                    VecMisc2::Frintn => ("frintn", size, ""),
2385                    VecMisc2::Frintz => ("frintz", size, ""),
2386                    VecMisc2::Frintm => ("frintm", size, ""),
2387                    VecMisc2::Frintp => ("frintp", size, ""),
2388                    VecMisc2::Cnt => ("cnt", size, ""),
2389                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
2390                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
2391                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
2392                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
2393                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
2394                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
2395                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
2396                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
2397                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
2398                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
2399                };
2400                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2401                let rn = pretty_print_vreg_vector(rn, size);
2402                format!("{op} {rd}, {rn}{suffix}")
2403            }
2404            &Inst::VecLanes { op, rd, rn, size } => {
2405                let op = match op {
2406                    VecLanesOp::Uminv => "uminv",
2407                    VecLanesOp::Addv => "addv",
2408                };
2409                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
2410                let rn = pretty_print_vreg_vector(rn, size);
2411                format!("{op} {rd}, {rn}")
2412            }
2413            &Inst::VecShiftImm {
2414                op,
2415                rd,
2416                rn,
2417                size,
2418                imm,
2419            } => {
2420                let op = match op {
2421                    VecShiftImmOp::Shl => "shl",
2422                    VecShiftImmOp::Ushr => "ushr",
2423                    VecShiftImmOp::Sshr => "sshr",
2424                };
2425                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2426                let rn = pretty_print_vreg_vector(rn, size);
2427                format!("{op} {rd}, {rn}, #{imm}")
2428            }
2429            &Inst::VecShiftImmMod {
2430                op,
2431                rd,
2432                ri,
2433                rn,
2434                size,
2435                imm,
2436            } => {
2437                let op = match op {
2438                    VecShiftImmModOp::Sli => "sli",
2439                };
2440                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2441                let ri = pretty_print_vreg_vector(ri, size);
2442                let rn = pretty_print_vreg_vector(rn, size);
2443                format!("{op} {rd}, {ri}, {rn}, #{imm}")
2444            }
2445            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2446                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2447                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2448                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2449                format!("ext {rd}, {rn}, {rm}, #{imm4}")
2450            }
2451            &Inst::VecTbl { rd, rn, rm } => {
2452                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2453                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2454                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2455                format!("tbl {rd}, {{ {rn} }}, {rm}")
2456            }
2457            &Inst::VecTblExt { rd, ri, rn, rm } => {
2458                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2459                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2460                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2461                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2462                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
2463            }
2464            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2465                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2466                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2467                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2468                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2469                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
2470            }
2471            &Inst::VecTbl2Ext {
2472                rd,
2473                ri,
2474                rn,
2475                rn2,
2476                rm,
2477            } => {
2478                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2479                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2480                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2481                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2482                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2483                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
2484            }
2485            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
2486                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2487                let rn = pretty_print_reg(rn);
2488
2489                format!("ld1r {{ {rd} }}, [{rn}]")
2490            }
2491            &Inst::VecCSel { rd, rn, rm, cond } => {
2492                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2493                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2494                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2495                let cond = cond.pretty_print(0);
2496                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
2497            }
2498            &Inst::MovToNZCV { rn } => {
2499                let rn = pretty_print_reg(rn);
2500                format!("msr nzcv, {rn}")
2501            }
2502            &Inst::MovFromNZCV { rd } => {
2503                let rd = pretty_print_reg(rd.to_reg());
2504                format!("mrs {rd}, nzcv")
2505            }
2506            &Inst::Extend {
2507                rd,
2508                rn,
2509                signed: false,
2510                from_bits: 1,
2511                ..
2512            } => {
2513                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2514                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2515                format!("and {rd}, {rn}, #1")
2516            }
2517            &Inst::Extend {
2518                rd,
2519                rn,
2520                signed: false,
2521                from_bits: 32,
2522                to_bits: 64,
2523            } => {
2524                // The case of a zero extension from 32 to 64 bits, is implemented
2525                // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
2526                // the top 32 bits.
2527                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2528                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2529                format!("mov {rd}, {rn}")
2530            }
2531            &Inst::Extend {
2532                rd,
2533                rn,
2534                signed,
2535                from_bits,
2536                to_bits,
2537            } => {
2538                assert!(from_bits <= to_bits);
2539                let op = match (signed, from_bits) {
2540                    (false, 8) => "uxtb",
2541                    (true, 8) => "sxtb",
2542                    (false, 16) => "uxth",
2543                    (true, 16) => "sxth",
2544                    (true, 32) => "sxtw",
2545                    (true, _) => "sbfx",
2546                    (false, _) => "ubfx",
2547                };
2548                if op == "sbfx" || op == "ubfx" {
2549                    let dest_size = OperandSize::from_bits(to_bits);
2550                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2551                    let rn = pretty_print_ireg(rn, dest_size);
2552                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
2553                } else {
2554                    let dest_size = if signed {
2555                        OperandSize::from_bits(to_bits)
2556                    } else {
2557                        OperandSize::Size32
2558                    };
2559                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2560                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
2561                    format!("{op} {rd}, {rn}")
2562                }
2563            }
2564            &Inst::Call { ref info } => {
2565                let try_call = info
2566                    .try_call_info
2567                    .as_ref()
2568                    .map(|tci| pretty_print_try_call(tci))
2569                    .unwrap_or_default();
2570                format!("bl 0{try_call}")
2571            }
2572            &Inst::CallInd { ref info } => {
2573                let rn = pretty_print_reg(info.dest);
2574                let try_call = info
2575                    .try_call_info
2576                    .as_ref()
2577                    .map(|tci| pretty_print_try_call(tci))
2578                    .unwrap_or_default();
2579                format!("blr {rn}{try_call}")
2580            }
2581            &Inst::ReturnCall { ref info } => {
2582                let mut s = format!(
2583                    "return_call {:?} new_stack_arg_size:{}",
2584                    info.dest, info.new_stack_arg_size
2585                );
2586                for ret in &info.uses {
2587                    let preg = pretty_print_reg(ret.preg);
2588                    let vreg = pretty_print_reg(ret.vreg);
2589                    write!(&mut s, " {vreg}={preg}").unwrap();
2590                }
2591                s
2592            }
2593            &Inst::ReturnCallInd { ref info } => {
2594                let callee = pretty_print_reg(info.dest);
2595                let mut s = format!(
2596                    "return_call_ind {callee} new_stack_arg_size:{}",
2597                    info.new_stack_arg_size
2598                );
2599                for ret in &info.uses {
2600                    let preg = pretty_print_reg(ret.preg);
2601                    let vreg = pretty_print_reg(ret.vreg);
2602                    write!(&mut s, " {vreg}={preg}").unwrap();
2603                }
2604                s
2605            }
2606            &Inst::Args { ref args } => {
2607                let mut s = "args".to_string();
2608                for arg in args {
2609                    let preg = pretty_print_reg(arg.preg);
2610                    let def = pretty_print_reg(arg.vreg.to_reg());
2611                    write!(&mut s, " {def}={preg}").unwrap();
2612                }
2613                s
2614            }
2615            &Inst::Rets { ref rets } => {
2616                let mut s = "rets".to_string();
2617                for ret in rets {
2618                    let preg = pretty_print_reg(ret.preg);
2619                    let vreg = pretty_print_reg(ret.vreg);
2620                    write!(&mut s, " {vreg}={preg}").unwrap();
2621                }
2622                s
2623            }
2624            &Inst::Ret {} => "ret".to_string(),
2625            &Inst::AuthenticatedRet { key, is_hint } => {
2626                let key = match key {
2627                    APIKey::AZ => "az",
2628                    APIKey::BZ => "bz",
2629                    APIKey::ASP => "asp",
2630                    APIKey::BSP => "bsp",
2631                };
2632                match is_hint {
2633                    false => format!("reta{key}"),
2634                    true => format!("auti{key} ; ret"),
2635                }
2636            }
2637            &Inst::Jump { ref dest } => {
2638                let dest = dest.pretty_print(0);
2639                format!("b {dest}")
2640            }
2641            &Inst::CondBr {
2642                ref taken,
2643                ref not_taken,
2644                ref kind,
2645            } => {
2646                let taken = taken.pretty_print(0);
2647                let not_taken = not_taken.pretty_print(0);
2648                match kind {
2649                    &CondBrKind::Zero(reg, size) => {
2650                        let reg = pretty_print_reg_sized(reg, size);
2651                        format!("cbz {reg}, {taken} ; b {not_taken}")
2652                    }
2653                    &CondBrKind::NotZero(reg, size) => {
2654                        let reg = pretty_print_reg_sized(reg, size);
2655                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2656                    }
2657                    &CondBrKind::Cond(c) => {
2658                        let c = c.pretty_print(0);
2659                        format!("b.{c} {taken} ; b {not_taken}")
2660                    }
2661                }
2662            }
2663            &Inst::TestBitAndBranch {
2664                kind,
2665                ref taken,
2666                ref not_taken,
2667                rn,
2668                bit,
2669            } => {
2670                let cond = match kind {
2671                    TestBitAndBranchKind::Z => "z",
2672                    TestBitAndBranchKind::NZ => "nz",
2673                };
2674                let taken = taken.pretty_print(0);
2675                let not_taken = not_taken.pretty_print(0);
2676                let rn = pretty_print_reg(rn);
2677                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2678            }
2679            &Inst::IndirectBr { rn, .. } => {
2680                let rn = pretty_print_reg(rn);
2681                format!("br {rn}")
2682            }
2683            &Inst::Brk => "brk #0".to_string(),
2684            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2685            &Inst::TrapIf {
2686                ref kind,
2687                trap_code,
2688            } => match kind {
2689                &CondBrKind::Zero(reg, size) => {
2690                    let reg = pretty_print_reg_sized(reg, size);
2691                    format!("cbz {reg}, #trap={trap_code}")
2692                }
2693                &CondBrKind::NotZero(reg, size) => {
2694                    let reg = pretty_print_reg_sized(reg, size);
2695                    format!("cbnz {reg}, #trap={trap_code}")
2696                }
2697                &CondBrKind::Cond(c) => {
2698                    let c = c.pretty_print(0);
2699                    format!("b.{c} #trap={trap_code}")
2700                }
2701            },
2702            &Inst::Adr { rd, off } => {
2703                let rd = pretty_print_reg(rd.to_reg());
2704                format!("adr {rd}, pc+{off}")
2705            }
2706            &Inst::Adrp { rd, off } => {
2707                let rd = pretty_print_reg(rd.to_reg());
2708                // This instruction addresses 4KiB pages, so multiply it by the page size.
2709                let byte_offset = off * 4096;
2710                format!("adrp {rd}, pc+{byte_offset}")
2711            }
2712            &Inst::Word4 { data } => format!("data.i32 {data}"),
2713            &Inst::Word8 { data } => format!("data.i64 {data}"),
2714            &Inst::JTSequence {
2715                default,
2716                ref targets,
2717                ridx,
2718                rtmp1,
2719                rtmp2,
2720                ..
2721            } => {
2722                let ridx = pretty_print_reg(ridx);
2723                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
2724                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
2725                let default_target = BranchTarget::Label(default).pretty_print(0);
2726                format!(
2727                    concat!(
2728                        "b.hs {} ; ",
2729                        "csel {}, xzr, {}, hs ; ",
2730                        "csdb ; ",
2731                        "adr {}, pc+16 ; ",
2732                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
2733                        "add {}, {}, {} ; ",
2734                        "br {} ; ",
2735                        "jt_entries {:?}"
2736                    ),
2737                    default_target,
2738                    rtmp2,
2739                    ridx,
2740                    rtmp1,
2741                    rtmp2,
2742                    rtmp1,
2743                    rtmp2,
2744                    rtmp1,
2745                    rtmp1,
2746                    rtmp2,
2747                    rtmp1,
2748                    targets
2749                )
2750            }
2751            &Inst::LoadExtName {
2752                rd,
2753                ref name,
2754                offset,
2755            } => {
2756                let rd = pretty_print_reg(rd.to_reg());
2757                format!("load_ext_name {rd}, {name:?}+{offset}")
2758            }
2759            &Inst::LoadAddr { rd, ref mem } => {
2760                // TODO: we really should find a better way to avoid duplication of
2761                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
2762                // expansion stage (i.e., legalization, but without the slow edit-in-place
2763                // of the existing legalization framework).
2764                let mem = mem.clone();
2765                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
2766                let mut ret = String::new();
2767                for inst in mem_insts.into_iter() {
2768                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2769                }
2770                let (reg, index_reg, offset) = match mem {
2771                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
2772                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
2773                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
2774                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
2775                };
2776                let abs_offset = if offset < 0 {
2777                    -offset as u64
2778                } else {
2779                    offset as u64
2780                };
2781                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
2782
2783                if let Some((idx, extendop)) = index_reg {
2784                    let add = Inst::AluRRRExtend {
2785                        alu_op: ALUOp::Add,
2786                        size: OperandSize::Size64,
2787                        rd,
2788                        rn: reg,
2789                        rm: idx,
2790                        extendop,
2791                    };
2792
2793                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2794                } else if offset == 0 {
2795                    let mov = Inst::gen_move(rd, reg, I64);
2796                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
2797                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
2798                    let add = Inst::AluRRImm12 {
2799                        alu_op,
2800                        size: OperandSize::Size64,
2801                        rd,
2802                        rn: reg,
2803                        imm12,
2804                    };
2805                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2806                } else {
2807                    let tmp = writable_spilltmp_reg();
2808                    for inst in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
2809                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2810                    }
2811                    let add = Inst::AluRRR {
2812                        alu_op,
2813                        size: OperandSize::Size64,
2814                        rd,
2815                        rn: reg,
2816                        rm: tmp.to_reg(),
2817                    };
2818                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2819                }
2820                ret
2821            }
2822            &Inst::Paci { key } => {
2823                let key = match key {
2824                    APIKey::AZ => "az",
2825                    APIKey::BZ => "bz",
2826                    APIKey::ASP => "asp",
2827                    APIKey::BSP => "bsp",
2828                };
2829
2830                "paci".to_string() + key
2831            }
2832            &Inst::Xpaclri => "xpaclri".to_string(),
2833            &Inst::Bti { targets } => {
2834                let targets = match targets {
2835                    BranchTargetType::None => "",
2836                    BranchTargetType::C => " c",
2837                    BranchTargetType::J => " j",
2838                    BranchTargetType::JC => " jc",
2839                };
2840
2841                "bti".to_string() + targets
2842            }
2843            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),
2844
2845            &Inst::ElfTlsGetAddr {
2846                ref symbol,
2847                rd,
2848                tmp,
2849            } => {
2850                let rd = pretty_print_reg(rd.to_reg());
2851                let tmp = pretty_print_reg(tmp.to_reg());
2852                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
2853            }
2854            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
2855                let rd = pretty_print_reg(rd.to_reg());
2856                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2857            }
2858            &Inst::Unwind { ref inst } => {
2859                format!("unwind {inst:?}")
2860            }
2861            &Inst::DummyUse { reg } => {
2862                let reg = pretty_print_reg(reg);
2863                format!("dummy_use {reg}")
2864            }
2865            &Inst::StackProbeLoop { start, end, step } => {
2866                let start = pretty_print_reg(start.to_reg());
2867                let end = pretty_print_reg(end);
2868                let step = step.pretty_print(0);
2869                format!("stack_probe_loop {start}, {end}, {step}")
2870            }
2871        }
2872    }
2873}
2874
2875//=============================================================================
2876// Label fixups and jump veneers.
2877
/// Different forms of label references for different instruction formats.
///
/// Each variant describes where, and at what scale, a PC-relative immediate
/// lives inside the 32-bit instruction word (or, for `PCRel32`, a raw data
/// word); the `MachInstLabelUse` impl below derives the reachable ranges and
/// performs the actual patching.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    Branch26,
    #[allow(dead_code)]
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    Ldr19,
    #[allow(dead_code)]
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables.
    PCRel32,
}
2902
2903impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be
    /// 4-byte-aligned, so veneers (which are just instruction sequences)
    /// need no stricter alignment than that.
    const ALIGN: CodeOffset = 4;
2906
2907    /// Maximum PC-relative range (positive), inclusive.
2908    fn max_pos_range(self) -> CodeOffset {
2909        match self {
2910            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
2911            // range. Signed, so +2^(N+1) from zero. Likewise for two other
2912            // shifted cases below.
2913            LabelUse::Branch14 => (1 << 15) - 1,
2914            LabelUse::Branch19 => (1 << 20) - 1,
2915            LabelUse::Branch26 => (1 << 27) - 1,
2916            LabelUse::Ldr19 => (1 << 20) - 1,
2917            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
2918            // range.
2919            LabelUse::Adr21 => (1 << 20) - 1,
2920            LabelUse::PCRel32 => 0x7fffffff,
2921        }
2922    }
2923
2924    /// Maximum PC-relative range (negative).
2925    fn max_neg_range(self) -> CodeOffset {
2926        // All forms are twos-complement signed offsets, so negative limit is one more than
2927        // positive limit.
2928        self.max_pos_range() + 1
2929    }
2930
2931    /// Size of window into code needed to do the patch.
2932    fn patch_size(self) -> CodeOffset {
2933        // Patch is on one instruction only for all of these label reference types.
2934        4
2935    }
2936
2937    /// Perform the patch.
2938    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
2939        let pc_rel = (label_offset as i64) - (use_offset as i64);
2940        debug_assert!(pc_rel <= self.max_pos_range() as i64);
2941        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
2942        let pc_rel = pc_rel as u32;
2943        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
2944        let mask = match self {
2945            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
2946            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
2947            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
2948            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
2949            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
2950            LabelUse::PCRel32 => 0xffffffff,
2951        };
2952        let pc_rel_shifted = match self {
2953            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
2954            _ => {
2955                debug_assert!(pc_rel & 3 == 0);
2956                pc_rel >> 2
2957            }
2958        };
2959        let pc_rel_inserted = match self {
2960            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
2961            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
2962            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
2963            LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
2964            LabelUse::PCRel32 => pc_rel_shifted,
2965        };
2966        let is_add = match self {
2967            LabelUse::PCRel32 => true,
2968            _ => false,
2969        };
2970        let insn_word = if is_add {
2971            insn_word.wrapping_add(pc_rel_inserted)
2972        } else {
2973            (insn_word & !mask) | pc_rel_inserted
2974        };
2975        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
2976    }
2977
2978    /// Is a veneer supported for this label reference type?
2979    fn supports_veneer(self) -> bool {
2980        match self {
2981            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
2982            LabelUse::Branch26 => true,                      // veneer is a PCRel32
2983            _ => false,
2984        }
2985    }
2986
2987    /// How large is the veneer, if supported?
2988    fn veneer_size(self) -> CodeOffset {
2989        match self {
2990            LabelUse::Branch14 | LabelUse::Branch19 => 4,
2991            LabelUse::Branch26 => 20,
2992            _ => unreachable!(),
2993        }
2994    }
2995
2996    fn worst_case_veneer_size() -> CodeOffset {
2997        20
2998    }
2999
3000    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
3001    /// an offset and label-use for the veneer's use of the original label.
3002    fn generate_veneer(
3003        self,
3004        buffer: &mut [u8],
3005        veneer_offset: CodeOffset,
3006    ) -> (CodeOffset, LabelUse) {
3007        match self {
3008            LabelUse::Branch14 | LabelUse::Branch19 => {
3009                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
3010                // bother with constructing an Inst.
3011                let insn_word = 0b000101 << 26;
3012                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3013                (veneer_offset, LabelUse::Branch26)
3014            }
3015
3016            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
3017            // get a further range. This jump translates to a jump to a
3018            // relative location based on the address of the constant loaded
3019            // from here.
3020            //
3021            // If this path is taken from a call instruction then caller-saved
3022            // registers are available (minus arguments), so x16/x17 are
3023            // available. Otherwise for intra-function jumps we also reserve
3024            // x16/x17 as spill-style registers. In both cases these are
3025            // available for us to use.
3026            LabelUse::Branch26 => {
3027                let tmp1 = regs::spilltmp_reg();
3028                let tmp1_w = regs::writable_spilltmp_reg();
3029                let tmp2 = regs::tmp2_reg();
3030                let tmp2_w = regs::writable_tmp2_reg();
3031                // ldrsw x16, 16
3032                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
3033                // adr x17, 12
3034                let adr = emit::enc_adr(12, tmp2_w);
3035                // add x16, x16, x17
3036                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
3037                // br x16
3038                let br = emit::enc_br(tmp1);
3039                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
3040                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
3041                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
3042                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
3043                // the 4-byte signed immediate we'll load is after these
3044                // instructions, 16-bytes in.
3045                (veneer_offset + 16, LabelUse::PCRel32)
3046            }
3047
3048            _ => panic!("Unsupported label-reference type for veneer generation!"),
3049        }
3050    }
3051
3052    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
3053        match (reloc, addend) {
3054            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
3055            _ => None,
3056        }
3057    }
3058}
3059
#[cfg(test)]
mod tests {
    use super::*;

    /// Guard against unintentionally growing the `Inst` enum: a bigger
    /// `Inst` means more memory traffic for every lowered function.
    #[test]
    fn inst_size_test() {
        // 32-bit pointer targets — except `arm`, which uses the larger
        // layout — pack `Inst` into 28 bytes; everything else gets 32.
        let small_32_bit_target =
            cfg!(target_pointer_width = "32") && !cfg!(target_arch = "arm");
        let expected = match small_32_bit_target {
            true => 28,
            false => 32,
        };
        assert_eq!(expected, std::mem::size_of::<Inst>());
    }
}