// cranelift_codegen/isa/aarch64/inst/mod.rs

1//! This module defines aarch64-specific machine instruction types.
2
3use crate::binemit::{Addend, CodeOffset, Reloc};
4use crate::ir::types::{F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16};
5use crate::ir::{types, MemFlags, Type};
6use crate::isa::{CallConv, FunctionAlignment};
7use crate::machinst::*;
8use crate::{settings, CodegenError, CodegenResult};
9
10use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
11
12use alloc::vec::Vec;
13use smallvec::{smallvec, SmallVec};
14use std::fmt::Write;
15use std::string::{String, ToString};
16
17pub(crate) mod regs;
18pub(crate) use self::regs::*;
19pub mod imms;
20pub use self::imms::*;
21pub mod args;
22pub use self::args::*;
23pub mod emit;
24pub(crate) use self::emit::*;
25use crate::isa::aarch64::abi::AArch64MachineDeps;
26
27pub(crate) mod unwind;
28
29#[cfg(test)]
30mod emit_tests;
31
32//=============================================================================
33// Instructions (top level): definition
34
35pub use crate::isa::aarch64::lower::isle::generated_code::{
36    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
37    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
38    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
39    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
40};
41
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// 32-bit unsigned right shift. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// 64-bit unsigned right shift. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}
50
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// 32-bit shift left and insert. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// 64-bit shift left and insert. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
61
62impl BitOp {
63    /// Get the assembly mnemonic for this opcode.
64    pub fn op_str(&self) -> &'static str {
65        match self {
66            BitOp::RBit => "rbit",
67            BitOp::Clz => "clz",
68            BitOp::Cls => "cls",
69            BitOp::Rev16 => "rev16",
70            BitOp::Rev32 => "rev32",
71            BitOp::Rev64 => "rev64",
72        }
73    }
74}
75
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum. Generic over `T`, the type of
/// the call destination.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to.
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
}
91
/// Count how many of the low `num_half_words` 16-bit half-words of `value`
/// are zero. Used to decide whether materializing a constant should start
/// with `movz` (skipping 0x0000 half-words) or `movn` (skipping 0xffff
/// half-words).
fn count_zero_half_words(value: u64, num_half_words: u8) -> usize {
    (0..num_half_words)
        .filter(|&i| (value >> (16 * u32::from(i))) & 0xffff == 0)
        .count()
}
103
impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    ///
    /// Returns the (possibly multi-instruction) sequence that leaves `value`
    /// in `rd`; `alloc_tmp` is used to allocate scratch registers for
    /// intermediate results when more than one instruction is needed.
    pub fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        value: u64,
        alloc_tmp: &mut F,
    ) -> SmallVec<[Inst; 4]> {
        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
        // if modifications are made here before this is deleted after moving to
        // ISLE then those locations should be updated as well.

        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovZ,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovN,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr,
                size: OperandSize::Size64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            // General case: build the constant 16 bits at a time with a
            // MOVZ/MOVN followed by MOVKs.
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            // `negated` is the bitwise complement restricted to the half-words
            // we will actually emit (low 32 bits in the 32-bit case).
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };

            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);

            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };

            // Collect (index, half-word) pairs that must be materialized
            // explicitly; the skipped value is produced implicitly by the
            // first MOVZ/MOVN.
            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
                .filter_map(|i| {
                    let imm16 = (value >> (16 * i)) & 0xffff;
                    if imm16 == ignored_halfword {
                        None
                    } else {
                        Some((i, imm16))
                    }
                })
                .collect();

            let mut prev_result = None;
            // `halfwords` is non-empty here: a value whose half-words were all
            // skippable would have been handled by the MOVZ/MOVN cases above.
            let last_index = halfwords.last().unwrap().0;
            for (i, imm16) in halfwords {
                let shift = i * 16;
                // Only the final instruction writes the real destination;
                // earlier partial results go into fresh temporaries.
                let rd = if i == last_index { rd } else { alloc_tmp(I16) };

                if let Some(rn) = prev_result {
                    // Subsequent half-words are inserted with MOVK, keeping
                    // the bits already placed in `rn`.
                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                    insts.push(Inst::MovK { rd, rn, imm, size });
                } else {
                    if first_is_inverted {
                        // MOVN writes the complement of the (shifted)
                        // immediate, so pre-invert the half-word.
                        let imm =
                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
                                .unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovN,
                            rd,
                            imm,
                            size,
                        });
                    } else {
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovZ,
                            rd,
                            imm,
                            size,
                        });
                    }
                }

                prev_result = Some(rd.to_reg());
            }

            // At least one instruction must have been emitted.
            assert!(prev_result.is_some());

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            I32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            I64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            F16 => Inst::FpuLoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuLoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuLoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                // Remaining vector/float types are handled by width: 128-bit
                // and 64-bit FPU loads. Other widths are unsupported here.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    if bits == 128 {
                        Inst::FpuLoad128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuLoad64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            I32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            I64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            F16 => Inst::FpuStore16 {
                rd: from_reg,
                mem,
                flags,
            },
            F32 => Inst::FpuStore32 {
                rd: from_reg,
                mem,
                flags,
            },
            F64 => Inst::FpuStore64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                // Mirror of `gen_load`: 128-bit or 64-bit FPU stores for the
                // remaining vector/float types.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    if bits == 128 {
                        Inst::FpuStore128 { rd, mem, flags }
                    } else {
                        assert_eq!(bits, 64);
                        Inst::FpuStore64 { rd, mem, flags }
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// What type does this load or store instruction access in memory? When
    /// uimm12 encoding is used, the size of this type is the amount that
    /// immediate offsets are scaled by.
    ///
    /// Returns `None` for instructions that do not access memory.
    pub fn mem_type(&self) -> Option<Type> {
        match self {
            Inst::ULoad8 { .. } => Some(I8),
            Inst::SLoad8 { .. } => Some(I8),
            Inst::ULoad16 { .. } => Some(I16),
            Inst::SLoad16 { .. } => Some(I16),
            Inst::ULoad32 { .. } => Some(I32),
            Inst::SLoad32 { .. } => Some(I32),
            Inst::ULoad64 { .. } => Some(I64),
            Inst::FpuLoad16 { .. } => Some(F16),
            Inst::FpuLoad32 { .. } => Some(F32),
            Inst::FpuLoad64 { .. } => Some(F64),
            Inst::FpuLoad128 { .. } => Some(I8X16),
            Inst::Store8 { .. } => Some(I8),
            Inst::Store16 { .. } => Some(I16),
            Inst::Store32 { .. } => Some(I32),
            Inst::Store64 { .. } => Some(I64),
            Inst::FpuStore16 { .. } => Some(F16),
            Inst::FpuStore32 { .. } => Some(F32),
            Inst::FpuStore64 { .. } => Some(F64),
            Inst::FpuStore128 { .. } => Some(I8X16),
            _ => None,
        }
    }
}
351
352//=============================================================================
353// Instructions: get_regs
354
355fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
356    match memarg {
357        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
358            collector.reg_use(rn);
359        }
360        AMode::RegReg { rn, rm, .. }
361        | AMode::RegScaled { rn, rm, .. }
362        | AMode::RegScaledExtended { rn, rm, .. }
363        | AMode::RegExtended { rn, rm, .. } => {
364            collector.reg_use(rn);
365            collector.reg_use(rm);
366        }
367        AMode::Label { .. } => {}
368        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
369        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
370        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
371        AMode::RegOffset { rn, .. } => {
372            collector.reg_use(rn);
373        }
374        AMode::Const { .. } => {}
375    }
376}
377
378fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
379    match pairmemarg {
380        PairAMode::SignedOffset { reg, .. } => {
381            collector.reg_use(reg);
382        }
383        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
384    }
385}
386
387fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
388    match inst {
389        Inst::AluRRR { rd, rn, rm, .. } => {
390            collector.reg_def(rd);
391            collector.reg_use(rn);
392            collector.reg_use(rm);
393        }
394        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
395            collector.reg_def(rd);
396            collector.reg_use(rn);
397            collector.reg_use(rm);
398            collector.reg_use(ra);
399        }
400        Inst::AluRRImm12 { rd, rn, .. } => {
401            collector.reg_def(rd);
402            collector.reg_use(rn);
403        }
404        Inst::AluRRImmLogic { rd, rn, .. } => {
405            collector.reg_def(rd);
406            collector.reg_use(rn);
407        }
408        Inst::AluRRImmShift { rd, rn, .. } => {
409            collector.reg_def(rd);
410            collector.reg_use(rn);
411        }
412        Inst::AluRRRShift { rd, rn, rm, .. } => {
413            collector.reg_def(rd);
414            collector.reg_use(rn);
415            collector.reg_use(rm);
416        }
417        Inst::AluRRRExtend { rd, rn, rm, .. } => {
418            collector.reg_def(rd);
419            collector.reg_use(rn);
420            collector.reg_use(rm);
421        }
422        Inst::BitRR { rd, rn, .. } => {
423            collector.reg_def(rd);
424            collector.reg_use(rn);
425        }
426        Inst::ULoad8 { rd, mem, .. }
427        | Inst::SLoad8 { rd, mem, .. }
428        | Inst::ULoad16 { rd, mem, .. }
429        | Inst::SLoad16 { rd, mem, .. }
430        | Inst::ULoad32 { rd, mem, .. }
431        | Inst::SLoad32 { rd, mem, .. }
432        | Inst::ULoad64 { rd, mem, .. } => {
433            collector.reg_def(rd);
434            memarg_operands(mem, collector);
435        }
436        Inst::Store8 { rd, mem, .. }
437        | Inst::Store16 { rd, mem, .. }
438        | Inst::Store32 { rd, mem, .. }
439        | Inst::Store64 { rd, mem, .. } => {
440            collector.reg_use(rd);
441            memarg_operands(mem, collector);
442        }
443        Inst::StoreP64 { rt, rt2, mem, .. } => {
444            collector.reg_use(rt);
445            collector.reg_use(rt2);
446            pairmemarg_operands(mem, collector);
447        }
448        Inst::LoadP64 { rt, rt2, mem, .. } => {
449            collector.reg_def(rt);
450            collector.reg_def(rt2);
451            pairmemarg_operands(mem, collector);
452        }
453        Inst::Mov { rd, rm, .. } => {
454            collector.reg_def(rd);
455            collector.reg_use(rm);
456        }
457        Inst::MovFromPReg { rd, rm } => {
458            debug_assert!(rd.to_reg().is_virtual());
459            collector.reg_def(rd);
460            collector.reg_fixed_nonallocatable(*rm);
461        }
462        Inst::MovToPReg { rd, rm } => {
463            debug_assert!(rm.is_virtual());
464            collector.reg_fixed_nonallocatable(*rd);
465            collector.reg_use(rm);
466        }
467        Inst::MovK { rd, rn, .. } => {
468            collector.reg_use(rn);
469            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
470        }
471        Inst::MovWide { rd, .. } => {
472            collector.reg_def(rd);
473        }
474        Inst::CSel { rd, rn, rm, .. } => {
475            collector.reg_def(rd);
476            collector.reg_use(rn);
477            collector.reg_use(rm);
478        }
479        Inst::CSNeg { rd, rn, rm, .. } => {
480            collector.reg_def(rd);
481            collector.reg_use(rn);
482            collector.reg_use(rm);
483        }
484        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
485            collector.reg_def(rd);
486        }
487        Inst::CCmp { rn, rm, .. } => {
488            collector.reg_use(rn);
489            collector.reg_use(rm);
490        }
491        Inst::CCmpImm { rn, .. } => {
492            collector.reg_use(rn);
493        }
494        Inst::AtomicRMWLoop {
495            op,
496            addr,
497            operand,
498            oldval,
499            scratch1,
500            scratch2,
501            ..
502        } => {
503            collector.reg_fixed_use(addr, xreg(25));
504            collector.reg_fixed_use(operand, xreg(26));
505            collector.reg_fixed_def(oldval, xreg(27));
506            collector.reg_fixed_def(scratch1, xreg(24));
507            if *op != AtomicRMWLoopOp::Xchg {
508                collector.reg_fixed_def(scratch2, xreg(28));
509            }
510        }
511        Inst::AtomicRMW { rs, rt, rn, .. } => {
512            collector.reg_use(rs);
513            collector.reg_def(rt);
514            collector.reg_use(rn);
515        }
516        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
517            collector.reg_reuse_def(rd, 1); // reuse `rs`.
518            collector.reg_use(rs);
519            collector.reg_use(rt);
520            collector.reg_use(rn);
521        }
522        Inst::AtomicCASLoop {
523            addr,
524            expected,
525            replacement,
526            oldval,
527            scratch,
528            ..
529        } => {
530            collector.reg_fixed_use(addr, xreg(25));
531            collector.reg_fixed_use(expected, xreg(26));
532            collector.reg_fixed_use(replacement, xreg(28));
533            collector.reg_fixed_def(oldval, xreg(27));
534            collector.reg_fixed_def(scratch, xreg(24));
535        }
536        Inst::LoadAcquire { rt, rn, .. } => {
537            collector.reg_use(rn);
538            collector.reg_def(rt);
539        }
540        Inst::StoreRelease { rt, rn, .. } => {
541            collector.reg_use(rn);
542            collector.reg_use(rt);
543        }
544        Inst::Fence {} | Inst::Csdb {} => {}
545        Inst::FpuMove32 { rd, rn } => {
546            collector.reg_def(rd);
547            collector.reg_use(rn);
548        }
549        Inst::FpuMove64 { rd, rn } => {
550            collector.reg_def(rd);
551            collector.reg_use(rn);
552        }
553        Inst::FpuMove128 { rd, rn } => {
554            collector.reg_def(rd);
555            collector.reg_use(rn);
556        }
557        Inst::FpuMoveFromVec { rd, rn, .. } => {
558            collector.reg_def(rd);
559            collector.reg_use(rn);
560        }
561        Inst::FpuExtend { rd, rn, .. } => {
562            collector.reg_def(rd);
563            collector.reg_use(rn);
564        }
565        Inst::FpuRR { rd, rn, .. } => {
566            collector.reg_def(rd);
567            collector.reg_use(rn);
568        }
569        Inst::FpuRRR { rd, rn, rm, .. } => {
570            collector.reg_def(rd);
571            collector.reg_use(rn);
572            collector.reg_use(rm);
573        }
574        Inst::FpuRRI { rd, rn, .. } => {
575            collector.reg_def(rd);
576            collector.reg_use(rn);
577        }
578        Inst::FpuRRIMod { rd, ri, rn, .. } => {
579            collector.reg_reuse_def(rd, 1); // reuse `ri`.
580            collector.reg_use(ri);
581            collector.reg_use(rn);
582        }
583        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
584            collector.reg_def(rd);
585            collector.reg_use(rn);
586            collector.reg_use(rm);
587            collector.reg_use(ra);
588        }
589        Inst::VecMisc { rd, rn, .. } => {
590            collector.reg_def(rd);
591            collector.reg_use(rn);
592        }
593
594        Inst::VecLanes { rd, rn, .. } => {
595            collector.reg_def(rd);
596            collector.reg_use(rn);
597        }
598        Inst::VecShiftImm { rd, rn, .. } => {
599            collector.reg_def(rd);
600            collector.reg_use(rn);
601        }
602        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
603            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
604            collector.reg_use(ri);
605            collector.reg_use(rn);
606        }
607        Inst::VecExtract { rd, rn, rm, .. } => {
608            collector.reg_def(rd);
609            collector.reg_use(rn);
610            collector.reg_use(rm);
611        }
612        Inst::VecTbl { rd, rn, rm } => {
613            collector.reg_use(rn);
614            collector.reg_use(rm);
615            collector.reg_def(rd);
616        }
617        Inst::VecTblExt { rd, ri, rn, rm } => {
618            collector.reg_use(rn);
619            collector.reg_use(rm);
620            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
621            collector.reg_use(ri);
622        }
623
624        Inst::VecTbl2 { rd, rn, rn2, rm } => {
625            // Constrain to v30 / v31 so that we satisfy the "adjacent
626            // registers" constraint without use of pinned vregs in
627            // lowering.
628            collector.reg_fixed_use(rn, vreg(30));
629            collector.reg_fixed_use(rn2, vreg(31));
630            collector.reg_use(rm);
631            collector.reg_def(rd);
632        }
633        Inst::VecTbl2Ext {
634            rd,
635            ri,
636            rn,
637            rn2,
638            rm,
639        } => {
640            // Constrain to v30 / v31 so that we satisfy the "adjacent
641            // registers" constraint without use of pinned vregs in
642            // lowering.
643            collector.reg_fixed_use(rn, vreg(30));
644            collector.reg_fixed_use(rn2, vreg(31));
645            collector.reg_use(rm);
646            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
647            collector.reg_use(ri);
648        }
649        Inst::VecLoadReplicate { rd, rn, .. } => {
650            collector.reg_def(rd);
651            collector.reg_use(rn);
652        }
653        Inst::VecCSel { rd, rn, rm, .. } => {
654            collector.reg_def(rd);
655            collector.reg_use(rn);
656            collector.reg_use(rm);
657        }
658        Inst::FpuCmp { rn, rm, .. } => {
659            collector.reg_use(rn);
660            collector.reg_use(rm);
661        }
662        Inst::FpuLoad16 { rd, mem, .. } => {
663            collector.reg_def(rd);
664            memarg_operands(mem, collector);
665        }
666        Inst::FpuLoad32 { rd, mem, .. } => {
667            collector.reg_def(rd);
668            memarg_operands(mem, collector);
669        }
670        Inst::FpuLoad64 { rd, mem, .. } => {
671            collector.reg_def(rd);
672            memarg_operands(mem, collector);
673        }
674        Inst::FpuLoad128 { rd, mem, .. } => {
675            collector.reg_def(rd);
676            memarg_operands(mem, collector);
677        }
678        Inst::FpuStore16 { rd, mem, .. } => {
679            collector.reg_use(rd);
680            memarg_operands(mem, collector);
681        }
682        Inst::FpuStore32 { rd, mem, .. } => {
683            collector.reg_use(rd);
684            memarg_operands(mem, collector);
685        }
686        Inst::FpuStore64 { rd, mem, .. } => {
687            collector.reg_use(rd);
688            memarg_operands(mem, collector);
689        }
690        Inst::FpuStore128 { rd, mem, .. } => {
691            collector.reg_use(rd);
692            memarg_operands(mem, collector);
693        }
694        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
695            collector.reg_def(rt);
696            collector.reg_def(rt2);
697            pairmemarg_operands(mem, collector);
698        }
699        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
700            collector.reg_use(rt);
701            collector.reg_use(rt2);
702            pairmemarg_operands(mem, collector);
703        }
704        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
705            collector.reg_def(rt);
706            collector.reg_def(rt2);
707            pairmemarg_operands(mem, collector);
708        }
709        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
710            collector.reg_use(rt);
711            collector.reg_use(rt2);
712            pairmemarg_operands(mem, collector);
713        }
714        Inst::FpuToInt { rd, rn, .. } => {
715            collector.reg_def(rd);
716            collector.reg_use(rn);
717        }
718        Inst::IntToFpu { rd, rn, .. } => {
719            collector.reg_def(rd);
720            collector.reg_use(rn);
721        }
722        Inst::FpuCSel16 { rd, rn, rm, .. }
723        | Inst::FpuCSel32 { rd, rn, rm, .. }
724        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
725            collector.reg_def(rd);
726            collector.reg_use(rn);
727            collector.reg_use(rm);
728        }
729        Inst::FpuRound { rd, rn, .. } => {
730            collector.reg_def(rd);
731            collector.reg_use(rn);
732        }
733        Inst::MovToFpu { rd, rn, .. } => {
734            collector.reg_def(rd);
735            collector.reg_use(rn);
736        }
737        Inst::FpuMoveFPImm { rd, .. } => {
738            collector.reg_def(rd);
739        }
740        Inst::MovToVec { rd, ri, rn, .. } => {
741            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
742            collector.reg_use(ri);
743            collector.reg_use(rn);
744        }
745        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
746            collector.reg_def(rd);
747            collector.reg_use(rn);
748        }
749        Inst::VecDup { rd, rn, .. } => {
750            collector.reg_def(rd);
751            collector.reg_use(rn);
752        }
753        Inst::VecDupFromFpu { rd, rn, .. } => {
754            collector.reg_def(rd);
755            collector.reg_use(rn);
756        }
757        Inst::VecDupFPImm { rd, .. } => {
758            collector.reg_def(rd);
759        }
760        Inst::VecDupImm { rd, .. } => {
761            collector.reg_def(rd);
762        }
763        Inst::VecExtend { rd, rn, .. } => {
764            collector.reg_def(rd);
765            collector.reg_use(rn);
766        }
767        Inst::VecMovElement { rd, ri, rn, .. } => {
768            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
769            collector.reg_use(ri);
770            collector.reg_use(rn);
771        }
772        Inst::VecRRLong { rd, rn, .. } => {
773            collector.reg_def(rd);
774            collector.reg_use(rn);
775        }
776        Inst::VecRRNarrowLow { rd, rn, .. } => {
777            collector.reg_use(rn);
778            collector.reg_def(rd);
779        }
780        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
781            collector.reg_use(rn);
782            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
783            collector.reg_use(ri);
784        }
785        Inst::VecRRPair { rd, rn, .. } => {
786            collector.reg_def(rd);
787            collector.reg_use(rn);
788        }
789        Inst::VecRRRLong { rd, rn, rm, .. } => {
790            collector.reg_def(rd);
791            collector.reg_use(rn);
792            collector.reg_use(rm);
793        }
794        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
795            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
796            collector.reg_use(ri);
797            collector.reg_use(rn);
798            collector.reg_use(rm);
799        }
800        Inst::VecRRPairLong { rd, rn, .. } => {
801            collector.reg_def(rd);
802            collector.reg_use(rn);
803        }
804        Inst::VecRRR { rd, rn, rm, .. } => {
805            collector.reg_def(rd);
806            collector.reg_use(rn);
807            collector.reg_use(rm);
808        }
809        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
810            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
811            collector.reg_use(ri);
812            collector.reg_use(rn);
813            collector.reg_use(rm);
814        }
815        Inst::MovToNZCV { rn } => {
816            collector.reg_use(rn);
817        }
818        Inst::MovFromNZCV { rd } => {
819            collector.reg_def(rd);
820        }
821        Inst::Extend { rd, rn, .. } => {
822            collector.reg_def(rd);
823            collector.reg_use(rn);
824        }
825        Inst::Args { args } => {
826            for ArgPair { vreg, preg } in args {
827                collector.reg_fixed_def(vreg, *preg);
828            }
829        }
830        Inst::Rets { rets } => {
831            for RetPair { vreg, preg } in rets {
832                collector.reg_fixed_use(vreg, *preg);
833            }
834        }
835        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
836        Inst::Jump { .. } => {}
837        Inst::Call { info, .. } => {
838            let CallInfo { uses, defs, .. } = &mut **info;
839            for CallArgPair { vreg, preg } in uses {
840                collector.reg_fixed_use(vreg, *preg);
841            }
842            for CallRetPair { vreg, location } in defs {
843                match location {
844                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
845                    RetLocation::Stack(..) => collector.any_def(vreg),
846                }
847            }
848            collector.reg_clobbers(info.clobbers);
849        }
850        Inst::CallInd { info, .. } => {
851            let CallInfo {
852                dest, uses, defs, ..
853            } = &mut **info;
854            collector.reg_use(dest);
855            for CallArgPair { vreg, preg } in uses {
856                collector.reg_fixed_use(vreg, *preg);
857            }
858            for CallRetPair { vreg, location } in defs {
859                match location {
860                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
861                    RetLocation::Stack(..) => collector.any_def(vreg),
862                }
863            }
864            collector.reg_clobbers(info.clobbers);
865        }
866        Inst::ReturnCall { info } => {
867            for CallArgPair { vreg, preg } in &mut info.uses {
868                collector.reg_fixed_use(vreg, *preg);
869            }
870        }
871        Inst::ReturnCallInd { info } => {
872            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
873            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
874            // register that won't be clobbered by the callee-save restore code emitted with a
875            // return_call_indirect.
876            collector.reg_fixed_use(&mut info.dest, xreg(1));
877            for CallArgPair { vreg, preg } in &mut info.uses {
878                collector.reg_fixed_use(vreg, *preg);
879            }
880        }
881        Inst::CondBr { kind, .. } => match kind {
882            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
883            CondBrKind::Cond(_) => {}
884        },
885        Inst::TestBitAndBranch { rn, .. } => {
886            collector.reg_use(rn);
887        }
888        Inst::IndirectBr { rn, .. } => {
889            collector.reg_use(rn);
890        }
891        Inst::Nop0 | Inst::Nop4 => {}
892        Inst::Brk => {}
893        Inst::Udf { .. } => {}
894        Inst::TrapIf { kind, .. } => match kind {
895            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
896            CondBrKind::Cond(_) => {}
897        },
898        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
899            collector.reg_def(rd);
900        }
901        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
902        Inst::JTSequence {
903            ridx, rtmp1, rtmp2, ..
904        } => {
905            collector.reg_use(ridx);
906            collector.reg_early_def(rtmp1);
907            collector.reg_early_def(rtmp2);
908        }
909        Inst::LoadExtName { rd, .. } => {
910            collector.reg_def(rd);
911        }
912        Inst::LoadAddr { rd, mem } => {
913            collector.reg_def(rd);
914            memarg_operands(mem, collector);
915        }
916        Inst::Paci { .. } | Inst::Xpaclri => {
917            // Neither LR nor SP is an allocatable register, so there is no need
918            // to do anything.
919        }
920        Inst::Bti { .. } => {}
921
922        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
923            // TLSDESC has a very neat calling convention. It is required to preserve
924            // all registers except x0 and x30. X30 is non allocatable in cranelift since
925            // its the link register.
926            //
927            // Additionally we need a second register as a temporary register for the
928            // TLSDESC sequence. This register can be any register other than x0 (and x30).
929            collector.reg_fixed_def(rd, regs::xreg(0));
930            collector.reg_early_def(tmp);
931        }
932        Inst::MachOTlsGetAddr { rd, .. } => {
933            collector.reg_fixed_def(rd, regs::xreg(0));
934            let mut clobbers =
935                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
936            clobbers.remove(regs::xreg_preg(0));
937            collector.reg_clobbers(clobbers);
938        }
939        Inst::Unwind { .. } => {}
940        Inst::EmitIsland { .. } => {}
941        Inst::DummyUse { reg } => {
942            collector.reg_use(reg);
943        }
944        Inst::StackProbeLoop { start, end, .. } => {
945            collector.reg_early_def(start);
946            collector.reg_use(end);
947        }
948    }
949}
950
951//=============================================================================
952// Instructions: misc functions and external interface
953
954impl MachInst for Inst {
955    type ABIMachineSpec = AArch64MachineDeps;
956    type LabelUse = LabelUse;
957
958    // "CLIF" in hex, to make the trap recognizable during
959    // debugging.
960    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();
961
962    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
963        aarch64_get_operands(self, collector);
964    }
965
966    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
967        match self {
968            &Inst::Mov {
969                size: OperandSize::Size64,
970                rd,
971                rm,
972            } => Some((rd, rm)),
973            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
974            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
975            _ => None,
976        }
977    }
978
979    fn is_included_in_clobbers(&self) -> bool {
980        let (caller, callee) = match self {
981            Inst::Args { .. } => return false,
982            Inst::Call { info } if info.try_call_info.is_some() => return true,
983            Inst::CallInd { info } if info.try_call_info.is_some() => return true,
984            Inst::Call { info } => (info.caller_conv, info.callee_conv),
985            Inst::CallInd { info } => (info.caller_conv, info.callee_conv),
986            _ => return true,
987        };
988
989        // We exclude call instructions from the clobber-set when they are calls
990        // from caller to callee that both clobber the same register (such as
991        // using the same or similar ABIs). Such calls cannot possibly force any
992        // new registers to be saved in the prologue, because anything that the
993        // callee clobbers, the caller is also allowed to clobber. This both
994        // saves work and enables us to more precisely follow the
995        // half-caller-save, half-callee-save SysV ABI for some vector
996        // registers.
997        //
998        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
999        // more information on this ABI-implementation hack.
1000        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
1001        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, false);
1002
1003        let mut all_clobbers = caller_clobbers;
1004        all_clobbers.union_from(callee_clobbers);
1005        all_clobbers != caller_clobbers
1006    }
1007
1008    fn is_trap(&self) -> bool {
1009        match self {
1010            Self::Udf { .. } => true,
1011            _ => false,
1012        }
1013    }
1014
1015    fn is_args(&self) -> bool {
1016        match self {
1017            Self::Args { .. } => true,
1018            _ => false,
1019        }
1020    }
1021
1022    fn is_term(&self) -> MachTerminator {
1023        match self {
1024            &Inst::Rets { .. } => MachTerminator::Ret,
1025            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
1026            &Inst::Jump { .. } => MachTerminator::Branch,
1027            &Inst::CondBr { .. } => MachTerminator::Branch,
1028            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
1029            &Inst::IndirectBr { .. } => MachTerminator::Branch,
1030            &Inst::JTSequence { .. } => MachTerminator::Branch,
1031            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1032            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1033            _ => MachTerminator::None,
1034        }
1035    }
1036
1037    fn is_mem_access(&self) -> bool {
1038        match self {
1039            &Inst::ULoad8 { .. }
1040            | &Inst::SLoad8 { .. }
1041            | &Inst::ULoad16 { .. }
1042            | &Inst::SLoad16 { .. }
1043            | &Inst::ULoad32 { .. }
1044            | &Inst::SLoad32 { .. }
1045            | &Inst::ULoad64 { .. }
1046            | &Inst::LoadP64 { .. }
1047            | &Inst::FpuLoad16 { .. }
1048            | &Inst::FpuLoad32 { .. }
1049            | &Inst::FpuLoad64 { .. }
1050            | &Inst::FpuLoad128 { .. }
1051            | &Inst::FpuLoadP64 { .. }
1052            | &Inst::FpuLoadP128 { .. }
1053            | &Inst::Store8 { .. }
1054            | &Inst::Store16 { .. }
1055            | &Inst::Store32 { .. }
1056            | &Inst::Store64 { .. }
1057            | &Inst::StoreP64 { .. }
1058            | &Inst::FpuStore16 { .. }
1059            | &Inst::FpuStore32 { .. }
1060            | &Inst::FpuStore64 { .. }
1061            | &Inst::FpuStore128 { .. } => true,
1062            // TODO: verify this carefully
1063            _ => false,
1064        }
1065    }
1066
1067    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
1068        let bits = ty.bits();
1069
1070        assert!(bits <= 128);
1071        assert!(to_reg.to_reg().class() == from_reg.class());
1072        match from_reg.class() {
1073            RegClass::Int => Inst::Mov {
1074                size: OperandSize::Size64,
1075                rd: to_reg,
1076                rm: from_reg,
1077            },
1078            RegClass::Float => {
1079                if bits > 64 {
1080                    Inst::FpuMove128 {
1081                        rd: to_reg,
1082                        rn: from_reg,
1083                    }
1084                } else {
1085                    Inst::FpuMove64 {
1086                        rd: to_reg,
1087                        rn: from_reg,
1088                    }
1089                }
1090            }
1091            RegClass::Vector => unreachable!(),
1092        }
1093    }
1094
1095    fn is_safepoint(&self) -> bool {
1096        match self {
1097            Inst::Call { .. } | Inst::CallInd { .. } => true,
1098            _ => false,
1099        }
1100    }
1101
1102    fn gen_dummy_use(reg: Reg) -> Inst {
1103        Inst::DummyUse { reg }
1104    }
1105
1106    fn gen_nop(preferred_size: usize) -> Inst {
1107        if preferred_size == 0 {
1108            return Inst::Nop0;
1109        }
1110        // We can't give a NOP (or any insn) < 4 bytes.
1111        assert!(preferred_size >= 4);
1112        Inst::Nop4
1113    }
1114
1115    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1116        match ty {
1117            I8 => Ok((&[RegClass::Int], &[I8])),
1118            I16 => Ok((&[RegClass::Int], &[I16])),
1119            I32 => Ok((&[RegClass::Int], &[I32])),
1120            I64 => Ok((&[RegClass::Int], &[I64])),
1121            F16 => Ok((&[RegClass::Float], &[F16])),
1122            F32 => Ok((&[RegClass::Float], &[F32])),
1123            F64 => Ok((&[RegClass::Float], &[F64])),
1124            F128 => Ok((&[RegClass::Float], &[F128])),
1125            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1126            _ if ty.is_vector() => {
1127                assert!(ty.bits() <= 128);
1128                Ok((&[RegClass::Float], &[I8X16]))
1129            }
1130            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
1131            _ => Err(CodegenError::Unsupported(format!(
1132                "Unexpected SSA-value type: {ty}"
1133            ))),
1134        }
1135    }
1136
1137    fn canonical_type_for_rc(rc: RegClass) -> Type {
1138        match rc {
1139            RegClass::Float => types::I8X16,
1140            RegClass::Int => types::I64,
1141            RegClass::Vector => unreachable!(),
1142        }
1143    }
1144
1145    fn gen_jump(target: MachLabel) -> Inst {
1146        Inst::Jump {
1147            dest: BranchTarget::Label(target),
1148        }
1149    }
1150
1151    fn worst_case_size() -> CodeOffset {
1152        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
1153        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
1154        // 64-bit f64 constants.
1155        //
1156        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
1157        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
1158        // feasible for other reasons).
1159        44
1160    }
1161
1162    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1163        RegClass::Int
1164    }
1165
1166    fn gen_block_start(
1167        is_indirect_branch_target: bool,
1168        is_forward_edge_cfi_enabled: bool,
1169    ) -> Option<Self> {
1170        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
1171            Some(Inst::Bti {
1172                targets: BranchTargetType::J,
1173            })
1174        } else {
1175            None
1176        }
1177    }
1178
1179    fn function_alignment() -> FunctionAlignment {
1180        // We use 32-byte alignment for performance reasons, but for correctness
1181        // we would only need 4-byte alignment.
1182        FunctionAlignment {
1183            minimum: 4,
1184            preferred: 32,
1185        }
1186    }
1187}
1188
1189//=============================================================================
1190// Pretty-printing of instructions.
1191
1192fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
1193    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
1194    let mut mem_str = mem_insts
1195        .into_iter()
1196        .map(|inst| inst.print_with_state(&mut EmitState::default()))
1197        .collect::<Vec<_>>()
1198        .join(" ; ");
1199    if !mem_str.is_empty() {
1200        mem_str += " ; ";
1201    }
1202
1203    let mem = mem.pretty_print(access_ty.bytes() as u8);
1204    (mem_str, mem)
1205}
1206
1207fn pretty_print_try_call(info: &TryCallInfo) -> String {
1208    let dests = info
1209        .exception_dests
1210        .iter()
1211        .map(|(tag, label)| format!("{tag:?}: {label:?}"))
1212        .collect::<Vec<_>>()
1213        .join(", ");
1214    format!("; b {:?}; catch [{dests}]", info.continuation)
1215}
1216
1217impl Inst {
1218    fn print_with_state(&self, state: &mut EmitState) -> String {
1219        fn op_name(alu_op: ALUOp) -> &'static str {
1220            match alu_op {
1221                ALUOp::Add => "add",
1222                ALUOp::Sub => "sub",
1223                ALUOp::Orr => "orr",
1224                ALUOp::And => "and",
1225                ALUOp::AndS => "ands",
1226                ALUOp::Eor => "eor",
1227                ALUOp::AddS => "adds",
1228                ALUOp::SubS => "subs",
1229                ALUOp::SMulH => "smulh",
1230                ALUOp::UMulH => "umulh",
1231                ALUOp::SDiv => "sdiv",
1232                ALUOp::UDiv => "udiv",
1233                ALUOp::AndNot => "bic",
1234                ALUOp::OrrNot => "orn",
1235                ALUOp::EorNot => "eon",
1236                ALUOp::Extr => "extr",
1237                ALUOp::Lsr => "lsr",
1238                ALUOp::Asr => "asr",
1239                ALUOp::Lsl => "lsl",
1240                ALUOp::Adc => "adc",
1241                ALUOp::AdcS => "adcs",
1242                ALUOp::Sbc => "sbc",
1243                ALUOp::SbcS => "sbcs",
1244            }
1245        }
1246
1247        match self {
1248            &Inst::Nop0 => "nop-zero-len".to_string(),
1249            &Inst::Nop4 => "nop".to_string(),
1250            &Inst::AluRRR {
1251                alu_op,
1252                size,
1253                rd,
1254                rn,
1255                rm,
1256            } => {
1257                let op = op_name(alu_op);
1258                let rd = pretty_print_ireg(rd.to_reg(), size);
1259                let rn = pretty_print_ireg(rn, size);
1260                let rm = pretty_print_ireg(rm, size);
1261                format!("{op} {rd}, {rn}, {rm}")
1262            }
1263            &Inst::AluRRRR {
1264                alu_op,
1265                size,
1266                rd,
1267                rn,
1268                rm,
1269                ra,
1270            } => {
1271                let (op, da_size) = match alu_op {
1272                    ALUOp3::MAdd => ("madd", size),
1273                    ALUOp3::MSub => ("msub", size),
1274                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1275                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1276                };
1277                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1278                let rn = pretty_print_ireg(rn, size);
1279                let rm = pretty_print_ireg(rm, size);
1280                let ra = pretty_print_ireg(ra, da_size);
1281
1282                format!("{op} {rd}, {rn}, {rm}, {ra}")
1283            }
1284            &Inst::AluRRImm12 {
1285                alu_op,
1286                size,
1287                rd,
1288                rn,
1289                ref imm12,
1290            } => {
1291                let op = op_name(alu_op);
1292                let rd = pretty_print_ireg(rd.to_reg(), size);
1293                let rn = pretty_print_ireg(rn, size);
1294
1295                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1296                    // special-case MOV (used for moving into SP).
1297                    format!("mov {rd}, {rn}")
1298                } else {
1299                    let imm12 = imm12.pretty_print(0);
1300                    format!("{op} {rd}, {rn}, {imm12}")
1301                }
1302            }
1303            &Inst::AluRRImmLogic {
1304                alu_op,
1305                size,
1306                rd,
1307                rn,
1308                ref imml,
1309            } => {
1310                let op = op_name(alu_op);
1311                let rd = pretty_print_ireg(rd.to_reg(), size);
1312                let rn = pretty_print_ireg(rn, size);
1313                let imml = imml.pretty_print(0);
1314                format!("{op} {rd}, {rn}, {imml}")
1315            }
1316            &Inst::AluRRImmShift {
1317                alu_op,
1318                size,
1319                rd,
1320                rn,
1321                ref immshift,
1322            } => {
1323                let op = op_name(alu_op);
1324                let rd = pretty_print_ireg(rd.to_reg(), size);
1325                let rn = pretty_print_ireg(rn, size);
1326                let immshift = immshift.pretty_print(0);
1327                format!("{op} {rd}, {rn}, {immshift}")
1328            }
1329            &Inst::AluRRRShift {
1330                alu_op,
1331                size,
1332                rd,
1333                rn,
1334                rm,
1335                ref shiftop,
1336            } => {
1337                let op = op_name(alu_op);
1338                let rd = pretty_print_ireg(rd.to_reg(), size);
1339                let rn = pretty_print_ireg(rn, size);
1340                let rm = pretty_print_ireg(rm, size);
1341                let shiftop = shiftop.pretty_print(0);
1342                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1343            }
1344            &Inst::AluRRRExtend {
1345                alu_op,
1346                size,
1347                rd,
1348                rn,
1349                rm,
1350                ref extendop,
1351            } => {
1352                let op = op_name(alu_op);
1353                let rd = pretty_print_ireg(rd.to_reg(), size);
1354                let rn = pretty_print_ireg(rn, size);
1355                let rm = pretty_print_ireg(rm, size);
1356                let extendop = extendop.pretty_print(0);
1357                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1358            }
1359            &Inst::BitRR { op, size, rd, rn } => {
1360                let op = op.op_str();
1361                let rd = pretty_print_ireg(rd.to_reg(), size);
1362                let rn = pretty_print_ireg(rn, size);
1363                format!("{op} {rd}, {rn}")
1364            }
1365            &Inst::ULoad8 { rd, ref mem, .. }
1366            | &Inst::SLoad8 { rd, ref mem, .. }
1367            | &Inst::ULoad16 { rd, ref mem, .. }
1368            | &Inst::SLoad16 { rd, ref mem, .. }
1369            | &Inst::ULoad32 { rd, ref mem, .. }
1370            | &Inst::SLoad32 { rd, ref mem, .. }
1371            | &Inst::ULoad64 { rd, ref mem, .. } => {
1372                let is_unscaled = match &mem {
1373                    &AMode::Unscaled { .. } => true,
1374                    _ => false,
1375                };
1376                let (op, size) = match (self, is_unscaled) {
1377                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1378                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1379                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1380                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1381                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1382                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1383                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1384                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1385                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1386                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1387                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1388                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1389                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1390                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1391                    _ => unreachable!(),
1392                };
1393
1394                let rd = pretty_print_ireg(rd.to_reg(), size);
1395                let mem = mem.clone();
1396                let access_ty = self.mem_type().unwrap();
1397                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1398
1399                format!("{mem_str}{op} {rd}, {mem}")
1400            }
1401            &Inst::Store8 { rd, ref mem, .. }
1402            | &Inst::Store16 { rd, ref mem, .. }
1403            | &Inst::Store32 { rd, ref mem, .. }
1404            | &Inst::Store64 { rd, ref mem, .. } => {
1405                let is_unscaled = match &mem {
1406                    &AMode::Unscaled { .. } => true,
1407                    _ => false,
1408                };
1409                let (op, size) = match (self, is_unscaled) {
1410                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1411                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1412                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1413                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1414                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1415                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1416                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1417                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1418                    _ => unreachable!(),
1419                };
1420
1421                let rd = pretty_print_ireg(rd, size);
1422                let mem = mem.clone();
1423                let access_ty = self.mem_type().unwrap();
1424                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1425
1426                format!("{mem_str}{op} {rd}, {mem}")
1427            }
1428            &Inst::StoreP64 {
1429                rt, rt2, ref mem, ..
1430            } => {
1431                let rt = pretty_print_ireg(rt, OperandSize::Size64);
1432                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
1433                let mem = mem.clone();
1434                let mem = mem.pretty_print_default();
1435                format!("stp {rt}, {rt2}, {mem}")
1436            }
1437            &Inst::LoadP64 {
1438                rt, rt2, ref mem, ..
1439            } => {
1440                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
1441                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
1442                let mem = mem.clone();
1443                let mem = mem.pretty_print_default();
1444                format!("ldp {rt}, {rt2}, {mem}")
1445            }
1446            &Inst::Mov { size, rd, rm } => {
1447                let rd = pretty_print_ireg(rd.to_reg(), size);
1448                let rm = pretty_print_ireg(rm, size);
1449                format!("mov {rd}, {rm}")
1450            }
1451            &Inst::MovFromPReg { rd, rm } => {
1452                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1453                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
1454                format!("mov {rd}, {rm}")
1455            }
1456            &Inst::MovToPReg { rd, rm } => {
1457                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
1458                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1459                format!("mov {rd}, {rm}")
1460            }
1461            &Inst::MovWide {
1462                op,
1463                rd,
1464                ref imm,
1465                size,
1466            } => {
1467                let op_str = match op {
1468                    MoveWideOp::MovZ => "movz",
1469                    MoveWideOp::MovN => "movn",
1470                };
1471                let rd = pretty_print_ireg(rd.to_reg(), size);
1472                let imm = imm.pretty_print(0);
1473                format!("{op_str} {rd}, {imm}")
1474            }
1475            &Inst::MovK {
1476                rd,
1477                rn,
1478                ref imm,
1479                size,
1480            } => {
1481                let rn = pretty_print_ireg(rn, size);
1482                let rd = pretty_print_ireg(rd.to_reg(), size);
1483                let imm = imm.pretty_print(0);
1484                format!("movk {rd}, {rn}, {imm}")
1485            }
1486            &Inst::CSel { rd, rn, rm, cond } => {
1487                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1488                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1489                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1490                let cond = cond.pretty_print(0);
1491                format!("csel {rd}, {rn}, {rm}, {cond}")
1492            }
1493            &Inst::CSNeg { rd, rn, rm, cond } => {
1494                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1495                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1496                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1497                let cond = cond.pretty_print(0);
1498                format!("csneg {rd}, {rn}, {rm}, {cond}")
1499            }
1500            &Inst::CSet { rd, cond } => {
1501                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1502                let cond = cond.pretty_print(0);
1503                format!("cset {rd}, {cond}")
1504            }
1505            &Inst::CSetm { rd, cond } => {
1506                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1507                let cond = cond.pretty_print(0);
1508                format!("csetm {rd}, {cond}")
1509            }
1510            &Inst::CCmp {
1511                size,
1512                rn,
1513                rm,
1514                nzcv,
1515                cond,
1516            } => {
1517                let rn = pretty_print_ireg(rn, size);
1518                let rm = pretty_print_ireg(rm, size);
1519                let nzcv = nzcv.pretty_print(0);
1520                let cond = cond.pretty_print(0);
1521                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
1522            }
1523            &Inst::CCmpImm {
1524                size,
1525                rn,
1526                imm,
1527                nzcv,
1528                cond,
1529            } => {
1530                let rn = pretty_print_ireg(rn, size);
1531                let imm = imm.pretty_print(0);
1532                let nzcv = nzcv.pretty_print(0);
1533                let cond = cond.pretty_print(0);
1534                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
1535            }
1536            &Inst::AtomicRMW {
1537                rs, rt, rn, ty, op, ..
1538            } => {
1539                let op = match op {
1540                    AtomicRMWOp::Add => "ldaddal",
1541                    AtomicRMWOp::Clr => "ldclral",
1542                    AtomicRMWOp::Eor => "ldeoral",
1543                    AtomicRMWOp::Set => "ldsetal",
1544                    AtomicRMWOp::Smax => "ldsmaxal",
1545                    AtomicRMWOp::Umax => "ldumaxal",
1546                    AtomicRMWOp::Smin => "ldsminal",
1547                    AtomicRMWOp::Umin => "lduminal",
1548                    AtomicRMWOp::Swp => "swpal",
1549                };
1550
1551                let size = OperandSize::from_ty(ty);
1552                let rs = pretty_print_ireg(rs, size);
1553                let rt = pretty_print_ireg(rt.to_reg(), size);
1554                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1555
1556                let ty_suffix = match ty {
1557                    I8 => "b",
1558                    I16 => "h",
1559                    _ => "",
1560                };
1561                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
1562            }
1563            &Inst::AtomicRMWLoop {
1564                ty,
1565                op,
1566                addr,
1567                operand,
1568                oldval,
1569                scratch1,
1570                scratch2,
1571                ..
1572            } => {
1573                let op = match op {
1574                    AtomicRMWLoopOp::Add => "add",
1575                    AtomicRMWLoopOp::Sub => "sub",
1576                    AtomicRMWLoopOp::Eor => "eor",
1577                    AtomicRMWLoopOp::Orr => "orr",
1578                    AtomicRMWLoopOp::And => "and",
1579                    AtomicRMWLoopOp::Nand => "nand",
1580                    AtomicRMWLoopOp::Smin => "smin",
1581                    AtomicRMWLoopOp::Smax => "smax",
1582                    AtomicRMWLoopOp::Umin => "umin",
1583                    AtomicRMWLoopOp::Umax => "umax",
1584                    AtomicRMWLoopOp::Xchg => "xchg",
1585                };
1586                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1587                let operand = pretty_print_ireg(operand, OperandSize::Size64);
1588                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1589                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
1590                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
1591                format!(
1592                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
1593                    op,
1594                    ty.bits(),
1595                    addr,
1596                    operand,
1597                    oldval,
1598                    scratch1,
1599                    scratch2,
1600                )
1601            }
            // Hardware compare-and-swap (LSE): mnemonic is selected by access
            // width (byte / halfword / word-or-doubleword forms).
            &Inst::AtomicCAS {
                rd, rs, rt, rn, ty, ..
            } => {
                let op = match ty {
                    I8 => "casalb",
                    I16 => "casalh",
                    I32 | I64 => "casal",
                    _ => panic!("Unsupported type: {ty}"),
                };
                // Data registers print at the access width; the address register
                // is always printed as 64-bit.
                let size = OperandSize::from_ty(ty);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rs = pretty_print_ireg(rs, size);
                let rt = pretty_print_ireg(rt, size);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);

                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
            }
            // Pseudo-instruction: a CAS loop expanded at emission time; printed
            // with named operands rather than as real assembly.
            &Inst::AtomicCASLoop {
                ty,
                addr,
                expected,
                replacement,
                oldval,
                scratch,
                ..
            } => {
                let addr = pretty_print_ireg(addr, OperandSize::Size64);
                let expected = pretty_print_ireg(expected, OperandSize::Size64);
                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
                format!(
                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
                    ty.bits(),
                    addr,
                    expected,
                    replacement,
                    oldval,
                    scratch,
                )
            }
            // Load-acquire: sub-word accesses use the byte/halfword forms and
            // print the destination as a 32-bit register.
            &Inst::LoadAcquire {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("ldarb", I32),
                    I16 => ("ldarh", I32),
                    I32 => ("ldar", I32),
                    I64 => ("ldar", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt.to_reg(), size);
                format!("{op} {rt}, [{rn}]")
            }
            // Store-release: mirrors LoadAcquire, with stlrb/stlrh/stlr by width.
            &Inst::StoreRelease {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("stlrb", I32),
                    I16 => ("stlrh", I32),
                    I32 => ("stlr", I32),
                    I64 => ("stlr", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt, size);
                format!("{op} {rt}, [{rn}]")
            }
            // Memory barrier, inner-shareable domain ("ish").
            &Inst::Fence {} => {
                format!("dmb ish")
            }
            // Consumption-of-speculative-data barrier.
            &Inst::Csdb {} => {
                format!("csdb")
            }
            // Scalar FP register-to-register moves at 32/64 bits.
            &Inst::FpuMove32 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuMove64 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                format!("fmov {rd}, {rn}")
            }
            // A full 128-bit move prints as a 16-byte vector "mov".
            &Inst::FpuMove128 { rd, rn } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rn = pretty_print_reg(rn);
                format!("mov {rd}.16b, {rn}.16b")
            }
            // Move a single lane of a vector into a scalar FP register.
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("mov {rd}, {rn}")
            }
            &Inst::FpuExtend { rd, rn, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("fmov {rd}, {rn}")
            }
            // One-operand FP ops. For the fcvt conversions, the destination
            // width differs from the source width, so it is computed separately.
            &Inst::FpuRR {
                fpu_op,
                size,
                rd,
                rn,
            } => {
                let op = match fpu_op {
                    FPUOp1::Abs => "fabs",
                    FPUOp1::Neg => "fneg",
                    FPUOp1::Sqrt => "fsqrt",
                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
                };
                let dst_size = match fpu_op {
                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
                    _ => size,
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{op} {rd}, {rn}")
            }
            // Two-operand scalar FP arithmetic.
            &Inst::FpuRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let op = match fpu_op {
                    FPUOp2::Add => "fadd",
                    FPUOp2::Sub => "fsub",
                    FPUOp2::Mul => "fmul",
                    FPUOp2::Div => "fdiv",
                    FPUOp2::Max => "fmax",
                    FPUOp2::Min => "fmin",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            // FP op with an immediate shift amount. The 32-bit variant is
            // printed in its 2x32 vector form, the 64-bit one as a scalar.
            &Inst::FpuRRI { fpu_op, rd, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
                };

                let (rd, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {rn}, {imm}")
            }
            // Like FpuRRI but the destination is also an input (`ri`), so the
            // incoming value is shown as an extra operand.
            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
                };

                let (rd, ri, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {ri}, {rn}, {imm}")
            }
            // Fused multiply-add family (three source registers).
            &Inst::FpuRRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let op = match fpu_op {
                    FPUOp3::MAdd => "fmadd",
                    FPUOp3::MSub => "fmsub",
                    FPUOp3::NMAdd => "fnmadd",
                    FPUOp3::NMSub => "fnmsub",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                let ra = pretty_print_vreg_scalar(ra, size);
                format!("{op} {rd}, {rn}, {rm}, {ra}")
            }
            // FP compare: sets flags only, no destination register.
            &Inst::FpuCmp { size, rn, rm } => {
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("fcmp {rn}, {rm}")
            }
            // FP loads/stores. `mem_finalize_for_show` may legalize the
            // addressing mode; any helper instructions it needs are returned in
            // `mem_str` and printed before the load/store itself.
            &Inst::FpuLoad16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            // 128-bit accesses reuse the integer register printer and rewrite
            // the leading register-class letter to "q".
            &Inst::FpuLoad128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuStore16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd);
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            // Pair loads/stores: these use the pair addressing mode directly
            // (no finalization step), via `pretty_print_default`.
            &Inst::FpuLoadP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuLoadP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
            // FP -> integer conversions: each opcode fixes the source scalar
            // width and destination integer width; fcvtzs/fcvtzu by signedness.
            &Inst::FpuToInt { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
                };
                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
                let rn = pretty_print_vreg_scalar(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            // Integer -> FP conversions, the mirror of FpuToInt (scvtf/ucvtf).
            &Inst::IntToFpu { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
                let rn = pretty_print_ireg(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            // Conditional selects on FP registers at 16/32/64 bits.
            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            // FP rounding: the mode encodes both the rounding direction
            // (frintm/frintp/frintz/frintn) and the operand width.
            &Inst::FpuRound { op, rd, rn } => {
                let (inst, size) = match op {
                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{inst} {rd}, {rn}")
            }
            // Move from an integer register into an FP/vector register.
            &Inst::MovToFpu { rd, rn, size } => {
                let operand_size = size.operand_size();
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, operand_size);
                format!("fmov {rd}, {rn}")
            }
            // Materialize an FP immediate into a scalar register.
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            // Insert an integer register into one lane of a vector. The
            // destination is also an input (`ri`) since the other lanes are
            // preserved, so it is shown as an extra operand.
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
                let rn = pretty_print_ireg(rn, size.operand_size());
                format!("mov {rd}, {ri}, {rn}")
            }
            // Extract one lane into an integer register; sub-word lanes use
            // "umov", word/doubleword lanes use the "mov" alias.
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let op = match size {
                    ScalarSize::Size8 => "umov",
                    ScalarSize::Size16 => "umov",
                    ScalarSize::Size32 => "mov",
                    ScalarSize::Size64 => "mov",
                    _ => unimplemented!(),
                };
                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size);
                format!("{op} {rd}, {rn}")
            }
            // Sign-extending lane extract; `scalar_size` is the integer
            // destination width, which may be wider than the lane.
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("smov {rd}, {rn}")
            }
            // Broadcast an integer register into every lane.
            &Inst::VecDup { rd, rn, size } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size.operand_size())
                format!("dup {rd}, {rn}")
            }
            // Broadcast a single vector lane into every lane.
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
                format!("dup {rd}, {rn}")
            }
            // Broadcast an FP immediate into every lane.
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            // Broadcast an integer immediate; "mvni" prints the inverted form.
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let imm = imm.pretty_print(0);
                let op = if invert { "mvni" } else { "movi" };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("{op} {rd}, {imm}")
            }
            // Widening extends: the "2" suffix variants read the high half of a
            // 128-bit source, so the printed source size depends on `high_half`.
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
                let rd_size = VectorSize::from_lane_size(lane_size, true);
                let (op, rn_size) = match (t, high_half) {
                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, rn_size);
                format!("{op} {rd}, {rn}")
            }
            // Lane-to-lane move; `ri` is the incoming destination value (other
            // lanes are preserved) and is shown as an extra operand.
            &Inst::VecMovElement {
                rd,
                ri,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let rd =
                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
                format!("mov {rd}, {ri}, {rn}")
            }
            // Long (widening) one-operand vector ops. Each table entry fixes
            // the mnemonic, destination/source sizes, and any trailing shift
            // immediate (for shll, the shift equals the source lane width).
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (op, rd_size, size, suffix) = match (op, high_half) {
                    (VecRRLongOp::Fcvtl16, false) => {
                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
                    }
                    (VecRRLongOp::Fcvtl16, true) => {
                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
                    }
                    (VecRRLongOp::Fcvtl32, false) => {
                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
                    }
                    (VecRRLongOp::Fcvtl32, true) => {
                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
                    }
                    (VecRRLongOp::Shll8, false) => {
                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
                    }
                    (VecRRLongOp::Shll8, true) => {
                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
                    }
                    (VecRRLongOp::Shll16, false) => {
                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
                    }
                    (VecRRLongOp::Shll16, true) => {
                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
                    }
                    (VecRRLongOp::Shll32, false) => {
                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
                    }
                    (VecRRLongOp::Shll32, true) => {
                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, size);

                format!("{op} {rd}, {rn}{suffix}")
            }
            // Narrowing one-operand vector ops. Low and High variants share
            // this arm; the fields they differ in (`high_half` flag and the
            // High variant's extra `ri` input) are recovered by re-matching
            // `self` below.
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
                ..
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size, false);
                let vec128 = VectorSize::from_lane_size(lane_size, true);
                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };
                let (op, rd_size) = match (op, high_half) {
                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
                };
                let rn = pretty_print_vreg_vector(rn, rn_size);
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                // The High variant preserves the low half of the destination,
                // so its incoming value is printed as an extra operand.
                let ri = match self {
                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
                    &Inst::VecRRNarrowHigh { ri, .. } => {
                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
                    }
                    _ => unreachable!(),
                };

                format!("{op} {rd}, {ri}{rn}")
            }
            // Pairwise add across a 64x2 vector into a scalar.
            &Inst::VecRRPair { op, rd, rn } => {
                let op = match op {
                    VecPairOp::Addp => "addp",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);

                format!("{op} {rd}, {rn}")
            }
            // Pairwise widening add: each opcode fixes both vector shapes.
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (op, dest, src) = match op {
                    VecRRPairLongOp::Saddlp8 => {
                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Saddlp16 => {
                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    VecRRPairLongOp::Uaddlp8 => {
                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Uaddlp16 => {
                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
                let rn = pretty_print_vreg_vector(rn, src);

                format!("{op} {rd}, {rn}")
            }
            // Three-register vector ALU ops. Bitwise ops (and/bic/orr/eor)
            // ignore the lane arrangement and always print as 8x16.
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUOp::Sqadd => ("sqadd", size),
                    VecALUOp::Uqadd => ("uqadd", size),
                    VecALUOp::Sqsub => ("sqsub", size),
                    VecALUOp::Uqsub => ("uqsub", size),
                    VecALUOp::Cmeq => ("cmeq", size),
                    VecALUOp::Cmge => ("cmge", size),
                    VecALUOp::Cmgt => ("cmgt", size),
                    VecALUOp::Cmhs => ("cmhs", size),
                    VecALUOp::Cmhi => ("cmhi", size),
                    VecALUOp::Fcmeq => ("fcmeq", size),
                    VecALUOp::Fcmgt => ("fcmgt", size),
                    VecALUOp::Fcmge => ("fcmge", size),
                    VecALUOp::And => ("and", VectorSize::Size8x16),
                    VecALUOp::Bic => ("bic", VectorSize::Size8x16),
                    VecALUOp::Orr => ("orr", VectorSize::Size8x16),
                    VecALUOp::Eor => ("eor", VectorSize::Size8x16),
                    VecALUOp::Umaxp => ("umaxp", size),
                    VecALUOp::Add => ("add", size),
                    VecALUOp::Sub => ("sub", size),
                    VecALUOp::Mul => ("mul", size),
                    VecALUOp::Sshl => ("sshl", size),
                    VecALUOp::Ushl => ("ushl", size),
                    VecALUOp::Umin => ("umin", size),
                    VecALUOp::Smin => ("smin", size),
                    VecALUOp::Umax => ("umax", size),
                    VecALUOp::Smax => ("smax", size),
                    VecALUOp::Urhadd => ("urhadd", size),
                    VecALUOp::Fadd => ("fadd", size),
                    VecALUOp::Fsub => ("fsub", size),
                    VecALUOp::Fdiv => ("fdiv", size),
                    VecALUOp::Fmax => ("fmax", size),
                    VecALUOp::Fmin => ("fmin", size),
                    VecALUOp::Fmul => ("fmul", size),
                    VecALUOp::Addp => ("addp", size),
                    VecALUOp::Zip1 => ("zip1", size),
                    VecALUOp::Zip2 => ("zip2", size),
                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
                    VecALUOp::Uzp1 => ("uzp1", size),
                    VecALUOp::Uzp2 => ("uzp2", size),
                    VecALUOp::Trn1 => ("trn1", size),
                    VecALUOp::Trn2 => ("trn2", size),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            // Vector ALU ops that modify their destination: the incoming value
            // `ri` is printed as an extra operand. Bsl is lane-agnostic (8x16).
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
                    VecALUModOp::Fmla => ("fmla", size),
                    VecALUModOp::Fmls => ("fmls", size),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
2275            &Inst::VecFmlaElem {
2276                rd,
2277                ri,
2278                rn,
2279                rm,
2280                alu_op,
2281                size,
2282                idx,
2283            } => {
2284                let (op, size) = match alu_op {
2285                    VecALUModOp::Fmla => ("fmla", size),
2286                    VecALUModOp::Fmls => ("fmls", size),
2287                    _ => unreachable!(),
2288                };
2289                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2290                let ri = pretty_print_vreg_vector(ri, size);
2291                let rn = pretty_print_vreg_vector(rn, size);
2292                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
2293                format!("{op} {rd}, {ri}, {rn}, {rm}")
2294            }
2295            &Inst::VecRRRLong {
2296                rd,
2297                rn,
2298                rm,
2299                alu_op,
2300                high_half,
2301            } => {
2302                let (op, dest_size, src_size) = match (alu_op, high_half) {
2303                    (VecRRRLongOp::Smull8, false) => {
2304                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
2305                    }
2306                    (VecRRRLongOp::Smull8, true) => {
2307                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
2308                    }
2309                    (VecRRRLongOp::Smull16, false) => {
2310                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
2311                    }
2312                    (VecRRRLongOp::Smull16, true) => {
2313                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
2314                    }
2315                    (VecRRRLongOp::Smull32, false) => {
2316                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
2317                    }
2318                    (VecRRRLongOp::Smull32, true) => {
2319                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
2320                    }
2321                    (VecRRRLongOp::Umull8, false) => {
2322                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
2323                    }
2324                    (VecRRRLongOp::Umull8, true) => {
2325                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
2326                    }
2327                    (VecRRRLongOp::Umull16, false) => {
2328                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
2329                    }
2330                    (VecRRRLongOp::Umull16, true) => {
2331                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
2332                    }
2333                    (VecRRRLongOp::Umull32, false) => {
2334                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
2335                    }
2336                    (VecRRRLongOp::Umull32, true) => {
2337                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
2338                    }
2339                };
2340                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2341                let rn = pretty_print_vreg_vector(rn, src_size);
2342                let rm = pretty_print_vreg_vector(rm, src_size);
2343                format!("{op} {rd}, {rn}, {rm}")
2344            }
2345            &Inst::VecRRRLongMod {
2346                rd,
2347                ri,
2348                rn,
2349                rm,
2350                alu_op,
2351                high_half,
2352            } => {
2353                let (op, dest_size, src_size) = match (alu_op, high_half) {
2354                    (VecRRRLongModOp::Umlal8, false) => {
2355                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
2356                    }
2357                    (VecRRRLongModOp::Umlal8, true) => {
2358                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
2359                    }
2360                    (VecRRRLongModOp::Umlal16, false) => {
2361                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
2362                    }
2363                    (VecRRRLongModOp::Umlal16, true) => {
2364                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
2365                    }
2366                    (VecRRRLongModOp::Umlal32, false) => {
2367                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
2368                    }
2369                    (VecRRRLongModOp::Umlal32, true) => {
2370                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
2371                    }
2372                };
2373                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2374                let ri = pretty_print_vreg_vector(ri, dest_size);
2375                let rn = pretty_print_vreg_vector(rn, src_size);
2376                let rm = pretty_print_vreg_vector(rm, src_size);
2377                format!("{op} {rd}, {ri}, {rn}, {rm}")
2378            }
2379            &Inst::VecMisc { op, rd, rn, size } => {
2380                let (op, size, suffix) = match op {
2381                    VecMisc2::Not => (
2382                        "mvn",
2383                        if size.is_128bits() {
2384                            VectorSize::Size8x16
2385                        } else {
2386                            VectorSize::Size8x8
2387                        },
2388                        "",
2389                    ),
2390                    VecMisc2::Neg => ("neg", size, ""),
2391                    VecMisc2::Abs => ("abs", size, ""),
2392                    VecMisc2::Fabs => ("fabs", size, ""),
2393                    VecMisc2::Fneg => ("fneg", size, ""),
2394                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
2395                    VecMisc2::Rev16 => ("rev16", size, ""),
2396                    VecMisc2::Rev32 => ("rev32", size, ""),
2397                    VecMisc2::Rev64 => ("rev64", size, ""),
2398                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
2399                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
2400                    VecMisc2::Scvtf => ("scvtf", size, ""),
2401                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
2402                    VecMisc2::Frintn => ("frintn", size, ""),
2403                    VecMisc2::Frintz => ("frintz", size, ""),
2404                    VecMisc2::Frintm => ("frintm", size, ""),
2405                    VecMisc2::Frintp => ("frintp", size, ""),
2406                    VecMisc2::Cnt => ("cnt", size, ""),
2407                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
2408                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
2409                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
2410                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
2411                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
2412                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
2413                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
2414                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
2415                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
2416                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
2417                };
2418                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2419                let rn = pretty_print_vreg_vector(rn, size);
2420                format!("{op} {rd}, {rn}{suffix}")
2421            }
2422            &Inst::VecLanes { op, rd, rn, size } => {
2423                let op = match op {
2424                    VecLanesOp::Uminv => "uminv",
2425                    VecLanesOp::Addv => "addv",
2426                };
2427                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
2428                let rn = pretty_print_vreg_vector(rn, size);
2429                format!("{op} {rd}, {rn}")
2430            }
2431            &Inst::VecShiftImm {
2432                op,
2433                rd,
2434                rn,
2435                size,
2436                imm,
2437            } => {
2438                let op = match op {
2439                    VecShiftImmOp::Shl => "shl",
2440                    VecShiftImmOp::Ushr => "ushr",
2441                    VecShiftImmOp::Sshr => "sshr",
2442                };
2443                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2444                let rn = pretty_print_vreg_vector(rn, size);
2445                format!("{op} {rd}, {rn}, #{imm}")
2446            }
2447            &Inst::VecShiftImmMod {
2448                op,
2449                rd,
2450                ri,
2451                rn,
2452                size,
2453                imm,
2454            } => {
2455                let op = match op {
2456                    VecShiftImmModOp::Sli => "sli",
2457                };
2458                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2459                let ri = pretty_print_vreg_vector(ri, size);
2460                let rn = pretty_print_vreg_vector(rn, size);
2461                format!("{op} {rd}, {ri}, {rn}, #{imm}")
2462            }
2463            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2464                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2465                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2466                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2467                format!("ext {rd}, {rn}, {rm}, #{imm4}")
2468            }
2469            &Inst::VecTbl { rd, rn, rm } => {
2470                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2471                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2472                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2473                format!("tbl {rd}, {{ {rn} }}, {rm}")
2474            }
2475            &Inst::VecTblExt { rd, ri, rn, rm } => {
2476                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2477                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2478                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2479                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2480                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
2481            }
2482            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2483                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2484                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2485                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2486                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2487                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
2488            }
2489            &Inst::VecTbl2Ext {
2490                rd,
2491                ri,
2492                rn,
2493                rn2,
2494                rm,
2495            } => {
2496                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2497                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2498                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2499                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2500                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2501                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
2502            }
2503            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
2504                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2505                let rn = pretty_print_reg(rn);
2506
2507                format!("ld1r {{ {rd} }}, [{rn}]")
2508            }
2509            &Inst::VecCSel { rd, rn, rm, cond } => {
2510                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2511                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2512                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2513                let cond = cond.pretty_print(0);
2514                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
2515            }
2516            &Inst::MovToNZCV { rn } => {
2517                let rn = pretty_print_reg(rn);
2518                format!("msr nzcv, {rn}")
2519            }
2520            &Inst::MovFromNZCV { rd } => {
2521                let rd = pretty_print_reg(rd.to_reg());
2522                format!("mrs {rd}, nzcv")
2523            }
2524            &Inst::Extend {
2525                rd,
2526                rn,
2527                signed: false,
2528                from_bits: 1,
2529                ..
2530            } => {
2531                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2532                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2533                format!("and {rd}, {rn}, #1")
2534            }
2535            &Inst::Extend {
2536                rd,
2537                rn,
2538                signed: false,
2539                from_bits: 32,
2540                to_bits: 64,
2541            } => {
2542                // The case of a zero extension from 32 to 64 bits, is implemented
2543                // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
2544                // the top 32 bits.
2545                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2546                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2547                format!("mov {rd}, {rn}")
2548            }
2549            &Inst::Extend {
2550                rd,
2551                rn,
2552                signed,
2553                from_bits,
2554                to_bits,
2555            } => {
2556                assert!(from_bits <= to_bits);
2557                let op = match (signed, from_bits) {
2558                    (false, 8) => "uxtb",
2559                    (true, 8) => "sxtb",
2560                    (false, 16) => "uxth",
2561                    (true, 16) => "sxth",
2562                    (true, 32) => "sxtw",
2563                    (true, _) => "sbfx",
2564                    (false, _) => "ubfx",
2565                };
2566                if op == "sbfx" || op == "ubfx" {
2567                    let dest_size = OperandSize::from_bits(to_bits);
2568                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2569                    let rn = pretty_print_ireg(rn, dest_size);
2570                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
2571                } else {
2572                    let dest_size = if signed {
2573                        OperandSize::from_bits(to_bits)
2574                    } else {
2575                        OperandSize::Size32
2576                    };
2577                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2578                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
2579                    format!("{op} {rd}, {rn}")
2580                }
2581            }
2582            &Inst::Call { ref info } => {
2583                let try_call = info
2584                    .try_call_info
2585                    .as_ref()
2586                    .map(|tci| pretty_print_try_call(tci))
2587                    .unwrap_or_default();
2588                format!("bl 0{try_call}")
2589            }
2590            &Inst::CallInd { ref info } => {
2591                let rn = pretty_print_reg(info.dest);
2592                let try_call = info
2593                    .try_call_info
2594                    .as_ref()
2595                    .map(|tci| pretty_print_try_call(tci))
2596                    .unwrap_or_default();
2597                format!("blr {rn}{try_call}")
2598            }
2599            &Inst::ReturnCall { ref info } => {
2600                let mut s = format!(
2601                    "return_call {:?} new_stack_arg_size:{}",
2602                    info.dest, info.new_stack_arg_size
2603                );
2604                for ret in &info.uses {
2605                    let preg = pretty_print_reg(ret.preg);
2606                    let vreg = pretty_print_reg(ret.vreg);
2607                    write!(&mut s, " {vreg}={preg}").unwrap();
2608                }
2609                s
2610            }
2611            &Inst::ReturnCallInd { ref info } => {
2612                let callee = pretty_print_reg(info.dest);
2613                let mut s = format!(
2614                    "return_call_ind {callee} new_stack_arg_size:{}",
2615                    info.new_stack_arg_size
2616                );
2617                for ret in &info.uses {
2618                    let preg = pretty_print_reg(ret.preg);
2619                    let vreg = pretty_print_reg(ret.vreg);
2620                    write!(&mut s, " {vreg}={preg}").unwrap();
2621                }
2622                s
2623            }
2624            &Inst::Args { ref args } => {
2625                let mut s = "args".to_string();
2626                for arg in args {
2627                    let preg = pretty_print_reg(arg.preg);
2628                    let def = pretty_print_reg(arg.vreg.to_reg());
2629                    write!(&mut s, " {def}={preg}").unwrap();
2630                }
2631                s
2632            }
2633            &Inst::Rets { ref rets } => {
2634                let mut s = "rets".to_string();
2635                for ret in rets {
2636                    let preg = pretty_print_reg(ret.preg);
2637                    let vreg = pretty_print_reg(ret.vreg);
2638                    write!(&mut s, " {vreg}={preg}").unwrap();
2639                }
2640                s
2641            }
2642            &Inst::Ret {} => "ret".to_string(),
2643            &Inst::AuthenticatedRet { key, is_hint } => {
2644                let key = match key {
2645                    APIKey::AZ => "az",
2646                    APIKey::BZ => "bz",
2647                    APIKey::ASP => "asp",
2648                    APIKey::BSP => "bsp",
2649                };
2650                match is_hint {
2651                    false => format!("reta{key}"),
2652                    true => format!("auti{key} ; ret"),
2653                }
2654            }
2655            &Inst::Jump { ref dest } => {
2656                let dest = dest.pretty_print(0);
2657                format!("b {dest}")
2658            }
2659            &Inst::CondBr {
2660                ref taken,
2661                ref not_taken,
2662                ref kind,
2663            } => {
2664                let taken = taken.pretty_print(0);
2665                let not_taken = not_taken.pretty_print(0);
2666                match kind {
2667                    &CondBrKind::Zero(reg, size) => {
2668                        let reg = pretty_print_reg_sized(reg, size);
2669                        format!("cbz {reg}, {taken} ; b {not_taken}")
2670                    }
2671                    &CondBrKind::NotZero(reg, size) => {
2672                        let reg = pretty_print_reg_sized(reg, size);
2673                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2674                    }
2675                    &CondBrKind::Cond(c) => {
2676                        let c = c.pretty_print(0);
2677                        format!("b.{c} {taken} ; b {not_taken}")
2678                    }
2679                }
2680            }
2681            &Inst::TestBitAndBranch {
2682                kind,
2683                ref taken,
2684                ref not_taken,
2685                rn,
2686                bit,
2687            } => {
2688                let cond = match kind {
2689                    TestBitAndBranchKind::Z => "z",
2690                    TestBitAndBranchKind::NZ => "nz",
2691                };
2692                let taken = taken.pretty_print(0);
2693                let not_taken = not_taken.pretty_print(0);
2694                let rn = pretty_print_reg(rn);
2695                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2696            }
2697            &Inst::IndirectBr { rn, .. } => {
2698                let rn = pretty_print_reg(rn);
2699                format!("br {rn}")
2700            }
2701            &Inst::Brk => "brk #0".to_string(),
2702            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2703            &Inst::TrapIf {
2704                ref kind,
2705                trap_code,
2706            } => match kind {
2707                &CondBrKind::Zero(reg, size) => {
2708                    let reg = pretty_print_reg_sized(reg, size);
2709                    format!("cbz {reg}, #trap={trap_code}")
2710                }
2711                &CondBrKind::NotZero(reg, size) => {
2712                    let reg = pretty_print_reg_sized(reg, size);
2713                    format!("cbnz {reg}, #trap={trap_code}")
2714                }
2715                &CondBrKind::Cond(c) => {
2716                    let c = c.pretty_print(0);
2717                    format!("b.{c} #trap={trap_code}")
2718                }
2719            },
2720            &Inst::Adr { rd, off } => {
2721                let rd = pretty_print_reg(rd.to_reg());
2722                format!("adr {rd}, pc+{off}")
2723            }
2724            &Inst::Adrp { rd, off } => {
2725                let rd = pretty_print_reg(rd.to_reg());
2726                // This instruction addresses 4KiB pages, so multiply it by the page size.
2727                let byte_offset = off * 4096;
2728                format!("adrp {rd}, pc+{byte_offset}")
2729            }
2730            &Inst::Word4 { data } => format!("data.i32 {data}"),
2731            &Inst::Word8 { data } => format!("data.i64 {data}"),
2732            &Inst::JTSequence {
2733                default,
2734                ref targets,
2735                ridx,
2736                rtmp1,
2737                rtmp2,
2738                ..
2739            } => {
2740                let ridx = pretty_print_reg(ridx);
2741                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
2742                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
2743                let default_target = BranchTarget::Label(default).pretty_print(0);
2744                format!(
2745                    concat!(
2746                        "b.hs {} ; ",
2747                        "csel {}, xzr, {}, hs ; ",
2748                        "csdb ; ",
2749                        "adr {}, pc+16 ; ",
2750                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
2751                        "add {}, {}, {} ; ",
2752                        "br {} ; ",
2753                        "jt_entries {:?}"
2754                    ),
2755                    default_target,
2756                    rtmp2,
2757                    ridx,
2758                    rtmp1,
2759                    rtmp2,
2760                    rtmp1,
2761                    rtmp2,
2762                    rtmp1,
2763                    rtmp1,
2764                    rtmp2,
2765                    rtmp1,
2766                    targets
2767                )
2768            }
2769            &Inst::LoadExtName {
2770                rd,
2771                ref name,
2772                offset,
2773            } => {
2774                let rd = pretty_print_reg(rd.to_reg());
2775                format!("load_ext_name {rd}, {name:?}+{offset}")
2776            }
2777            &Inst::LoadAddr { rd, ref mem } => {
2778                // TODO: we really should find a better way to avoid duplication of
2779                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
2780                // expansion stage (i.e., legalization, but without the slow edit-in-place
2781                // of the existing legalization framework).
2782                let mem = mem.clone();
2783                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
2784                let mut ret = String::new();
2785                for inst in mem_insts.into_iter() {
2786                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2787                }
2788                let (reg, index_reg, offset) = match mem {
2789                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
2790                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
2791                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
2792                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
2793                };
2794                let abs_offset = if offset < 0 {
2795                    -offset as u64
2796                } else {
2797                    offset as u64
2798                };
2799                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
2800
2801                if let Some((idx, extendop)) = index_reg {
2802                    let add = Inst::AluRRRExtend {
2803                        alu_op: ALUOp::Add,
2804                        size: OperandSize::Size64,
2805                        rd,
2806                        rn: reg,
2807                        rm: idx,
2808                        extendop,
2809                    };
2810
2811                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2812                } else if offset == 0 {
2813                    let mov = Inst::gen_move(rd, reg, I64);
2814                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
2815                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
2816                    let add = Inst::AluRRImm12 {
2817                        alu_op,
2818                        size: OperandSize::Size64,
2819                        rd,
2820                        rn: reg,
2821                        imm12,
2822                    };
2823                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2824                } else {
2825                    let tmp = writable_spilltmp_reg();
2826                    for inst in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
2827                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2828                    }
2829                    let add = Inst::AluRRR {
2830                        alu_op,
2831                        size: OperandSize::Size64,
2832                        rd,
2833                        rn: reg,
2834                        rm: tmp.to_reg(),
2835                    };
2836                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2837                }
2838                ret
2839            }
2840            &Inst::Paci { key } => {
2841                let key = match key {
2842                    APIKey::AZ => "az",
2843                    APIKey::BZ => "bz",
2844                    APIKey::ASP => "asp",
2845                    APIKey::BSP => "bsp",
2846                };
2847
2848                "paci".to_string() + key
2849            }
2850            &Inst::Xpaclri => "xpaclri".to_string(),
2851            &Inst::Bti { targets } => {
2852                let targets = match targets {
2853                    BranchTargetType::None => "",
2854                    BranchTargetType::C => " c",
2855                    BranchTargetType::J => " j",
2856                    BranchTargetType::JC => " jc",
2857                };
2858
2859                "bti".to_string() + targets
2860            }
2861            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),
2862
2863            &Inst::ElfTlsGetAddr {
2864                ref symbol,
2865                rd,
2866                tmp,
2867            } => {
2868                let rd = pretty_print_reg(rd.to_reg());
2869                let tmp = pretty_print_reg(tmp.to_reg());
2870                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
2871            }
2872            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
2873                let rd = pretty_print_reg(rd.to_reg());
2874                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2875            }
2876            &Inst::Unwind { ref inst } => {
2877                format!("unwind {inst:?}")
2878            }
2879            &Inst::DummyUse { reg } => {
2880                let reg = pretty_print_reg(reg);
2881                format!("dummy_use {reg}")
2882            }
2883            &Inst::StackProbeLoop { start, end, step } => {
2884                let start = pretty_print_reg(start.to_reg());
2885                let end = pretty_print_reg(end);
2886                let step = step.pretty_print(0);
2887                format!("stack_probe_loop {start}, {end}, {step}")
2888            }
2889        }
2890    }
2891}
2892
2893//=============================================================================
2894// Label fixups and jump veneers.
2895
/// Different forms of label references for different instruction formats.
///
/// Each variant identifies the immediate field of one AArch64 instruction
/// form that will be patched (see `MachInstLabelUse::patch`) with a resolved
/// PC-relative label offset.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    Branch26,
    #[allow(dead_code)]
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    Ldr19,
    #[allow(dead_code)]
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables.
    PCRel32,
}
2920
2921impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
    const ALIGN: CodeOffset = 4;
2924
2925    /// Maximum PC-relative range (positive), inclusive.
2926    fn max_pos_range(self) -> CodeOffset {
2927        match self {
2928            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
2929            // range. Signed, so +2^(N+1) from zero. Likewise for two other
2930            // shifted cases below.
2931            LabelUse::Branch14 => (1 << 15) - 1,
2932            LabelUse::Branch19 => (1 << 20) - 1,
2933            LabelUse::Branch26 => (1 << 27) - 1,
2934            LabelUse::Ldr19 => (1 << 20) - 1,
2935            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
2936            // range.
2937            LabelUse::Adr21 => (1 << 20) - 1,
2938            LabelUse::PCRel32 => 0x7fffffff,
2939        }
2940    }
2941
2942    /// Maximum PC-relative range (negative).
2943    fn max_neg_range(self) -> CodeOffset {
2944        // All forms are twos-complement signed offsets, so negative limit is one more than
2945        // positive limit.
2946        self.max_pos_range() + 1
2947    }
2948
2949    /// Size of window into code needed to do the patch.
2950    fn patch_size(self) -> CodeOffset {
2951        // Patch is on one instruction only for all of these label reference types.
2952        4
2953    }
2954
2955    /// Perform the patch.
2956    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
2957        let pc_rel = (label_offset as i64) - (use_offset as i64);
2958        debug_assert!(pc_rel <= self.max_pos_range() as i64);
2959        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
2960        let pc_rel = pc_rel as u32;
2961        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
2962        let mask = match self {
2963            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
2964            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
2965            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
2966            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
2967            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
2968            LabelUse::PCRel32 => 0xffffffff,
2969        };
2970        let pc_rel_shifted = match self {
2971            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
2972            _ => {
2973                debug_assert!(pc_rel & 3 == 0);
2974                pc_rel >> 2
2975            }
2976        };
2977        let pc_rel_inserted = match self {
2978            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
2979            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
2980            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
2981            LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
2982            LabelUse::PCRel32 => pc_rel_shifted,
2983        };
2984        let is_add = match self {
2985            LabelUse::PCRel32 => true,
2986            _ => false,
2987        };
2988        let insn_word = if is_add {
2989            insn_word.wrapping_add(pc_rel_inserted)
2990        } else {
2991            (insn_word & !mask) | pc_rel_inserted
2992        };
2993        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
2994    }
2995
2996    /// Is a veneer supported for this label reference type?
2997    fn supports_veneer(self) -> bool {
2998        match self {
2999            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
3000            LabelUse::Branch26 => true,                      // veneer is a PCRel32
3001            _ => false,
3002        }
3003    }
3004
3005    /// How large is the veneer, if supported?
3006    fn veneer_size(self) -> CodeOffset {
3007        match self {
3008            LabelUse::Branch14 | LabelUse::Branch19 => 4,
3009            LabelUse::Branch26 => 20,
3010            _ => unreachable!(),
3011        }
3012    }
3013
3014    fn worst_case_veneer_size() -> CodeOffset {
3015        20
3016    }
3017
3018    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
3019    /// an offset and label-use for the veneer's use of the original label.
3020    fn generate_veneer(
3021        self,
3022        buffer: &mut [u8],
3023        veneer_offset: CodeOffset,
3024    ) -> (CodeOffset, LabelUse) {
3025        match self {
3026            LabelUse::Branch14 | LabelUse::Branch19 => {
3027                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
3028                // bother with constructing an Inst.
3029                let insn_word = 0b000101 << 26;
3030                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3031                (veneer_offset, LabelUse::Branch26)
3032            }
3033
3034            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
3035            // get a further range. This jump translates to a jump to a
3036            // relative location based on the address of the constant loaded
3037            // from here.
3038            //
3039            // If this path is taken from a call instruction then caller-saved
3040            // registers are available (minus arguments), so x16/x17 are
3041            // available. Otherwise for intra-function jumps we also reserve
3042            // x16/x17 as spill-style registers. In both cases these are
3043            // available for us to use.
3044            LabelUse::Branch26 => {
3045                let tmp1 = regs::spilltmp_reg();
3046                let tmp1_w = regs::writable_spilltmp_reg();
3047                let tmp2 = regs::tmp2_reg();
3048                let tmp2_w = regs::writable_tmp2_reg();
3049                // ldrsw x16, 16
3050                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
3051                // adr x17, 12
3052                let adr = emit::enc_adr(12, tmp2_w);
3053                // add x16, x16, x17
3054                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
3055                // br x16
3056                let br = emit::enc_br(tmp1);
3057                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
3058                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
3059                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
3060                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
3061                // the 4-byte signed immediate we'll load is after these
3062                // instructions, 16-bytes in.
3063                (veneer_offset + 16, LabelUse::PCRel32)
3064            }
3065
3066            _ => panic!("Unsupported label-reference type for veneer generation!"),
3067        }
3068    }
3069
3070    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
3071        match (reloc, addend) {
3072            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
3073            _ => None,
3074        }
3075    }
3076}
3077
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression guard: fail loudly if the size of the `Inst` enum grows
    /// unintentionally, since it is stored in bulk for every instruction.
    #[test]
    fn inst_size_test() {
        // All 64-bit targets, plus 32-bit arm, pack `Inst` into 32 bytes;
        // other 32-bit targets get away with 28.
        let expected = if cfg!(target_arch = "arm") || !cfg!(target_pointer_width = "32") {
            32
        } else {
            28
        };
        assert_eq!(expected, std::mem::size_of::<Inst>());
    }
}
3093}