Skip to main content

cranelift_codegen/isa/aarch64/inst/
mod.rs

1//! This module defines aarch64-specific machine instruction types.
2
3use crate::binemit::{Addend, CodeOffset, Reloc};
4use crate::ir::types::{F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};
5use crate::ir::{MemFlagsData, Type, types};
6use crate::isa::{CallConv, FunctionAlignment};
7use crate::machinst::*;
8use crate::{CodegenError, CodegenResult, settings};
9
10use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
11
12use alloc::string::{String, ToString};
13use alloc::vec::Vec;
14use core::fmt::Write;
15use core::slice;
16use smallvec::{SmallVec, smallvec};
17
18pub(crate) mod regs;
19pub use self::regs::*;
20pub mod imms;
21pub use self::imms::*;
22pub mod args;
23pub use self::args::*;
24pub mod emit;
25pub(crate) use self::emit::*;
26use crate::isa::aarch64::abi::AArch64MachineDeps;
27
28pub(crate) mod unwind;
29
30#[cfg(test)]
31mod emit_tests;
32
33//=============================================================================
34// Instructions (top level): definition
35
36pub use crate::isa::aarch64::lower::isle::generated_code::{
37    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
38    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
39    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
40    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
41};
42
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
///
/// Each variant carries the shift amount as an [`FPURightShiftImm`].
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}
51
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
///
/// Each variant carries the shift amount as an [`FPULeftShiftImm`].
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert. Rd |= Rn << #imm
    ///
    /// The previous value of Rd is an input to the operation.
    Sli32(FPULeftShiftImm),
    /// Shift left and insert. Rd |= Rn << #imm
    ///
    /// The previous value of Rd is an input to the operation.
    Sli64(FPULeftShiftImm),
}
62
63impl BitOp {
64    /// Get the assembly mnemonic for this opcode.
65    pub fn op_str(&self) -> &'static str {
66        match self {
67            BitOp::RBit => "rbit",
68            BitOp::Clz => "clz",
69            BitOp::Cls => "cls",
70            BitOp::Rev16 => "rev16",
71            BitOp::Rev32 => "rev32",
72            BitOp::Rev64 => "rev64",
73        }
74    }
75}
76
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
///
/// The type parameter `T` is the representation of the call destination; it
/// is whatever the instruction using this info stores in `dest`.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to
    pub dest: T,
    /// Arguments to the call instruction.
    ///
    /// Each entry pairs a virtual register with the physical register it is
    /// reported to the register allocator as a fixed use of.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    ///
    /// `None` means no key is used.
    pub key: Option<APIKey>,
    /// Whether pointer-auth return addresses are signed even without frame setup.
    pub sign_return_address_all: bool,
}
94
/// Counts how many of the lowest `num_half_words` 16-bit half words of
/// `value` are equal to zero.
///
/// Half words are examined from least significant to most significant. Once
/// `value` has been shifted out completely, every further half word counts
/// as zero.
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
    (0..num_half_words).fold(0, |zeros, _| {
        // Inspect the current lowest half word, then shift the next one into
        // place for the following step.
        let low_is_zero = value & 0xffff == 0;
        value >>= 16;
        zeros + usize::from(low_is_zero)
    })
}
106
107impl Inst {
108    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
109    /// logical immediate, or constant pool).
110    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
111        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
112        // if modifications are made here before this is deleted after moving to
113        // ISLE then those locations should be updated as well.
114
115        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
116            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
117            smallvec![Inst::MovWide {
118                op: MoveWideOp::MovZ,
119                rd,
120                imm,
121                size: OperandSize::Size64
122            }]
123        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
124            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
125            smallvec![Inst::MovWide {
126                op: MoveWideOp::MovN,
127                rd,
128                imm,
129                size: OperandSize::Size64
130            }]
131        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
132            // Weird logical-instruction immediate in ORI using zero register
133            smallvec![Inst::AluRRImmLogic {
134                alu_op: ALUOp::Orr,
135                size: OperandSize::Size64,
136                rd,
137                rn: zero_reg(),
138                imml,
139            }]
140        } else {
141            let mut insts = smallvec![];
142
143            // If the top 32 bits are zero, use 32-bit `mov` operations.
144            let (num_half_words, size, negated) = if value >> 32 == 0 {
145                (2, OperandSize::Size32, (!value << 32) >> 32)
146            } else {
147                (4, OperandSize::Size64, !value)
148            };
149
150            // If the number of 0xffff half words is greater than the number of 0x0000 half words
151            // it is more efficient to use `movn` for the first instruction.
152            let first_is_inverted = count_zero_half_words(negated, num_half_words)
153                > count_zero_half_words(value, num_half_words);
154
155            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
156            // instruction used.
157            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
158
159            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
160                .filter_map(|i| {
161                    let imm16 = (value >> (16 * i)) & 0xffff;
162                    if imm16 == ignored_halfword {
163                        None
164                    } else {
165                        Some((i, imm16))
166                    }
167                })
168                .collect();
169
170            let mut prev_result = None;
171            for (i, imm16) in halfwords {
172                let shift = i * 16;
173
174                if let Some(rn) = prev_result {
175                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
176                    insts.push(Inst::MovK { rd, rn, imm, size });
177                } else {
178                    if first_is_inverted {
179                        let imm =
180                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
181                                .unwrap();
182                        insts.push(Inst::MovWide {
183                            op: MoveWideOp::MovN,
184                            rd,
185                            imm,
186                            size,
187                        });
188                    } else {
189                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
190                        insts.push(Inst::MovWide {
191                            op: MoveWideOp::MovZ,
192                            rd,
193                            imm,
194                            size,
195                        });
196                    }
197                }
198
199                prev_result = Some(rd.to_reg());
200            }
201
202            assert!(prev_result.is_some());
203
204            insts
205        }
206    }
207
208    /// Generic constructor for a load (zero-extending where appropriate).
209    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlagsData) -> Inst {
210        match ty {
211            I8 => Inst::ULoad8 {
212                rd: into_reg,
213                mem,
214                flags,
215            },
216            I16 => Inst::ULoad16 {
217                rd: into_reg,
218                mem,
219                flags,
220            },
221            I32 => Inst::ULoad32 {
222                rd: into_reg,
223                mem,
224                flags,
225            },
226            I64 => Inst::ULoad64 {
227                rd: into_reg,
228                mem,
229                flags,
230            },
231            _ => {
232                if ty.is_vector() || ty.is_float() {
233                    let bits = ty_bits(ty);
234                    let rd = into_reg;
235
236                    match bits {
237                        128 => Inst::FpuLoad128 { rd, mem, flags },
238                        64 => Inst::FpuLoad64 { rd, mem, flags },
239                        32 => Inst::FpuLoad32 { rd, mem, flags },
240                        16 => Inst::FpuLoad16 { rd, mem, flags },
241                        _ => unimplemented!("gen_load({})", ty),
242                    }
243                } else {
244                    unimplemented!("gen_load({})", ty);
245                }
246            }
247        }
248    }
249
250    /// Generic constructor for a store.
251    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlagsData) -> Inst {
252        match ty {
253            I8 => Inst::Store8 {
254                rd: from_reg,
255                mem,
256                flags,
257            },
258            I16 => Inst::Store16 {
259                rd: from_reg,
260                mem,
261                flags,
262            },
263            I32 => Inst::Store32 {
264                rd: from_reg,
265                mem,
266                flags,
267            },
268            I64 => Inst::Store64 {
269                rd: from_reg,
270                mem,
271                flags,
272            },
273            _ => {
274                if ty.is_vector() || ty.is_float() {
275                    let bits = ty_bits(ty);
276                    let rd = from_reg;
277
278                    match bits {
279                        128 => Inst::FpuStore128 { rd, mem, flags },
280                        64 => Inst::FpuStore64 { rd, mem, flags },
281                        32 => Inst::FpuStore32 { rd, mem, flags },
282                        16 => Inst::FpuStore16 { rd, mem, flags },
283                        _ => unimplemented!("gen_store({})", ty),
284                    }
285                } else {
286                    unimplemented!("gen_store({})", ty);
287                }
288            }
289        }
290    }
291
292    /// What type does this load or store instruction access in memory? When
293    /// uimm12 encoding is used, the size of this type is the amount that
294    /// immediate offsets are scaled by.
295    pub fn mem_type(&self) -> Option<Type> {
296        match self {
297            Inst::ULoad8 { .. } => Some(I8),
298            Inst::SLoad8 { .. } => Some(I8),
299            Inst::ULoad16 { .. } => Some(I16),
300            Inst::SLoad16 { .. } => Some(I16),
301            Inst::ULoad32 { .. } => Some(I32),
302            Inst::SLoad32 { .. } => Some(I32),
303            Inst::ULoad64 { .. } => Some(I64),
304            Inst::FpuLoad16 { .. } => Some(F16),
305            Inst::FpuLoad32 { .. } => Some(F32),
306            Inst::FpuLoad64 { .. } => Some(F64),
307            Inst::FpuLoad128 { .. } => Some(I8X16),
308            Inst::Store8 { .. } => Some(I8),
309            Inst::Store16 { .. } => Some(I16),
310            Inst::Store32 { .. } => Some(I32),
311            Inst::Store64 { .. } => Some(I64),
312            Inst::FpuStore16 { .. } => Some(F16),
313            Inst::FpuStore32 { .. } => Some(F32),
314            Inst::FpuStore64 { .. } => Some(F64),
315            Inst::FpuStore128 { .. } => Some(I8X16),
316            _ => None,
317        }
318    }
319}
320
321//=============================================================================
322// Instructions: get_regs
323
324fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
325    match memarg {
326        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
327            collector.reg_use(rn);
328        }
329        AMode::RegReg { rn, rm, .. }
330        | AMode::RegScaled { rn, rm, .. }
331        | AMode::RegScaledExtended { rn, rm, .. }
332        | AMode::RegExtended { rn, rm, .. } => {
333            collector.reg_use(rn);
334            collector.reg_use(rm);
335        }
336        AMode::Label { .. } => {}
337        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
338        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
339        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
340        AMode::RegOffset { rn, .. } => {
341            collector.reg_use(rn);
342        }
343        AMode::Const { .. } => {}
344    }
345}
346
347fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
348    match pairmemarg {
349        PairAMode::SignedOffset { reg, .. } => {
350            collector.reg_use(reg);
351        }
352        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
353    }
354}
355
356fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
357    match inst {
358        Inst::AluRRR { rd, rn, rm, .. } => {
359            collector.reg_def(rd);
360            collector.reg_use(rn);
361            collector.reg_use(rm);
362        }
363        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
364            collector.reg_def(rd);
365            collector.reg_use(rn);
366            collector.reg_use(rm);
367            collector.reg_use(ra);
368        }
369        Inst::AluRRImm12 { rd, rn, .. } => {
370            collector.reg_def(rd);
371            collector.reg_use(rn);
372        }
373        Inst::AluRRImmLogic { rd, rn, .. } => {
374            collector.reg_def(rd);
375            collector.reg_use(rn);
376        }
377        Inst::AluRRImmShift { rd, rn, .. } => {
378            collector.reg_def(rd);
379            collector.reg_use(rn);
380        }
381        Inst::AluRRRShift { rd, rn, rm, .. } => {
382            collector.reg_def(rd);
383            collector.reg_use(rn);
384            collector.reg_use(rm);
385        }
386        Inst::AluRRRExtend { rd, rn, rm, .. } => {
387            collector.reg_def(rd);
388            collector.reg_use(rn);
389            collector.reg_use(rm);
390        }
391        Inst::BitRR { rd, rn, .. } => {
392            collector.reg_def(rd);
393            collector.reg_use(rn);
394        }
395        Inst::ULoad8 { rd, mem, .. }
396        | Inst::SLoad8 { rd, mem, .. }
397        | Inst::ULoad16 { rd, mem, .. }
398        | Inst::SLoad16 { rd, mem, .. }
399        | Inst::ULoad32 { rd, mem, .. }
400        | Inst::SLoad32 { rd, mem, .. }
401        | Inst::ULoad64 { rd, mem, .. } => {
402            collector.reg_def(rd);
403            memarg_operands(mem, collector);
404        }
405        Inst::Store8 { rd, mem, .. }
406        | Inst::Store16 { rd, mem, .. }
407        | Inst::Store32 { rd, mem, .. }
408        | Inst::Store64 { rd, mem, .. } => {
409            collector.reg_use(rd);
410            memarg_operands(mem, collector);
411        }
412        Inst::StoreP64 { rt, rt2, mem, .. } => {
413            collector.reg_use(rt);
414            collector.reg_use(rt2);
415            pairmemarg_operands(mem, collector);
416        }
417        Inst::LoadP64 { rt, rt2, mem, .. } => {
418            collector.reg_def(rt);
419            collector.reg_def(rt2);
420            pairmemarg_operands(mem, collector);
421        }
422        Inst::Mov { rd, rm, .. } => {
423            collector.reg_def(rd);
424            collector.reg_use(rm);
425        }
426        Inst::MovFromPReg { rd, rm } => {
427            debug_assert!(rd.to_reg().is_virtual());
428            collector.reg_def(rd);
429            collector.reg_fixed_nonallocatable(*rm);
430        }
431        Inst::MovToPReg { rd, rm } => {
432            debug_assert!(rm.is_virtual());
433            collector.reg_fixed_nonallocatable(*rd);
434            collector.reg_use(rm);
435        }
436        Inst::MovK { rd, rn, .. } => {
437            collector.reg_use(rn);
438            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
439        }
440        Inst::MovWide { rd, .. } => {
441            collector.reg_def(rd);
442        }
443        Inst::CSel { rd, rn, rm, .. } => {
444            collector.reg_def(rd);
445            collector.reg_use(rn);
446            collector.reg_use(rm);
447        }
448        Inst::CSNeg { rd, rn, rm, .. } => {
449            collector.reg_def(rd);
450            collector.reg_use(rn);
451            collector.reg_use(rm);
452        }
453        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
454            collector.reg_def(rd);
455        }
456        Inst::CCmp { rn, rm, .. } => {
457            collector.reg_use(rn);
458            collector.reg_use(rm);
459        }
460        Inst::CCmpImm { rn, .. } => {
461            collector.reg_use(rn);
462        }
463        Inst::AtomicRMWLoop {
464            op,
465            addr,
466            operand,
467            oldval,
468            scratch1,
469            scratch2,
470            ..
471        } => {
472            collector.reg_fixed_use(addr, xreg(25));
473            collector.reg_fixed_use(operand, xreg(26));
474            collector.reg_fixed_def(oldval, xreg(27));
475            collector.reg_fixed_def(scratch1, xreg(24));
476            if *op != AtomicRMWLoopOp::Xchg {
477                collector.reg_fixed_def(scratch2, xreg(28));
478            }
479        }
480        Inst::AtomicRMW { rs, rt, rn, .. } => {
481            collector.reg_use(rs);
482            collector.reg_def(rt);
483            collector.reg_use(rn);
484        }
485        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
486            collector.reg_reuse_def(rd, 1); // reuse `rs`.
487            collector.reg_use(rs);
488            collector.reg_use(rt);
489            collector.reg_use(rn);
490        }
491        Inst::AtomicCASLoop {
492            addr,
493            expected,
494            replacement,
495            oldval,
496            scratch,
497            ..
498        } => {
499            collector.reg_fixed_use(addr, xreg(25));
500            collector.reg_fixed_use(expected, xreg(26));
501            collector.reg_fixed_use(replacement, xreg(28));
502            collector.reg_fixed_def(oldval, xreg(27));
503            collector.reg_fixed_def(scratch, xreg(24));
504        }
505        Inst::LoadAcquire { rt, rn, .. } => {
506            collector.reg_use(rn);
507            collector.reg_def(rt);
508        }
509        Inst::StoreRelease { rt, rn, .. } => {
510            collector.reg_use(rn);
511            collector.reg_use(rt);
512        }
513        Inst::Fence {} | Inst::Csdb {} => {}
514        Inst::FpuMove32 { rd, rn } => {
515            collector.reg_def(rd);
516            collector.reg_use(rn);
517        }
518        Inst::FpuMove64 { rd, rn } => {
519            collector.reg_def(rd);
520            collector.reg_use(rn);
521        }
522        Inst::FpuMove128 { rd, rn } => {
523            collector.reg_def(rd);
524            collector.reg_use(rn);
525        }
526        Inst::FpuMoveFromVec { rd, rn, .. } => {
527            collector.reg_def(rd);
528            collector.reg_use(rn);
529        }
530        Inst::FpuExtend { rd, rn, .. } => {
531            collector.reg_def(rd);
532            collector.reg_use(rn);
533        }
534        Inst::FpuRR { rd, rn, .. } => {
535            collector.reg_def(rd);
536            collector.reg_use(rn);
537        }
538        Inst::FpuRRR { rd, rn, rm, .. } => {
539            collector.reg_def(rd);
540            collector.reg_use(rn);
541            collector.reg_use(rm);
542        }
543        Inst::FpuRRI { rd, rn, .. } => {
544            collector.reg_def(rd);
545            collector.reg_use(rn);
546        }
547        Inst::FpuRRIMod { rd, ri, rn, .. } => {
548            collector.reg_reuse_def(rd, 1); // reuse `ri`.
549            collector.reg_use(ri);
550            collector.reg_use(rn);
551        }
552        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
553            collector.reg_def(rd);
554            collector.reg_use(rn);
555            collector.reg_use(rm);
556            collector.reg_use(ra);
557        }
558        Inst::VecMisc { rd, rn, .. } => {
559            collector.reg_def(rd);
560            collector.reg_use(rn);
561        }
562
563        Inst::VecLanes { rd, rn, .. } => {
564            collector.reg_def(rd);
565            collector.reg_use(rn);
566        }
567        Inst::VecShiftImm { rd, rn, .. } => {
568            collector.reg_def(rd);
569            collector.reg_use(rn);
570        }
571        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
572            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
573            collector.reg_use(ri);
574            collector.reg_use(rn);
575        }
576        Inst::VecExtract { rd, rn, rm, .. } => {
577            collector.reg_def(rd);
578            collector.reg_use(rn);
579            collector.reg_use(rm);
580        }
581        Inst::VecTbl { rd, rn, rm } => {
582            collector.reg_use(rn);
583            collector.reg_use(rm);
584            collector.reg_def(rd);
585        }
586        Inst::VecTblExt { rd, ri, rn, rm } => {
587            collector.reg_use(rn);
588            collector.reg_use(rm);
589            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
590            collector.reg_use(ri);
591        }
592
593        Inst::VecTbl2 { rd, rn, rn2, rm } => {
594            // Constrain to v30 / v31 so that we satisfy the "adjacent
595            // registers" constraint without use of pinned vregs in
596            // lowering.
597            collector.reg_fixed_use(rn, vreg(30));
598            collector.reg_fixed_use(rn2, vreg(31));
599            collector.reg_use(rm);
600            collector.reg_def(rd);
601        }
602        Inst::VecTbl2Ext {
603            rd,
604            ri,
605            rn,
606            rn2,
607            rm,
608        } => {
609            // Constrain to v30 / v31 so that we satisfy the "adjacent
610            // registers" constraint without use of pinned vregs in
611            // lowering.
612            collector.reg_fixed_use(rn, vreg(30));
613            collector.reg_fixed_use(rn2, vreg(31));
614            collector.reg_use(rm);
615            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
616            collector.reg_use(ri);
617        }
618        Inst::VecLoadReplicate { rd, rn, .. } => {
619            collector.reg_def(rd);
620            collector.reg_use(rn);
621        }
622        Inst::VecCSel { rd, rn, rm, .. } => {
623            collector.reg_def(rd);
624            collector.reg_use(rn);
625            collector.reg_use(rm);
626        }
627        Inst::FpuCmp { rn, rm, .. } => {
628            collector.reg_use(rn);
629            collector.reg_use(rm);
630        }
631        Inst::FpuLoad16 { rd, mem, .. } => {
632            collector.reg_def(rd);
633            memarg_operands(mem, collector);
634        }
635        Inst::FpuLoad32 { rd, mem, .. } => {
636            collector.reg_def(rd);
637            memarg_operands(mem, collector);
638        }
639        Inst::FpuLoad64 { rd, mem, .. } => {
640            collector.reg_def(rd);
641            memarg_operands(mem, collector);
642        }
643        Inst::FpuLoad128 { rd, mem, .. } => {
644            collector.reg_def(rd);
645            memarg_operands(mem, collector);
646        }
647        Inst::FpuStore16 { rd, mem, .. } => {
648            collector.reg_use(rd);
649            memarg_operands(mem, collector);
650        }
651        Inst::FpuStore32 { rd, mem, .. } => {
652            collector.reg_use(rd);
653            memarg_operands(mem, collector);
654        }
655        Inst::FpuStore64 { rd, mem, .. } => {
656            collector.reg_use(rd);
657            memarg_operands(mem, collector);
658        }
659        Inst::FpuStore128 { rd, mem, .. } => {
660            collector.reg_use(rd);
661            memarg_operands(mem, collector);
662        }
663        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
664            collector.reg_def(rt);
665            collector.reg_def(rt2);
666            pairmemarg_operands(mem, collector);
667        }
668        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
669            collector.reg_use(rt);
670            collector.reg_use(rt2);
671            pairmemarg_operands(mem, collector);
672        }
673        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
674            collector.reg_def(rt);
675            collector.reg_def(rt2);
676            pairmemarg_operands(mem, collector);
677        }
678        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
679            collector.reg_use(rt);
680            collector.reg_use(rt2);
681            pairmemarg_operands(mem, collector);
682        }
683        Inst::FpuToInt { rd, rn, .. } => {
684            collector.reg_def(rd);
685            collector.reg_use(rn);
686        }
687        Inst::IntToFpu { rd, rn, .. } => {
688            collector.reg_def(rd);
689            collector.reg_use(rn);
690        }
691        Inst::FpuCSel16 { rd, rn, rm, .. }
692        | Inst::FpuCSel32 { rd, rn, rm, .. }
693        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
694            collector.reg_def(rd);
695            collector.reg_use(rn);
696            collector.reg_use(rm);
697        }
698        Inst::FpuRound { rd, rn, .. } => {
699            collector.reg_def(rd);
700            collector.reg_use(rn);
701        }
702        Inst::MovToFpu { rd, rn, .. } => {
703            collector.reg_def(rd);
704            collector.reg_use(rn);
705        }
706        Inst::FpuMoveFPImm { rd, .. } => {
707            collector.reg_def(rd);
708        }
709        Inst::MovToVec { rd, ri, rn, .. } => {
710            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
711            collector.reg_use(ri);
712            collector.reg_use(rn);
713        }
714        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
715            collector.reg_def(rd);
716            collector.reg_use(rn);
717        }
718        Inst::VecDup { rd, rn, .. } => {
719            collector.reg_def(rd);
720            collector.reg_use(rn);
721        }
722        Inst::VecDupFromFpu { rd, rn, .. } => {
723            collector.reg_def(rd);
724            collector.reg_use(rn);
725        }
726        Inst::VecDupFPImm { rd, .. } => {
727            collector.reg_def(rd);
728        }
729        Inst::VecDupImm { rd, .. } => {
730            collector.reg_def(rd);
731        }
732        Inst::VecExtend { rd, rn, .. } => {
733            collector.reg_def(rd);
734            collector.reg_use(rn);
735        }
736        Inst::VecMovElement { rd, ri, rn, .. } => {
737            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
738            collector.reg_use(ri);
739            collector.reg_use(rn);
740        }
741        Inst::VecRRLong { rd, rn, .. } => {
742            collector.reg_def(rd);
743            collector.reg_use(rn);
744        }
745        Inst::VecRRNarrowLow { rd, rn, .. } => {
746            collector.reg_use(rn);
747            collector.reg_def(rd);
748        }
749        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
750            collector.reg_use(rn);
751            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
752            collector.reg_use(ri);
753        }
754        Inst::VecRRPair { rd, rn, .. } => {
755            collector.reg_def(rd);
756            collector.reg_use(rn);
757        }
758        Inst::VecRRRLong { rd, rn, rm, .. } => {
759            collector.reg_def(rd);
760            collector.reg_use(rn);
761            collector.reg_use(rm);
762        }
763        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
764            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
765            collector.reg_use(ri);
766            collector.reg_use(rn);
767            collector.reg_use(rm);
768        }
769        Inst::VecRRPairLong { rd, rn, .. } => {
770            collector.reg_def(rd);
771            collector.reg_use(rn);
772        }
773        Inst::VecRRR { rd, rn, rm, .. } => {
774            collector.reg_def(rd);
775            collector.reg_use(rn);
776            collector.reg_use(rm);
777        }
778        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
779            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
780            collector.reg_use(ri);
781            collector.reg_use(rn);
782            collector.reg_use(rm);
783        }
784        Inst::MovToNZCV { rn } => {
785            collector.reg_use(rn);
786        }
787        Inst::MovFromNZCV { rd } => {
788            collector.reg_def(rd);
789        }
790        Inst::Extend { rd, rn, .. } => {
791            collector.reg_def(rd);
792            collector.reg_use(rn);
793        }
794        Inst::Args { args } => {
795            for ArgPair { vreg, preg } in args {
796                collector.reg_fixed_def(vreg, *preg);
797            }
798        }
799        Inst::Rets { rets } => {
800            for RetPair { vreg, preg } in rets {
801                collector.reg_fixed_use(vreg, *preg);
802            }
803        }
804        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
805        Inst::Jump { .. } => {}
806        Inst::Call { info, .. } => {
807            let CallInfo { uses, defs, .. } = &mut **info;
808            for CallArgPair { vreg, preg } in uses {
809                collector.reg_fixed_use(vreg, *preg);
810            }
811            for CallRetPair { vreg, location } in defs {
812                match location {
813                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
814                    RetLocation::Stack(..) => collector.any_def(vreg),
815                }
816            }
817            collector.reg_clobbers(info.clobbers);
818            if let Some(try_call_info) = &mut info.try_call_info {
819                try_call_info.collect_operands(collector);
820            }
821        }
822        Inst::CallInd { info, .. } => {
823            let CallInfo {
824                dest, uses, defs, ..
825            } = &mut **info;
826            collector.reg_use(dest);
827            for CallArgPair { vreg, preg } in uses {
828                collector.reg_fixed_use(vreg, *preg);
829            }
830            for CallRetPair { vreg, location } in defs {
831                match location {
832                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
833                    RetLocation::Stack(..) => collector.any_def(vreg),
834                }
835            }
836            collector.reg_clobbers(info.clobbers);
837            if let Some(try_call_info) = &mut info.try_call_info {
838                try_call_info.collect_operands(collector);
839            }
840        }
841        Inst::ReturnCall { info } => {
842            for CallArgPair { vreg, preg } in &mut info.uses {
843                collector.reg_fixed_use(vreg, *preg);
844            }
845        }
846        Inst::ReturnCallInd { info } => {
847            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
848            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
849            // register that won't be clobbered by the callee-save restore code emitted with a
850            // return_call_indirect.
851            collector.reg_fixed_use(&mut info.dest, xreg(1));
852            for CallArgPair { vreg, preg } in &mut info.uses {
853                collector.reg_fixed_use(vreg, *preg);
854            }
855        }
856        Inst::CondBr { kind, .. } => match kind {
857            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
858            CondBrKind::Cond(_) => {}
859        },
860        Inst::TestBitAndBranch { rn, .. } => {
861            collector.reg_use(rn);
862        }
863        Inst::IndirectBr { rn, .. } => {
864            collector.reg_use(rn);
865        }
866        Inst::Nop0 | Inst::Nop4 => {}
867        Inst::Brk => {}
868        Inst::Udf { .. } => {}
869        Inst::TrapIf { kind, .. } => match kind {
870            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
871            CondBrKind::Cond(_) => {}
872        },
873        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
874            collector.reg_def(rd);
875        }
876        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
877        Inst::JTSequence {
878            ridx, rtmp1, rtmp2, ..
879        } => {
880            collector.reg_use(ridx);
881            collector.reg_early_def(rtmp1);
882            collector.reg_early_def(rtmp2);
883        }
884        Inst::LoadExtNameGot { rd, .. }
885        | Inst::LoadExtNameNear { rd, .. }
886        | Inst::LoadExtNameFar { rd, .. } => {
887            collector.reg_def(rd);
888        }
889        Inst::LoadAddr { rd, mem } => {
890            collector.reg_def(rd);
891            memarg_operands(mem, collector);
892        }
893        Inst::Paci { .. } | Inst::Xpaclri => {
894            // Neither LR nor SP is an allocatable register, so there is no need
895            // to do anything.
896        }
897        Inst::Bti { .. } => {}
898
899        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
900            // TLSDESC has a very neat calling convention. It is required to preserve
901            // all registers except x0 and x30. X30 is non allocatable in cranelift since
902            // it's the link register.
903            //
904            // Additionally we need a second register as a temporary register for the
905            // TLSDESC sequence. This register can be any register other than x0 (and x30).
906            collector.reg_fixed_def(rd, regs::xreg(0));
907            collector.reg_early_def(tmp);
908        }
909        Inst::MachOTlsGetAddr { rd, .. } => {
910            collector.reg_fixed_def(rd, regs::xreg(0));
911            let mut clobbers =
912                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
913            clobbers.remove(regs::xreg_preg(0));
914            collector.reg_clobbers(clobbers);
915        }
916        Inst::Unwind { .. } => {}
917        Inst::EmitIsland { .. } => {}
918        Inst::DummyUse { reg } => {
919            collector.reg_use(reg);
920        }
921        Inst::LabelAddress { dst, .. } => {
922            collector.reg_def(dst);
923        }
924        Inst::SequencePoint { .. } => {}
925        Inst::StackProbeLoop { start, end, .. } => {
926            collector.reg_early_def(start);
927            collector.reg_use(end);
928        }
929    }
930}
931
932//=============================================================================
933// Instructions: misc functions and external interface
934
935impl MachInst for Inst {
936    type ABIMachineSpec = AArch64MachineDeps;
937    type LabelUse = LabelUse;
938
939    // "CLIF" in hex, to make the trap recognizable during
940    // debugging.
941    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();
942
943    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
944        aarch64_get_operands(self, collector);
945    }
946
947    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
948        match self {
949            &Inst::Mov {
950                size: OperandSize::Size64,
951                rd,
952                rm,
953            } => Some((rd, rm)),
954            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
955            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
956            _ => None,
957        }
958    }
959
960    fn is_included_in_clobbers(&self) -> bool {
961        let (caller, callee, is_exception) = match self {
962            Inst::Args { .. } => return false,
963            Inst::Call { info } => (
964                info.caller_conv,
965                info.callee_conv,
966                info.try_call_info.is_some(),
967            ),
968            Inst::CallInd { info } => (
969                info.caller_conv,
970                info.callee_conv,
971                info.try_call_info.is_some(),
972            ),
973            _ => return true,
974        };
975
976        // We exclude call instructions from the clobber-set when they are calls
977        // from caller to callee that both clobber the same register (such as
978        // using the same or similar ABIs). Such calls cannot possibly force any
979        // new registers to be saved in the prologue, because anything that the
980        // callee clobbers, the caller is also allowed to clobber. This both
981        // saves work and enables us to more precisely follow the
982        // half-caller-save, half-callee-save SysV ABI for some vector
983        // registers.
984        //
985        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
986        // more information on this ABI-implementation hack.
987        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
988        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);
989
990        let mut all_clobbers = caller_clobbers;
991        all_clobbers.union_from(callee_clobbers);
992        all_clobbers != caller_clobbers
993    }
994
995    fn is_trap(&self) -> bool {
996        match self {
997            Self::Udf { .. } => true,
998            _ => false,
999        }
1000    }
1001
1002    fn is_args(&self) -> bool {
1003        match self {
1004            Self::Args { .. } => true,
1005            _ => false,
1006        }
1007    }
1008
1009    fn call_type(&self) -> CallType {
1010        match self {
1011            Inst::Call { .. }
1012            | Inst::CallInd { .. }
1013            | Inst::ElfTlsGetAddr { .. }
1014            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1015
1016            Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,
1017
1018            _ => CallType::None,
1019        }
1020    }
1021
1022    fn is_term(&self) -> MachTerminator {
1023        match self {
1024            &Inst::Rets { .. } => MachTerminator::Ret,
1025            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
1026            &Inst::Jump { .. } => MachTerminator::Branch,
1027            &Inst::CondBr { .. } => MachTerminator::Branch,
1028            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
1029            &Inst::IndirectBr { .. } => MachTerminator::Branch,
1030            &Inst::JTSequence { .. } => MachTerminator::Branch,
1031            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1032            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1033            _ => MachTerminator::None,
1034        }
1035    }
1036
1037    fn is_mem_access(&self) -> bool {
1038        match self {
1039            &Inst::ULoad8 { .. }
1040            | &Inst::SLoad8 { .. }
1041            | &Inst::ULoad16 { .. }
1042            | &Inst::SLoad16 { .. }
1043            | &Inst::ULoad32 { .. }
1044            | &Inst::SLoad32 { .. }
1045            | &Inst::ULoad64 { .. }
1046            | &Inst::LoadP64 { .. }
1047            | &Inst::FpuLoad16 { .. }
1048            | &Inst::FpuLoad32 { .. }
1049            | &Inst::FpuLoad64 { .. }
1050            | &Inst::FpuLoad128 { .. }
1051            | &Inst::FpuLoadP64 { .. }
1052            | &Inst::FpuLoadP128 { .. }
1053            | &Inst::Store8 { .. }
1054            | &Inst::Store16 { .. }
1055            | &Inst::Store32 { .. }
1056            | &Inst::Store64 { .. }
1057            | &Inst::StoreP64 { .. }
1058            | &Inst::FpuStore16 { .. }
1059            | &Inst::FpuStore32 { .. }
1060            | &Inst::FpuStore64 { .. }
1061            | &Inst::FpuStore128 { .. } => true,
1062            // TODO: verify this carefully
1063            _ => false,
1064        }
1065    }
1066
1067    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
1068        let bits = ty.bits();
1069
1070        assert!(bits <= 128);
1071        assert!(to_reg.to_reg().class() == from_reg.class());
1072        match from_reg.class() {
1073            RegClass::Int => Inst::Mov {
1074                size: OperandSize::Size64,
1075                rd: to_reg,
1076                rm: from_reg,
1077            },
1078            RegClass::Float => {
1079                if bits > 64 {
1080                    Inst::FpuMove128 {
1081                        rd: to_reg,
1082                        rn: from_reg,
1083                    }
1084                } else {
1085                    Inst::FpuMove64 {
1086                        rd: to_reg,
1087                        rn: from_reg,
1088                    }
1089                }
1090            }
1091            RegClass::Vector => unreachable!(),
1092        }
1093    }
1094
1095    fn is_safepoint(&self) -> bool {
1096        match self {
1097            Inst::Call { .. } | Inst::CallInd { .. } => true,
1098            _ => false,
1099        }
1100    }
1101
1102    fn gen_dummy_use(reg: Reg) -> Inst {
1103        Inst::DummyUse { reg }
1104    }
1105
1106    fn gen_nop(preferred_size: usize) -> Inst {
1107        if preferred_size == 0 {
1108            return Inst::Nop0;
1109        }
1110        // We can't give a NOP (or any insn) < 4 bytes.
1111        assert!(preferred_size >= 4);
1112        Inst::Nop4
1113    }
1114
1115    fn gen_nop_units() -> Vec<Vec<u8>> {
1116        vec![vec![0x1f, 0x20, 0x03, 0xd5]]
1117    }
1118
1119    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1120        match ty {
1121            I8 => Ok((&[RegClass::Int], &[I8])),
1122            I16 => Ok((&[RegClass::Int], &[I16])),
1123            I32 => Ok((&[RegClass::Int], &[I32])),
1124            I64 => Ok((&[RegClass::Int], &[I64])),
1125            F16 => Ok((&[RegClass::Float], &[F16])),
1126            F32 => Ok((&[RegClass::Float], &[F32])),
1127            F64 => Ok((&[RegClass::Float], &[F64])),
1128            F128 => Ok((&[RegClass::Float], &[F128])),
1129            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1130            _ if ty.is_vector() && ty.bits() <= 128 => {
1131                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
1132                Ok((
1133                    &[RegClass::Float],
1134                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1135                ))
1136            }
1137            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
1138            _ => Err(CodegenError::Unsupported(format!(
1139                "Unexpected SSA-value type: {ty}"
1140            ))),
1141        }
1142    }
1143
1144    fn canonical_type_for_rc(rc: RegClass) -> Type {
1145        match rc {
1146            RegClass::Float => types::I8X16,
1147            RegClass::Int => types::I64,
1148            RegClass::Vector => unreachable!(),
1149        }
1150    }
1151
1152    fn gen_jump(target: MachLabel) -> Inst {
1153        Inst::Jump {
1154            dest: BranchTarget::Label(target),
1155        }
1156    }
1157
1158    fn worst_case_size() -> CodeOffset {
1159        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
1160        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
1161        // 64-bit f64 constants.
1162        //
1163        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
1164        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
1165        // feasible for other reasons).
1166        44
1167    }
1168
1169    fn worst_case_island_growth() -> CodeOffset {
1170        // A single `Inst` may add to the buffer's pending-island state:
1171        //
1172        // - Up to three 8-byte constants (the saturating int-to-float sequence
1173        //   noted above); count alignment padding into each.
1174        // - Up to one deferred trap (TrapIf and similar), 4 bytes.
1175        // - Up to one fixup per emitted instruction word, each contributing at
1176        //   most `worst_case_veneer_size()` (= 20) bytes of veneer.
1177        //
1178        // We pick a conservative bound that comfortably covers these.
1179        128
1180    }
1181
1182    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1183        RegClass::Int
1184    }
1185
1186    fn gen_block_start(
1187        is_indirect_branch_target: bool,
1188        is_forward_edge_cfi_enabled: bool,
1189    ) -> Option<Self> {
1190        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
1191            Some(Inst::Bti {
1192                targets: BranchTargetType::J,
1193            })
1194        } else {
1195            None
1196        }
1197    }
1198
1199    fn function_alignment() -> FunctionAlignment {
1200        // We use 32-byte alignment for performance reasons, but for correctness
1201        // we would only need 4-byte alignment.
1202        FunctionAlignment {
1203            minimum: 4,
1204            preferred: 32,
1205        }
1206    }
1207}
1208
1209//=============================================================================
1210// Pretty-printing of instructions.
1211
1212fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
1213    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
1214    let mut mem_str = mem_insts
1215        .into_iter()
1216        .map(|inst| inst.print_with_state(&mut EmitState::default()))
1217        .collect::<Vec<_>>()
1218        .join(" ; ");
1219    if !mem_str.is_empty() {
1220        mem_str += " ; ";
1221    }
1222
1223    let mem = mem.pretty_print(access_ty.bytes() as u8);
1224    (mem_str, mem)
1225}
1226
1227fn pretty_print_try_call(info: &TryCallInfo) -> String {
1228    format!(
1229        "; b {:?}; catch [{}]",
1230        info.continuation,
1231        info.pretty_print_dests()
1232    )
1233}
1234
1235impl Inst {
1236    #[expect(
1237        missing_docs,
1238        reason = "exposed for cranelift-isle/veri pretty-printing"
1239    )]
1240    pub fn print_with_state(&self, state: &mut EmitState) -> String {
1241        fn op_name(alu_op: ALUOp) -> &'static str {
1242            match alu_op {
1243                ALUOp::Add => "add",
1244                ALUOp::Sub => "sub",
1245                ALUOp::Orr => "orr",
1246                ALUOp::And => "and",
1247                ALUOp::AndS => "ands",
1248                ALUOp::Eor => "eor",
1249                ALUOp::AddS => "adds",
1250                ALUOp::SubS => "subs",
1251                ALUOp::SMulH => "smulh",
1252                ALUOp::UMulH => "umulh",
1253                ALUOp::SDiv => "sdiv",
1254                ALUOp::UDiv => "udiv",
1255                ALUOp::AndNot => "bic",
1256                ALUOp::OrrNot => "orn",
1257                ALUOp::EorNot => "eon",
1258                ALUOp::Extr => "extr",
1259                ALUOp::Lsr => "lsr",
1260                ALUOp::Asr => "asr",
1261                ALUOp::Lsl => "lsl",
1262                ALUOp::Adc => "adc",
1263                ALUOp::AdcS => "adcs",
1264                ALUOp::Sbc => "sbc",
1265                ALUOp::SbcS => "sbcs",
1266            }
1267        }
1268
1269        match self {
1270            &Inst::Nop0 => "nop-zero-len".to_string(),
1271            &Inst::Nop4 => "nop".to_string(),
1272            &Inst::AluRRR {
1273                alu_op,
1274                size,
1275                rd,
1276                rn,
1277                rm,
1278            } => {
1279                let op = op_name(alu_op);
1280                let rd = pretty_print_ireg(rd.to_reg(), size);
1281                let rn = pretty_print_ireg(rn, size);
1282                let rm = pretty_print_ireg(rm, size);
1283                format!("{op} {rd}, {rn}, {rm}")
1284            }
1285            &Inst::AluRRRR {
1286                alu_op,
1287                size,
1288                rd,
1289                rn,
1290                rm,
1291                ra,
1292            } => {
1293                let (op, da_size) = match alu_op {
1294                    ALUOp3::MAdd => ("madd", size),
1295                    ALUOp3::MSub => ("msub", size),
1296                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1297                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1298                };
1299                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1300                let rn = pretty_print_ireg(rn, size);
1301                let rm = pretty_print_ireg(rm, size);
1302                let ra = pretty_print_ireg(ra, da_size);
1303
1304                format!("{op} {rd}, {rn}, {rm}, {ra}")
1305            }
1306            &Inst::AluRRImm12 {
1307                alu_op,
1308                size,
1309                rd,
1310                rn,
1311                ref imm12,
1312            } => {
1313                let op = op_name(alu_op);
1314                let rd = pretty_print_ireg(rd.to_reg(), size);
1315                let rn = pretty_print_ireg(rn, size);
1316
1317                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1318                    // special-case MOV (used for moving into SP).
1319                    format!("mov {rd}, {rn}")
1320                } else {
1321                    let imm12 = imm12.pretty_print(0);
1322                    format!("{op} {rd}, {rn}, {imm12}")
1323                }
1324            }
1325            &Inst::AluRRImmLogic {
1326                alu_op,
1327                size,
1328                rd,
1329                rn,
1330                ref imml,
1331            } => {
1332                let op = op_name(alu_op);
1333                let rd = pretty_print_ireg(rd.to_reg(), size);
1334                let rn = pretty_print_ireg(rn, size);
1335                let imml = imml.pretty_print(0);
1336                format!("{op} {rd}, {rn}, {imml}")
1337            }
1338            &Inst::AluRRImmShift {
1339                alu_op,
1340                size,
1341                rd,
1342                rn,
1343                ref immshift,
1344            } => {
1345                let op = op_name(alu_op);
1346                let rd = pretty_print_ireg(rd.to_reg(), size);
1347                let rn = pretty_print_ireg(rn, size);
1348                let immshift = immshift.pretty_print(0);
1349                format!("{op} {rd}, {rn}, {immshift}")
1350            }
1351            &Inst::AluRRRShift {
1352                alu_op,
1353                size,
1354                rd,
1355                rn,
1356                rm,
1357                ref shiftop,
1358            } => {
1359                let op = op_name(alu_op);
1360                let rd = pretty_print_ireg(rd.to_reg(), size);
1361                let rn = pretty_print_ireg(rn, size);
1362                let rm = pretty_print_ireg(rm, size);
1363                let shiftop = shiftop.pretty_print(0);
1364                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1365            }
1366            &Inst::AluRRRExtend {
1367                alu_op,
1368                size,
1369                rd,
1370                rn,
1371                rm,
1372                ref extendop,
1373            } => {
1374                let op = op_name(alu_op);
1375                let rd = pretty_print_ireg(rd.to_reg(), size);
1376                let rn = pretty_print_ireg(rn, size);
1377                let rm = pretty_print_ireg(rm, size);
1378                let extendop = extendop.pretty_print(0);
1379                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1380            }
1381            &Inst::BitRR { op, size, rd, rn } => {
1382                let op = op.op_str();
1383                let rd = pretty_print_ireg(rd.to_reg(), size);
1384                let rn = pretty_print_ireg(rn, size);
1385                format!("{op} {rd}, {rn}")
1386            }
1387            &Inst::ULoad8 { rd, ref mem, .. }
1388            | &Inst::SLoad8 { rd, ref mem, .. }
1389            | &Inst::ULoad16 { rd, ref mem, .. }
1390            | &Inst::SLoad16 { rd, ref mem, .. }
1391            | &Inst::ULoad32 { rd, ref mem, .. }
1392            | &Inst::SLoad32 { rd, ref mem, .. }
1393            | &Inst::ULoad64 { rd, ref mem, .. } => {
1394                let is_unscaled = match &mem {
1395                    &AMode::Unscaled { .. } => true,
1396                    _ => false,
1397                };
1398                let (op, size) = match (self, is_unscaled) {
1399                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1400                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1401                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1402                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1403                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1404                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1405                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1406                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1407                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1408                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1409                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1410                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1411                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1412                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1413                    _ => unreachable!(),
1414                };
1415
1416                let rd = pretty_print_ireg(rd.to_reg(), size);
1417                let mem = mem.clone();
1418                let access_ty = self.mem_type().unwrap();
1419                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1420
1421                format!("{mem_str}{op} {rd}, {mem}")
1422            }
1423            &Inst::Store8 { rd, ref mem, .. }
1424            | &Inst::Store16 { rd, ref mem, .. }
1425            | &Inst::Store32 { rd, ref mem, .. }
1426            | &Inst::Store64 { rd, ref mem, .. } => {
1427                let is_unscaled = match &mem {
1428                    &AMode::Unscaled { .. } => true,
1429                    _ => false,
1430                };
1431                let (op, size) = match (self, is_unscaled) {
1432                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1433                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1434                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1435                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1436                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1437                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1438                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1439                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1440                    _ => unreachable!(),
1441                };
1442
1443                let rd = pretty_print_ireg(rd, size);
1444                let mem = mem.clone();
1445                let access_ty = self.mem_type().unwrap();
1446                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1447
1448                format!("{mem_str}{op} {rd}, {mem}")
1449            }
1450            &Inst::StoreP64 {
1451                rt, rt2, ref mem, ..
1452            } => {
1453                let rt = pretty_print_ireg(rt, OperandSize::Size64);
1454                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
1455                let mem = mem.clone();
1456                let mem = mem.pretty_print_default();
1457                format!("stp {rt}, {rt2}, {mem}")
1458            }
1459            &Inst::LoadP64 {
1460                rt, rt2, ref mem, ..
1461            } => {
1462                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
1463                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
1464                let mem = mem.clone();
1465                let mem = mem.pretty_print_default();
1466                format!("ldp {rt}, {rt2}, {mem}")
1467            }
1468            &Inst::Mov { size, rd, rm } => {
1469                let rd = pretty_print_ireg(rd.to_reg(), size);
1470                let rm = pretty_print_ireg(rm, size);
1471                format!("mov {rd}, {rm}")
1472            }
1473            &Inst::MovFromPReg { rd, rm } => {
1474                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1475                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
1476                format!("mov {rd}, {rm}")
1477            }
1478            &Inst::MovToPReg { rd, rm } => {
1479                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
1480                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1481                format!("mov {rd}, {rm}")
1482            }
1483            &Inst::MovWide {
1484                op,
1485                rd,
1486                ref imm,
1487                size,
1488            } => {
1489                let op_str = match op {
1490                    MoveWideOp::MovZ => "movz",
1491                    MoveWideOp::MovN => "movn",
1492                };
1493                let rd = pretty_print_ireg(rd.to_reg(), size);
1494                let imm = imm.pretty_print(0);
1495                format!("{op_str} {rd}, {imm}")
1496            }
1497            &Inst::MovK {
1498                rd,
1499                rn,
1500                ref imm,
1501                size,
1502            } => {
1503                let rn = pretty_print_ireg(rn, size);
1504                let rd = pretty_print_ireg(rd.to_reg(), size);
1505                let imm = imm.pretty_print(0);
1506                format!("movk {rd}, {rn}, {imm}")
1507            }
1508            &Inst::CSel { rd, rn, rm, cond } => {
1509                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1510                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1511                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1512                let cond = cond.pretty_print(0);
1513                format!("csel {rd}, {rn}, {rm}, {cond}")
1514            }
1515            &Inst::CSNeg { rd, rn, rm, cond } => {
1516                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1517                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1518                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1519                let cond = cond.pretty_print(0);
1520                format!("csneg {rd}, {rn}, {rm}, {cond}")
1521            }
1522            &Inst::CSet { rd, cond } => {
1523                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1524                let cond = cond.pretty_print(0);
1525                format!("cset {rd}, {cond}")
1526            }
1527            &Inst::CSetm { rd, cond } => {
1528                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1529                let cond = cond.pretty_print(0);
1530                format!("csetm {rd}, {cond}")
1531            }
1532            &Inst::CCmp {
1533                size,
1534                rn,
1535                rm,
1536                nzcv,
1537                cond,
1538            } => {
1539                let rn = pretty_print_ireg(rn, size);
1540                let rm = pretty_print_ireg(rm, size);
1541                let nzcv = nzcv.pretty_print(0);
1542                let cond = cond.pretty_print(0);
1543                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
1544            }
1545            &Inst::CCmpImm {
1546                size,
1547                rn,
1548                imm,
1549                nzcv,
1550                cond,
1551            } => {
1552                let rn = pretty_print_ireg(rn, size);
1553                let imm = imm.pretty_print(0);
1554                let nzcv = nzcv.pretty_print(0);
1555                let cond = cond.pretty_print(0);
1556                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
1557            }
1558            &Inst::AtomicRMW {
1559                rs, rt, rn, ty, op, ..
1560            } => {
1561                let op = match op {
1562                    AtomicRMWOp::Add => "ldaddal",
1563                    AtomicRMWOp::Clr => "ldclral",
1564                    AtomicRMWOp::Eor => "ldeoral",
1565                    AtomicRMWOp::Set => "ldsetal",
1566                    AtomicRMWOp::Smax => "ldsmaxal",
1567                    AtomicRMWOp::Umax => "ldumaxal",
1568                    AtomicRMWOp::Smin => "ldsminal",
1569                    AtomicRMWOp::Umin => "lduminal",
1570                    AtomicRMWOp::Swp => "swpal",
1571                };
1572
1573                let size = OperandSize::from_ty(ty);
1574                let rs = pretty_print_ireg(rs, size);
1575                let rt = pretty_print_ireg(rt.to_reg(), size);
1576                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1577
1578                let ty_suffix = match ty {
1579                    I8 => "b",
1580                    I16 => "h",
1581                    _ => "",
1582                };
1583                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
1584            }
1585            &Inst::AtomicRMWLoop {
1586                ty,
1587                op,
1588                addr,
1589                operand,
1590                oldval,
1591                scratch1,
1592                scratch2,
1593                ..
1594            } => {
1595                let op = match op {
1596                    AtomicRMWLoopOp::Add => "add",
1597                    AtomicRMWLoopOp::Sub => "sub",
1598                    AtomicRMWLoopOp::Eor => "eor",
1599                    AtomicRMWLoopOp::Orr => "orr",
1600                    AtomicRMWLoopOp::And => "and",
1601                    AtomicRMWLoopOp::Nand => "nand",
1602                    AtomicRMWLoopOp::Smin => "smin",
1603                    AtomicRMWLoopOp::Smax => "smax",
1604                    AtomicRMWLoopOp::Umin => "umin",
1605                    AtomicRMWLoopOp::Umax => "umax",
1606                    AtomicRMWLoopOp::Xchg => "xchg",
1607                };
1608                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1609                let operand = pretty_print_ireg(operand, OperandSize::Size64);
1610                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1611                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
1612                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
1613                format!(
1614                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
1615                    op,
1616                    ty.bits(),
1617                    addr,
1618                    operand,
1619                    oldval,
1620                    scratch1,
1621                    scratch2,
1622                )
1623            }
1624            &Inst::AtomicCAS {
1625                rd, rs, rt, rn, ty, ..
1626            } => {
1627                let op = match ty {
1628                    I8 => "casalb",
1629                    I16 => "casalh",
1630                    I32 | I64 => "casal",
1631                    _ => panic!("Unsupported type: {ty}"),
1632                };
1633                let size = OperandSize::from_ty(ty);
1634                let rd = pretty_print_ireg(rd.to_reg(), size);
1635                let rs = pretty_print_ireg(rs, size);
1636                let rt = pretty_print_ireg(rt, size);
1637                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1638
1639                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
1640            }
1641            &Inst::AtomicCASLoop {
1642                ty,
1643                addr,
1644                expected,
1645                replacement,
1646                oldval,
1647                scratch,
1648                ..
1649            } => {
1650                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1651                let expected = pretty_print_ireg(expected, OperandSize::Size64);
1652                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
1653                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1654                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
1655                format!(
1656                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
1657                    ty.bits(),
1658                    addr,
1659                    expected,
1660                    replacement,
1661                    oldval,
1662                    scratch,
1663                )
1664            }
1665            &Inst::LoadAcquire {
1666                access_ty, rt, rn, ..
1667            } => {
1668                let (op, ty) = match access_ty {
1669                    I8 => ("ldarb", I32),
1670                    I16 => ("ldarh", I32),
1671                    I32 => ("ldar", I32),
1672                    I64 => ("ldar", I64),
1673                    _ => panic!("Unsupported type: {access_ty}"),
1674                };
1675                let size = OperandSize::from_ty(ty);
1676                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1677                let rt = pretty_print_ireg(rt.to_reg(), size);
1678                format!("{op} {rt}, [{rn}]")
1679            }
1680            &Inst::StoreRelease {
1681                access_ty, rt, rn, ..
1682            } => {
1683                let (op, ty) = match access_ty {
1684                    I8 => ("stlrb", I32),
1685                    I16 => ("stlrh", I32),
1686                    I32 => ("stlr", I32),
1687                    I64 => ("stlr", I64),
1688                    _ => panic!("Unsupported type: {access_ty}"),
1689                };
1690                let size = OperandSize::from_ty(ty);
1691                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1692                let rt = pretty_print_ireg(rt, size);
1693                format!("{op} {rt}, [{rn}]")
1694            }
1695            &Inst::Fence {} => {
1696                format!("dmb ish")
1697            }
1698            &Inst::Csdb {} => {
1699                format!("csdb")
1700            }
1701            &Inst::FpuMove32 { rd, rn } => {
1702                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1703                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1704                format!("fmov {rd}, {rn}")
1705            }
1706            &Inst::FpuMove64 { rd, rn } => {
1707                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1708                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1709                format!("fmov {rd}, {rn}")
1710            }
1711            &Inst::FpuMove128 { rd, rn } => {
1712                let rd = pretty_print_reg(rd.to_reg());
1713                let rn = pretty_print_reg(rn);
1714                format!("mov {rd}.16b, {rn}.16b")
1715            }
1716            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1717                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
1718                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
1719                format!("mov {rd}, {rn}")
1720            }
1721            &Inst::FpuExtend { rd, rn, size } => {
1722                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1723                let rn = pretty_print_vreg_scalar(rn, size);
1724                format!("fmov {rd}, {rn}")
1725            }
1726            &Inst::FpuRR {
1727                fpu_op,
1728                size,
1729                rd,
1730                rn,
1731            } => {
1732                let op = match fpu_op {
1733                    FPUOp1::Abs => "fabs",
1734                    FPUOp1::Neg => "fneg",
1735                    FPUOp1::Sqrt => "fsqrt",
1736                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
1737                };
1738                let dst_size = match fpu_op {
1739                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
1740                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
1741                    _ => size,
1742                };
1743                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
1744                let rn = pretty_print_vreg_scalar(rn, size);
1745                format!("{op} {rd}, {rn}")
1746            }
1747            &Inst::FpuRRR {
1748                fpu_op,
1749                size,
1750                rd,
1751                rn,
1752                rm,
1753            } => {
1754                let op = match fpu_op {
1755                    FPUOp2::Add => "fadd",
1756                    FPUOp2::Sub => "fsub",
1757                    FPUOp2::Mul => "fmul",
1758                    FPUOp2::Div => "fdiv",
1759                    FPUOp2::Max => "fmax",
1760                    FPUOp2::Min => "fmin",
1761                };
1762                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1763                let rn = pretty_print_vreg_scalar(rn, size);
1764                let rm = pretty_print_vreg_scalar(rm, size);
1765                format!("{op} {rd}, {rn}, {rm}")
1766            }
1767            &Inst::FpuRRI { fpu_op, rd, rn } => {
1768                let (op, imm, vector) = match fpu_op {
1769                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
1770                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
1771                };
1772
1773                let (rd, rn) = if vector {
1774                    (
1775                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1776                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1777                    )
1778                } else {
1779                    (
1780                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1781                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1782                    )
1783                };
1784                format!("{op} {rd}, {rn}, {imm}")
1785            }
1786            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1787                let (op, imm, vector) = match fpu_op {
1788                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
1789                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
1790                };
1791
1792                let (rd, ri, rn) = if vector {
1793                    (
1794                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1795                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
1796                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1797                    )
1798                } else {
1799                    (
1800                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1801                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
1802                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1803                    )
1804                };
1805                format!("{op} {rd}, {ri}, {rn}, {imm}")
1806            }
1807            &Inst::FpuRRRR {
1808                fpu_op,
1809                size,
1810                rd,
1811                rn,
1812                rm,
1813                ra,
1814            } => {
1815                let op = match fpu_op {
1816                    FPUOp3::MAdd => "fmadd",
1817                    FPUOp3::MSub => "fmsub",
1818                    FPUOp3::NMAdd => "fnmadd",
1819                    FPUOp3::NMSub => "fnmsub",
1820                };
1821                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1822                let rn = pretty_print_vreg_scalar(rn, size);
1823                let rm = pretty_print_vreg_scalar(rm, size);
1824                let ra = pretty_print_vreg_scalar(ra, size);
1825                format!("{op} {rd}, {rn}, {rm}, {ra}")
1826            }
1827            &Inst::FpuCmp { size, rn, rm } => {
1828                let rn = pretty_print_vreg_scalar(rn, size);
1829                let rm = pretty_print_vreg_scalar(rm, size);
1830                format!("fcmp {rn}, {rm}")
1831            }
1832            &Inst::FpuLoad16 { rd, ref mem, .. } => {
1833                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1834                let mem = mem.clone();
1835                let access_ty = self.mem_type().unwrap();
1836                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1837                format!("{mem_str}ldr {rd}, {mem}")
1838            }
1839            &Inst::FpuLoad32 { rd, ref mem, .. } => {
1840                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1841                let mem = mem.clone();
1842                let access_ty = self.mem_type().unwrap();
1843                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1844                format!("{mem_str}ldr {rd}, {mem}")
1845            }
1846            &Inst::FpuLoad64 { rd, ref mem, .. } => {
1847                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1848                let mem = mem.clone();
1849                let access_ty = self.mem_type().unwrap();
1850                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1851                format!("{mem_str}ldr {rd}, {mem}")
1852            }
1853            &Inst::FpuLoad128 { rd, ref mem, .. } => {
1854                let rd = pretty_print_reg(rd.to_reg());
1855                let rd = "q".to_string() + &rd[1..];
1856                let mem = mem.clone();
1857                let access_ty = self.mem_type().unwrap();
1858                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1859                format!("{mem_str}ldr {rd}, {mem}")
1860            }
1861            &Inst::FpuStore16 { rd, ref mem, .. } => {
1862                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
1863                let mem = mem.clone();
1864                let access_ty = self.mem_type().unwrap();
1865                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1866                format!("{mem_str}str {rd}, {mem}")
1867            }
1868            &Inst::FpuStore32 { rd, ref mem, .. } => {
1869                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
1870                let mem = mem.clone();
1871                let access_ty = self.mem_type().unwrap();
1872                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1873                format!("{mem_str}str {rd}, {mem}")
1874            }
1875            &Inst::FpuStore64 { rd, ref mem, .. } => {
1876                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
1877                let mem = mem.clone();
1878                let access_ty = self.mem_type().unwrap();
1879                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1880                format!("{mem_str}str {rd}, {mem}")
1881            }
1882            &Inst::FpuStore128 { rd, ref mem, .. } => {
1883                let rd = pretty_print_reg(rd);
1884                let rd = "q".to_string() + &rd[1..];
1885                let mem = mem.clone();
1886                let access_ty = self.mem_type().unwrap();
1887                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1888                format!("{mem_str}str {rd}, {mem}")
1889            }
1890            &Inst::FpuLoadP64 {
1891                rt, rt2, ref mem, ..
1892            } => {
1893                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
1894                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
1895                let mem = mem.clone();
1896                let mem = mem.pretty_print_default();
1897
1898                format!("ldp {rt}, {rt2}, {mem}")
1899            }
1900            &Inst::FpuStoreP64 {
1901                rt, rt2, ref mem, ..
1902            } => {
1903                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
1904                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
1905                let mem = mem.clone();
1906                let mem = mem.pretty_print_default();
1907
1908                format!("stp {rt}, {rt2}, {mem}")
1909            }
1910            &Inst::FpuLoadP128 {
1911                rt, rt2, ref mem, ..
1912            } => {
1913                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
1914                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
1915                let mem = mem.clone();
1916                let mem = mem.pretty_print_default();
1917
1918                format!("ldp {rt}, {rt2}, {mem}")
1919            }
1920            &Inst::FpuStoreP128 {
1921                rt, rt2, ref mem, ..
1922            } => {
1923                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
1924                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
1925                let mem = mem.clone();
1926                let mem = mem.pretty_print_default();
1927
1928                format!("stp {rt}, {rt2}, {mem}")
1929            }
1930            &Inst::FpuToInt { op, rd, rn } => {
1931                let (op, sizesrc, sizedest) = match op {
1932                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
1933                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
1934                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
1935                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
1936                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
1937                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
1938                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
1939                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
1940                };
1941                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
1942                let rn = pretty_print_vreg_scalar(rn, sizesrc);
1943                format!("{op} {rd}, {rn}")
1944            }
1945            &Inst::IntToFpu { op, rd, rn } => {
1946                let (op, sizesrc, sizedest) = match op {
1947                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
1948                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
1949                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
1950                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
1951                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
1952                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
1953                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
1954                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
1955                };
1956                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
1957                let rn = pretty_print_ireg(rn, sizesrc);
1958                format!("{op} {rd}, {rn}")
1959            }
1960            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
1961                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1962                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
1963                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
1964                let cond = cond.pretty_print(0);
1965                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1966            }
1967            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
1968                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1969                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1970                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
1971                let cond = cond.pretty_print(0);
1972                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1973            }
1974            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
1975                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1976                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1977                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
1978                let cond = cond.pretty_print(0);
1979                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1980            }
1981            &Inst::FpuRound { op, rd, rn } => {
1982                let (inst, size) = match op {
1983                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
1984                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
1985                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
1986                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
1987                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
1988                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
1989                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
1990                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
1991                };
1992                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1993                let rn = pretty_print_vreg_scalar(rn, size);
1994                format!("{inst} {rd}, {rn}")
1995            }
1996            &Inst::MovToFpu { rd, rn, size } => {
1997                let operand_size = size.operand_size();
1998                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1999                let rn = pretty_print_ireg(rn, operand_size);
2000                format!("fmov {rd}, {rn}")
2001            }
2002            &Inst::FpuMoveFPImm { rd, imm, size } => {
2003                let imm = imm.pretty_print(0);
2004                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
2005
2006                format!("fmov {rd}, {imm}")
2007            }
2008            &Inst::MovToVec {
2009                rd,
2010                ri,
2011                rn,
2012                idx,
2013                size,
2014            } => {
2015                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
2016                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
2017                let rn = pretty_print_ireg(rn, size.operand_size());
2018                format!("mov {rd}, {ri}, {rn}")
2019            }
2020            &Inst::MovFromVec { rd, rn, idx, size } => {
2021                let op = match size {
2022                    ScalarSize::Size8 => "umov",
2023                    ScalarSize::Size16 => "umov",
2024                    ScalarSize::Size32 => "mov",
2025                    ScalarSize::Size64 => "mov",
2026                    _ => unimplemented!(),
2027                };
2028                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
2029                let rn = pretty_print_vreg_element(rn, idx as usize, size);
2030                format!("{op} {rd}, {rn}")
2031            }
2032            &Inst::MovFromVecSigned {
2033                rd,
2034                rn,
2035                idx,
2036                size,
2037                scalar_size,
2038            } => {
2039                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
2040                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
2041                format!("smov {rd}, {rn}")
2042            }
2043            &Inst::VecDup { rd, rn, size } => {
2044                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2045                let rn = pretty_print_ireg(rn, size.operand_size());
2046                format!("dup {rd}, {rn}")
2047            }
2048            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
2049                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2050                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
2051                format!("dup {rd}, {rn}")
2052            }
2053            &Inst::VecDupFPImm { rd, imm, size } => {
2054                let imm = imm.pretty_print(0);
2055                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2056
2057                format!("fmov {rd}, {imm}")
2058            }
2059            &Inst::VecDupImm {
2060                rd,
2061                imm,
2062                invert,
2063                size,
2064            } => {
2065                let imm = imm.pretty_print(0);
2066                let op = if invert { "mvni" } else { "movi" };
2067                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2068
2069                format!("{op} {rd}, {imm}")
2070            }
2071            &Inst::VecExtend {
2072                t,
2073                rd,
2074                rn,
2075                high_half,
2076                lane_size,
2077            } => {
2078                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
2079                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
2080                let rd_size = VectorSize::from_lane_size(lane_size, true);
2081                let (op, rn_size) = match (t, high_half) {
2082                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
2083                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
2084                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
2085                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
2086                };
2087                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2088                let rn = pretty_print_vreg_vector(rn, rn_size);
2089                format!("{op} {rd}, {rn}")
2090            }
2091            &Inst::VecMovElement {
2092                rd,
2093                ri,
2094                rn,
2095                dest_idx,
2096                src_idx,
2097                size,
2098            } => {
2099                let rd =
2100                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
2101                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
2102                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
2103                format!("mov {rd}, {ri}, {rn}")
2104            }
2105            &Inst::VecRRLong {
2106                op,
2107                rd,
2108                rn,
2109                high_half,
2110            } => {
2111                let (op, rd_size, size, suffix) = match (op, high_half) {
2112                    (VecRRLongOp::Fcvtl16, false) => {
2113                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
2114                    }
2115                    (VecRRLongOp::Fcvtl16, true) => {
2116                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
2117                    }
2118                    (VecRRLongOp::Fcvtl32, false) => {
2119                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
2120                    }
2121                    (VecRRLongOp::Fcvtl32, true) => {
2122                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
2123                    }
2124                    (VecRRLongOp::Shll8, false) => {
2125                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
2126                    }
2127                    (VecRRLongOp::Shll8, true) => {
2128                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
2129                    }
2130                    (VecRRLongOp::Shll16, false) => {
2131                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
2132                    }
2133                    (VecRRLongOp::Shll16, true) => {
2134                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
2135                    }
2136                    (VecRRLongOp::Shll32, false) => {
2137                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
2138                    }
2139                    (VecRRLongOp::Shll32, true) => {
2140                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
2141                    }
2142                };
2143                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2144                let rn = pretty_print_vreg_vector(rn, size);
2145
2146                format!("{op} {rd}, {rn}{suffix}")
2147            }
2148            &Inst::VecRRNarrowLow {
2149                op,
2150                rd,
2151                rn,
2152                lane_size,
2153                ..
2154            }
2155            | &Inst::VecRRNarrowHigh {
2156                op,
2157                rd,
2158                rn,
2159                lane_size,
2160                ..
2161            } => {
2162                let vec64 = VectorSize::from_lane_size(lane_size, false);
2163                let vec128 = VectorSize::from_lane_size(lane_size, true);
2164                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
2165                let high_half = match self {
2166                    &Inst::VecRRNarrowLow { .. } => false,
2167                    &Inst::VecRRNarrowHigh { .. } => true,
2168                    _ => unreachable!(),
2169                };
2170                let (op, rd_size) = match (op, high_half) {
2171                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
2172                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
2173                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
2174                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
2175                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
2176                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
2177                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
2178                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
2179                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
2180                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
2181                };
2182                let rn = pretty_print_vreg_vector(rn, rn_size);
2183                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2184                let ri = match self {
2185                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
2186                    &Inst::VecRRNarrowHigh { ri, .. } => {
2187                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
2188                    }
2189                    _ => unreachable!(),
2190                };
2191
2192                format!("{op} {rd}, {ri}{rn}")
2193            }
2194            &Inst::VecRRPair { op, rd, rn } => {
2195                let op = match op {
2196                    VecPairOp::Addp => "addp",
2197                };
2198                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
2199                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);
2200
2201                format!("{op} {rd}, {rn}")
2202            }
2203            &Inst::VecRRPairLong { op, rd, rn } => {
2204                let (op, dest, src) = match op {
2205                    VecRRPairLongOp::Saddlp8 => {
2206                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2207                    }
2208                    VecRRPairLongOp::Saddlp16 => {
2209                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2210                    }
2211                    VecRRPairLongOp::Uaddlp8 => {
2212                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2213                    }
2214                    VecRRPairLongOp::Uaddlp16 => {
2215                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2216                    }
2217                };
2218                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
2219                let rn = pretty_print_vreg_vector(rn, src);
2220
2221                format!("{op} {rd}, {rn}")
2222            }
2223            &Inst::VecRRR {
2224                rd,
2225                rn,
2226                rm,
2227                alu_op,
2228                size,
2229            } => {
2230                let (op, size) = match alu_op {
2231                    VecALUOp::Sqadd => ("sqadd", size),
2232                    VecALUOp::Uqadd => ("uqadd", size),
2233                    VecALUOp::Sqsub => ("sqsub", size),
2234                    VecALUOp::Uqsub => ("uqsub", size),
2235                    VecALUOp::Cmeq => ("cmeq", size),
2236                    VecALUOp::Cmge => ("cmge", size),
2237                    VecALUOp::Cmgt => ("cmgt", size),
2238                    VecALUOp::Cmhs => ("cmhs", size),
2239                    VecALUOp::Cmhi => ("cmhi", size),
2240                    VecALUOp::Fcmeq => ("fcmeq", size),
2241                    VecALUOp::Fcmgt => ("fcmgt", size),
2242                    VecALUOp::Fcmge => ("fcmge", size),
2243                    VecALUOp::Umaxp => ("umaxp", size),
2244                    VecALUOp::Add => ("add", size),
2245                    VecALUOp::Sub => ("sub", size),
2246                    VecALUOp::Mul => ("mul", size),
2247                    VecALUOp::Sshl => ("sshl", size),
2248                    VecALUOp::Ushl => ("ushl", size),
2249                    VecALUOp::Umin => ("umin", size),
2250                    VecALUOp::Smin => ("smin", size),
2251                    VecALUOp::Umax => ("umax", size),
2252                    VecALUOp::Smax => ("smax", size),
2253                    VecALUOp::Urhadd => ("urhadd", size),
2254                    VecALUOp::Fadd => ("fadd", size),
2255                    VecALUOp::Fsub => ("fsub", size),
2256                    VecALUOp::Fdiv => ("fdiv", size),
2257                    VecALUOp::Fmax => ("fmax", size),
2258                    VecALUOp::Fmin => ("fmin", size),
2259                    VecALUOp::Fmul => ("fmul", size),
2260                    VecALUOp::Addp => ("addp", size),
2261                    VecALUOp::Zip1 => ("zip1", size),
2262                    VecALUOp::Zip2 => ("zip2", size),
2263                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
2264                    VecALUOp::Uzp1 => ("uzp1", size),
2265                    VecALUOp::Uzp2 => ("uzp2", size),
2266                    VecALUOp::Trn1 => ("trn1", size),
2267                    VecALUOp::Trn2 => ("trn2", size),
2268
2269                    // Lane division does not affect bitwise operations.
2270                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2271                    VecALUOp::And => ("and", size.as_scalar8_vector()),
2272                    VecALUOp::Bic => ("bic", size.as_scalar8_vector()),
2273                    VecALUOp::Orr => ("orr", size.as_scalar8_vector()),
2274                    VecALUOp::Orn => ("orn", size.as_scalar8_vector()),
2275                    VecALUOp::Eor => ("eor", size.as_scalar8_vector()),
2276                };
2277                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2278                let rn = pretty_print_vreg_vector(rn, size);
2279                let rm = pretty_print_vreg_vector(rm, size);
2280                format!("{op} {rd}, {rn}, {rm}")
2281            }
2282            &Inst::VecRRRMod {
2283                rd,
2284                ri,
2285                rn,
2286                rm,
2287                alu_op,
2288                size,
2289            } => {
2290                let (op, size) = match alu_op {
2291                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
2292                    VecALUModOp::Fmla => ("fmla", size),
2293                    VecALUModOp::Fmls => ("fmls", size),
2294                    // Note: the real operand arrangement is .4s, .16b, .16b;
2295                    // this debug print renders all lanes as .4s.
2296                    VecALUModOp::Sdot => ("sdot", VectorSize::Size32x4),
2297                };
2298                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2299                let ri = pretty_print_vreg_vector(ri, size);
2300                let rn = pretty_print_vreg_vector(rn, size);
2301                let rm = pretty_print_vreg_vector(rm, size);
2302                format!("{op} {rd}, {ri}, {rn}, {rm}")
2303            }
2304            &Inst::VecFmlaElem {
2305                rd,
2306                ri,
2307                rn,
2308                rm,
2309                alu_op,
2310                size,
2311                idx,
2312            } => {
2313                let (op, size) = match alu_op {
2314                    VecALUModOp::Fmla => ("fmla", size),
2315                    VecALUModOp::Fmls => ("fmls", size),
2316                    _ => unreachable!(),
2317                };
2318                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2319                let ri = pretty_print_vreg_vector(ri, size);
2320                let rn = pretty_print_vreg_vector(rn, size);
2321                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
2322                format!("{op} {rd}, {ri}, {rn}, {rm}")
2323            }
2324            &Inst::VecRRRLong {
2325                rd,
2326                rn,
2327                rm,
2328                alu_op,
2329                high_half,
2330            } => {
2331                let (op, dest_size, src_size) = match (alu_op, high_half) {
2332                    (VecRRRLongOp::Smull8, false) => {
2333                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
2334                    }
2335                    (VecRRRLongOp::Smull8, true) => {
2336                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
2337                    }
2338                    (VecRRRLongOp::Smull16, false) => {
2339                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
2340                    }
2341                    (VecRRRLongOp::Smull16, true) => {
2342                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
2343                    }
2344                    (VecRRRLongOp::Smull32, false) => {
2345                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
2346                    }
2347                    (VecRRRLongOp::Smull32, true) => {
2348                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
2349                    }
2350                    (VecRRRLongOp::Umull8, false) => {
2351                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
2352                    }
2353                    (VecRRRLongOp::Umull8, true) => {
2354                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
2355                    }
2356                    (VecRRRLongOp::Umull16, false) => {
2357                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
2358                    }
2359                    (VecRRRLongOp::Umull16, true) => {
2360                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
2361                    }
2362                    (VecRRRLongOp::Umull32, false) => {
2363                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
2364                    }
2365                    (VecRRRLongOp::Umull32, true) => {
2366                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
2367                    }
2368                };
2369                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2370                let rn = pretty_print_vreg_vector(rn, src_size);
2371                let rm = pretty_print_vreg_vector(rm, src_size);
2372                format!("{op} {rd}, {rn}, {rm}")
2373            }
2374            &Inst::VecRRRLongMod {
2375                rd,
2376                ri,
2377                rn,
2378                rm,
2379                alu_op,
2380                high_half,
2381            } => {
2382                let (op, dest_size, src_size) = match (alu_op, high_half) {
2383                    (VecRRRLongModOp::Umlal8, false) => {
2384                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
2385                    }
2386                    (VecRRRLongModOp::Umlal8, true) => {
2387                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
2388                    }
2389                    (VecRRRLongModOp::Umlal16, false) => {
2390                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
2391                    }
2392                    (VecRRRLongModOp::Umlal16, true) => {
2393                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
2394                    }
2395                    (VecRRRLongModOp::Umlal32, false) => {
2396                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
2397                    }
2398                    (VecRRRLongModOp::Umlal32, true) => {
2399                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
2400                    }
2401                };
2402                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2403                let ri = pretty_print_vreg_vector(ri, dest_size);
2404                let rn = pretty_print_vreg_vector(rn, src_size);
2405                let rm = pretty_print_vreg_vector(rm, src_size);
2406                format!("{op} {rd}, {ri}, {rn}, {rm}")
2407            }
2408            &Inst::VecMisc { op, rd, rn, size } => {
2409                let (op, size, suffix) = match op {
2410                    VecMisc2::Neg => ("neg", size, ""),
2411                    VecMisc2::Abs => ("abs", size, ""),
2412                    VecMisc2::Fabs => ("fabs", size, ""),
2413                    VecMisc2::Fneg => ("fneg", size, ""),
2414                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
2415                    VecMisc2::Rev16 => ("rev16", size, ""),
2416                    VecMisc2::Rev32 => ("rev32", size, ""),
2417                    VecMisc2::Rev64 => ("rev64", size, ""),
2418                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
2419                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
2420                    VecMisc2::Scvtf => ("scvtf", size, ""),
2421                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
2422                    VecMisc2::Frintn => ("frintn", size, ""),
2423                    VecMisc2::Frintz => ("frintz", size, ""),
2424                    VecMisc2::Frintm => ("frintm", size, ""),
2425                    VecMisc2::Frintp => ("frintp", size, ""),
2426                    VecMisc2::Cnt => ("cnt", size, ""),
2427                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
2428                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
2429                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
2430                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
2431                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
2432                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
2433                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
2434                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
2435                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
2436                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
2437
2438                    // Lane division does not affect bitwise operations.
2439                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2440                    VecMisc2::Not => ("mvn", size.as_scalar8_vector(), ""),
2441                };
2442                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2443                let rn = pretty_print_vreg_vector(rn, size);
2444                format!("{op} {rd}, {rn}{suffix}")
2445            }
2446            &Inst::VecLanes { op, rd, rn, size } => {
2447                let op = match op {
2448                    VecLanesOp::Uminv => "uminv",
2449                    VecLanesOp::Addv => "addv",
2450                };
2451                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
2452                let rn = pretty_print_vreg_vector(rn, size);
2453                format!("{op} {rd}, {rn}")
2454            }
2455            &Inst::VecShiftImm {
2456                op,
2457                rd,
2458                rn,
2459                size,
2460                imm,
2461            } => {
2462                let op = match op {
2463                    VecShiftImmOp::Shl => "shl",
2464                    VecShiftImmOp::Ushr => "ushr",
2465                    VecShiftImmOp::Sshr => "sshr",
2466                };
2467                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2468                let rn = pretty_print_vreg_vector(rn, size);
2469                format!("{op} {rd}, {rn}, #{imm}")
2470            }
2471            &Inst::VecShiftImmMod {
2472                op,
2473                rd,
2474                ri,
2475                rn,
2476                size,
2477                imm,
2478            } => {
2479                let op = match op {
2480                    VecShiftImmModOp::Sli => "sli",
2481                };
2482                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2483                let ri = pretty_print_vreg_vector(ri, size);
2484                let rn = pretty_print_vreg_vector(rn, size);
2485                format!("{op} {rd}, {ri}, {rn}, #{imm}")
2486            }
2487            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2488                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2489                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2490                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2491                format!("ext {rd}, {rn}, {rm}, #{imm4}")
2492            }
2493            &Inst::VecTbl { rd, rn, rm } => {
2494                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2495                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2496                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2497                format!("tbl {rd}, {{ {rn} }}, {rm}")
2498            }
2499            &Inst::VecTblExt { rd, ri, rn, rm } => {
2500                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2501                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2502                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2503                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2504                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
2505            }
2506            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2507                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2508                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2509                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2510                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2511                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
2512            }
2513            &Inst::VecTbl2Ext {
2514                rd,
2515                ri,
2516                rn,
2517                rn2,
2518                rm,
2519            } => {
2520                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2521                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2522                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2523                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2524                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2525                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
2526            }
2527            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
2528                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2529                let rn = pretty_print_reg(rn);
2530
2531                format!("ld1r {{ {rd} }}, [{rn}]")
2532            }
2533            &Inst::VecCSel { rd, rn, rm, cond } => {
2534                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2535                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2536                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2537                let cond = cond.pretty_print(0);
2538                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
2539            }
2540            &Inst::MovToNZCV { rn } => {
2541                let rn = pretty_print_reg(rn);
2542                format!("msr nzcv, {rn}")
2543            }
2544            &Inst::MovFromNZCV { rd } => {
2545                let rd = pretty_print_reg(rd.to_reg());
2546                format!("mrs {rd}, nzcv")
2547            }
2548            &Inst::Extend {
2549                rd,
2550                rn,
2551                signed: false,
2552                from_bits: 1,
2553                ..
2554            } => {
2555                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2556                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2557                format!("and {rd}, {rn}, #1")
2558            }
2559            &Inst::Extend {
2560                rd,
2561                rn,
2562                signed: false,
2563                from_bits: 32,
2564                to_bits: 64,
2565            } => {
2566                // The case of a zero extension from 32 to 64 bits, is implemented
2567                // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
2568                // the top 32 bits.
2569                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2570                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2571                format!("mov {rd}, {rn}")
2572            }
2573            &Inst::Extend {
2574                rd,
2575                rn,
2576                signed,
2577                from_bits,
2578                to_bits,
2579            } => {
2580                assert!(from_bits <= to_bits);
2581                let op = match (signed, from_bits) {
2582                    (false, 8) => "uxtb",
2583                    (true, 8) => "sxtb",
2584                    (false, 16) => "uxth",
2585                    (true, 16) => "sxth",
2586                    (true, 32) => "sxtw",
2587                    (true, _) => "sbfx",
2588                    (false, _) => "ubfx",
2589                };
2590                if op == "sbfx" || op == "ubfx" {
2591                    let dest_size = OperandSize::from_bits(to_bits);
2592                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2593                    let rn = pretty_print_ireg(rn, dest_size);
2594                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
2595                } else {
2596                    let dest_size = if signed {
2597                        OperandSize::from_bits(to_bits)
2598                    } else {
2599                        OperandSize::Size32
2600                    };
2601                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2602                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
2603                    format!("{op} {rd}, {rn}")
2604                }
2605            }
2606            &Inst::Call { ref info } => {
2607                let try_call = info
2608                    .try_call_info
2609                    .as_ref()
2610                    .map(|tci| pretty_print_try_call(tci))
2611                    .unwrap_or_default();
2612                format!("bl 0{try_call}")
2613            }
2614            &Inst::CallInd { ref info } => {
2615                let rn = pretty_print_reg(info.dest);
2616                let try_call = info
2617                    .try_call_info
2618                    .as_ref()
2619                    .map(|tci| pretty_print_try_call(tci))
2620                    .unwrap_or_default();
2621                format!("blr {rn}{try_call}")
2622            }
2623            &Inst::ReturnCall { ref info } => {
2624                let mut s = format!(
2625                    "return_call {:?} new_stack_arg_size:{}",
2626                    info.dest, info.new_stack_arg_size
2627                );
2628                for ret in &info.uses {
2629                    let preg = pretty_print_reg(ret.preg);
2630                    let vreg = pretty_print_reg(ret.vreg);
2631                    write!(&mut s, " {vreg}={preg}").unwrap();
2632                }
2633                s
2634            }
2635            &Inst::ReturnCallInd { ref info } => {
2636                let callee = pretty_print_reg(info.dest);
2637                let mut s = format!(
2638                    "return_call_ind {callee} new_stack_arg_size:{}",
2639                    info.new_stack_arg_size
2640                );
2641                for ret in &info.uses {
2642                    let preg = pretty_print_reg(ret.preg);
2643                    let vreg = pretty_print_reg(ret.vreg);
2644                    write!(&mut s, " {vreg}={preg}").unwrap();
2645                }
2646                s
2647            }
2648            &Inst::Args { ref args } => {
2649                let mut s = "args".to_string();
2650                for arg in args {
2651                    let preg = pretty_print_reg(arg.preg);
2652                    let def = pretty_print_reg(arg.vreg.to_reg());
2653                    write!(&mut s, " {def}={preg}").unwrap();
2654                }
2655                s
2656            }
2657            &Inst::Rets { ref rets } => {
2658                let mut s = "rets".to_string();
2659                for ret in rets {
2660                    let preg = pretty_print_reg(ret.preg);
2661                    let vreg = pretty_print_reg(ret.vreg);
2662                    write!(&mut s, " {vreg}={preg}").unwrap();
2663                }
2664                s
2665            }
2666            &Inst::Ret {} => "ret".to_string(),
2667            &Inst::AuthenticatedRet { key, is_hint } => {
2668                let key = match key {
2669                    APIKey::AZ => "az",
2670                    APIKey::BZ => "bz",
2671                    APIKey::ASP => "asp",
2672                    APIKey::BSP => "bsp",
2673                };
2674                match is_hint {
2675                    false => format!("reta{key}"),
2676                    true => format!("auti{key} ; ret"),
2677                }
2678            }
2679            &Inst::Jump { ref dest } => {
2680                let dest = dest.pretty_print(0);
2681                format!("b {dest}")
2682            }
2683            &Inst::CondBr {
2684                ref taken,
2685                ref not_taken,
2686                ref kind,
2687            } => {
2688                let taken = taken.pretty_print(0);
2689                let not_taken = not_taken.pretty_print(0);
2690                match kind {
2691                    &CondBrKind::Zero(reg, size) => {
2692                        let reg = pretty_print_reg_sized(reg, size);
2693                        format!("cbz {reg}, {taken} ; b {not_taken}")
2694                    }
2695                    &CondBrKind::NotZero(reg, size) => {
2696                        let reg = pretty_print_reg_sized(reg, size);
2697                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2698                    }
2699                    &CondBrKind::Cond(c) => {
2700                        let c = c.pretty_print(0);
2701                        format!("b.{c} {taken} ; b {not_taken}")
2702                    }
2703                }
2704            }
2705            &Inst::TestBitAndBranch {
2706                kind,
2707                ref taken,
2708                ref not_taken,
2709                rn,
2710                bit,
2711            } => {
2712                let cond = match kind {
2713                    TestBitAndBranchKind::Z => "z",
2714                    TestBitAndBranchKind::NZ => "nz",
2715                };
2716                let taken = taken.pretty_print(0);
2717                let not_taken = not_taken.pretty_print(0);
2718                let rn = pretty_print_reg(rn);
2719                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2720            }
2721            &Inst::IndirectBr { rn, .. } => {
2722                let rn = pretty_print_reg(rn);
2723                format!("br {rn}")
2724            }
2725            &Inst::Brk => "brk #0xf000".to_string(),
2726            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2727            &Inst::TrapIf {
2728                ref kind,
2729                trap_code,
2730            } => match kind {
2731                &CondBrKind::Zero(reg, size) => {
2732                    let reg = pretty_print_reg_sized(reg, size);
2733                    format!("cbz {reg}, #trap={trap_code}")
2734                }
2735                &CondBrKind::NotZero(reg, size) => {
2736                    let reg = pretty_print_reg_sized(reg, size);
2737                    format!("cbnz {reg}, #trap={trap_code}")
2738                }
2739                &CondBrKind::Cond(c) => {
2740                    let c = c.pretty_print(0);
2741                    format!("b.{c} #trap={trap_code}")
2742                }
2743            },
2744            &Inst::Adr { rd, off } => {
2745                let rd = pretty_print_reg(rd.to_reg());
2746                format!("adr {rd}, pc+{off}")
2747            }
2748            &Inst::Adrp { rd, off } => {
2749                let rd = pretty_print_reg(rd.to_reg());
2750                // This instruction addresses 4KiB pages, so multiply it by the page size.
2751                let byte_offset = off * 4096;
2752                format!("adrp {rd}, pc+{byte_offset}")
2753            }
2754            &Inst::Word4 { data } => format!("data.i32 {data}"),
2755            &Inst::Word8 { data } => format!("data.i64 {data}"),
2756            &Inst::JTSequence {
2757                default,
2758                ref targets,
2759                ridx,
2760                rtmp1,
2761                rtmp2,
2762                ..
2763            } => {
2764                let ridx = pretty_print_reg(ridx);
2765                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
2766                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
2767                let default_target = BranchTarget::Label(default).pretty_print(0);
2768                format!(
2769                    concat!(
2770                        "b.hs {} ; ",
2771                        "csel {}, xzr, {}, hs ; ",
2772                        "csdb ; ",
2773                        "adr {}, pc+16 ; ",
2774                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
2775                        "add {}, {}, {} ; ",
2776                        "br {} ; ",
2777                        "jt_entries {:?}"
2778                    ),
2779                    default_target,
2780                    rtmp2,
2781                    ridx,
2782                    rtmp1,
2783                    rtmp2,
2784                    rtmp1,
2785                    rtmp2,
2786                    rtmp1,
2787                    rtmp1,
2788                    rtmp2,
2789                    rtmp1,
2790                    targets
2791                )
2792            }
2793            &Inst::LoadExtNameGot { rd, ref name } => {
2794                let rd = pretty_print_reg(rd.to_reg());
2795                format!("load_ext_name_got {rd}, {name:?}")
2796            }
2797            &Inst::LoadExtNameNear {
2798                rd,
2799                ref name,
2800                offset,
2801            } => {
2802                let rd = pretty_print_reg(rd.to_reg());
2803                format!("load_ext_name_near {rd}, {name:?}+{offset}")
2804            }
2805            &Inst::LoadExtNameFar {
2806                rd,
2807                ref name,
2808                offset,
2809            } => {
2810                let rd = pretty_print_reg(rd.to_reg());
2811                format!("load_ext_name_far {rd}, {name:?}+{offset}")
2812            }
2813            &Inst::LoadAddr { rd, ref mem } => {
2814                // TODO: we really should find a better way to avoid duplication of
2815                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
2816                // expansion stage (i.e., legalization, but without the slow edit-in-place
2817                // of the existing legalization framework).
2818                let mem = mem.clone();
2819                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
2820                let mut ret = String::new();
2821                for inst in mem_insts.into_iter() {
2822                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2823                }
2824                let (reg, index_reg, offset) = match mem {
2825                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
2826                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
2827                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
2828                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
2829                };
2830                let abs_offset = if offset < 0 {
2831                    -offset as u64
2832                } else {
2833                    offset as u64
2834                };
2835                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
2836
2837                if let Some((idx, extendop)) = index_reg {
2838                    let add = Inst::AluRRRExtend {
2839                        alu_op: ALUOp::Add,
2840                        size: OperandSize::Size64,
2841                        rd,
2842                        rn: reg,
2843                        rm: idx,
2844                        extendop,
2845                    };
2846
2847                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2848                } else if offset == 0 {
2849                    let mov = Inst::gen_move(rd, reg, I64);
2850                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
2851                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
2852                    let add = Inst::AluRRImm12 {
2853                        alu_op,
2854                        size: OperandSize::Size64,
2855                        rd,
2856                        rn: reg,
2857                        imm12,
2858                    };
2859                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2860                } else {
2861                    let tmp = writable_spilltmp_reg();
2862                    for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
2863                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2864                    }
2865                    let add = Inst::AluRRR {
2866                        alu_op,
2867                        size: OperandSize::Size64,
2868                        rd,
2869                        rn: reg,
2870                        rm: tmp.to_reg(),
2871                    };
2872                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2873                }
2874                ret
2875            }
2876            &Inst::Paci { key } => {
2877                let key = match key {
2878                    APIKey::AZ => "az",
2879                    APIKey::BZ => "bz",
2880                    APIKey::ASP => "asp",
2881                    APIKey::BSP => "bsp",
2882                };
2883
2884                "paci".to_string() + key
2885            }
2886            &Inst::Xpaclri => "xpaclri".to_string(),
2887            &Inst::Bti { targets } => {
2888                let targets = match targets {
2889                    BranchTargetType::None => "",
2890                    BranchTargetType::C => " c",
2891                    BranchTargetType::J => " j",
2892                    BranchTargetType::JC => " jc",
2893                };
2894
2895                "bti".to_string() + targets
2896            }
2897            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),
2898
2899            &Inst::ElfTlsGetAddr {
2900                ref symbol,
2901                rd,
2902                tmp,
2903            } => {
2904                let rd = pretty_print_reg(rd.to_reg());
2905                let tmp = pretty_print_reg(tmp.to_reg());
2906                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
2907            }
2908            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
2909                let rd = pretty_print_reg(rd.to_reg());
2910                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2911            }
2912            &Inst::Unwind { ref inst } => {
2913                format!("unwind {inst:?}")
2914            }
2915            &Inst::DummyUse { reg } => {
2916                let reg = pretty_print_reg(reg);
2917                format!("dummy_use {reg}")
2918            }
2919            &Inst::LabelAddress { dst, label } => {
2920                let dst = pretty_print_reg(dst.to_reg());
2921                format!("label_address {dst}, {label:?}")
2922            }
2923            &Inst::SequencePoint {} => {
2924                format!("sequence_point")
2925            }
2926            &Inst::StackProbeLoop { start, end, step } => {
2927                let start = pretty_print_reg(start.to_reg());
2928                let end = pretty_print_reg(end);
2929                let step = step.pretty_print(0);
2930                format!("stack_probe_loop {start}, {end}, {step}")
2931            }
2932        }
2933    }
2934}
2935
2936//=============================================================================
2937// Label fixups and jump veneers.
2938
/// The kinds of label reference an AArch64 instruction (or jump-table entry) can carry. Each
/// variant names one immediate-field layout.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LabelUse {
    /// Conditional-branch reference with a 14-bit signed immediate stored in bits 18:5. The
    /// PC-relative offset is the immediate shifted left by 2. Used by tbz and tbnz.
    Branch14,
    /// Conditional-branch reference with a 19-bit signed immediate stored in bits 23:5. The
    /// PC-relative offset is the immediate shifted left by 2. Used by cbz, cbnz and b.cond.
    Branch19,
    /// Unconditional-branch reference with a 26-bit signed immediate stored in bits 25:0. The
    /// PC-relative offset is the immediate shifted left by 2. Used by b and bl.
    Branch26,
    /// Load-literal (LDR) reference with a 19-bit signed immediate stored in bits 23:5. The
    /// PC-relative offset is the immediate shifted left by 2.
    Ldr19,
    /// ADR (address-of-label) reference with a 21-bit signed immediate that is *not* shifted.
    /// The high 19 bits of the immediate live in bits 23:5 and the low 2 bits in bits 30:29.
    Adr21,
    /// Signed 32-bit PC-relative constant, measured from the address of the constant itself.
    /// Used in jump tables.
    PCRel32,
}
2961
2962impl MachInstLabelUse for LabelUse {
2963    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
2964    const ALIGN: CodeOffset = 4;
2965
2966    /// Maximum PC-relative range (positive), inclusive.
2967    fn max_pos_range(self) -> CodeOffset {
2968        match self {
2969            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
2970            // range. Signed, so +2^(N+1) from zero. Likewise for two other
2971            // shifted cases below.
2972            LabelUse::Branch14 => (1 << 15) - 1,
2973            LabelUse::Branch19 => (1 << 20) - 1,
2974            LabelUse::Branch26 => (1 << 27) - 1,
2975            LabelUse::Ldr19 => (1 << 20) - 1,
2976            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
2977            // range.
2978            LabelUse::Adr21 => (1 << 20) - 1,
2979            LabelUse::PCRel32 => 0x7fffffff,
2980        }
2981    }
2982
2983    /// Maximum PC-relative range (negative).
2984    fn max_neg_range(self) -> CodeOffset {
2985        // All forms are twos-complement signed offsets, so negative limit is one more than
2986        // positive limit.
2987        self.max_pos_range() + 1
2988    }
2989
2990    /// Size of window into code needed to do the patch.
2991    fn patch_size(self) -> CodeOffset {
2992        // Patch is on one instruction only for all of these label reference types.
2993        4
2994    }
2995
2996    /// Perform the patch.
2997    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
2998        let pc_rel = (label_offset as i64) - (use_offset as i64);
2999        debug_assert!(pc_rel <= self.max_pos_range() as i64);
3000        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
3001        let pc_rel = pc_rel as u32;
3002        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
3003        let mask = match self {
3004            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
3005            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
3006            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
3007            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
3008            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
3009            LabelUse::PCRel32 => 0xffffffff,
3010        };
3011        let pc_rel_shifted = match self {
3012            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
3013            _ => {
3014                debug_assert!(pc_rel & 3 == 0);
3015                pc_rel >> 2
3016            }
3017        };
3018        let pc_rel_inserted = match self {
3019            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
3020            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
3021            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
3022            // Note: the *low* two bits of offset are put in the
3023            // *high* bits (30, 29).
3024            LabelUse::Adr21 => (pc_rel_shifted & 0x1ffffc) << 3 | (pc_rel_shifted & 3) << 29,
3025            LabelUse::PCRel32 => pc_rel_shifted,
3026        };
3027        let is_add = match self {
3028            LabelUse::PCRel32 => true,
3029            _ => false,
3030        };
3031        let insn_word = if is_add {
3032            insn_word.wrapping_add(pc_rel_inserted)
3033        } else {
3034            (insn_word & !mask) | pc_rel_inserted
3035        };
3036        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3037    }
3038
3039    /// Is a veneer supported for this label reference type?
3040    fn supports_veneer(self) -> bool {
3041        match self {
3042            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
3043            LabelUse::Branch26 => true,                      // veneer is a PCRel32
3044            _ => false,
3045        }
3046    }
3047
3048    /// How large is the veneer, if supported?
3049    fn veneer_size(self) -> CodeOffset {
3050        match self {
3051            LabelUse::Branch14 | LabelUse::Branch19 => 4,
3052            LabelUse::Branch26 => 20,
3053            _ => unreachable!(),
3054        }
3055    }
3056
3057    fn worst_case_veneer_size() -> CodeOffset {
3058        20
3059    }
3060
3061    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
3062    /// an offset and label-use for the veneer's use of the original label.
3063    fn generate_veneer(
3064        self,
3065        buffer: &mut [u8],
3066        veneer_offset: CodeOffset,
3067    ) -> (CodeOffset, LabelUse) {
3068        match self {
3069            LabelUse::Branch14 | LabelUse::Branch19 => {
3070                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
3071                // bother with constructing an Inst.
3072                let insn_word = 0b000101 << 26;
3073                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3074                (veneer_offset, LabelUse::Branch26)
3075            }
3076
3077            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
3078            // get a further range. This jump translates to a jump to a
3079            // relative location based on the address of the constant loaded
3080            // from here.
3081            //
3082            // If this path is taken from a call instruction then caller-saved
3083            // registers are available (minus arguments), so x16/x17 are
3084            // available. Otherwise for intra-function jumps we also reserve
3085            // x16/x17 as spill-style registers. In both cases these are
3086            // available for us to use.
3087            LabelUse::Branch26 => {
3088                let tmp1 = regs::spilltmp_reg();
3089                let tmp1_w = regs::writable_spilltmp_reg();
3090                let tmp2 = regs::tmp2_reg();
3091                let tmp2_w = regs::writable_tmp2_reg();
3092                // ldrsw x16, 16
3093                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
3094                // adr x17, 12
3095                let adr = emit::enc_adr(12, tmp2_w);
3096                // add x16, x16, x17
3097                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
3098                // br x16
3099                let br = emit::enc_br(tmp1);
3100                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
3101                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
3102                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
3103                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
3104                // the 4-byte signed immediate we'll load is after these
3105                // instructions, 16-bytes in.
3106                (veneer_offset + 16, LabelUse::PCRel32)
3107            }
3108
3109            _ => panic!("Unsupported label-reference type for veneer generation!"),
3110        }
3111    }
3112
3113    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
3114        match (reloc, addend) {
3115            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
3116            _ => None,
3117        }
3118    }
3119}
3120
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the size of `Inst` at 32 bytes so that accidental growth of the enum fails this test.
    #[test]
    fn inst_size_test() {
        let inst_size = core::mem::size_of::<Inst>();
        assert_eq!(32, inst_size);
    }
}