Skip to main content

cranelift_codegen/isa/aarch64/inst/
mod.rs

1//! This module defines aarch64-specific machine instruction types.
2
3use crate::binemit::{Addend, CodeOffset, Reloc};
4use crate::ir::types::{F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};
5use crate::ir::{MemFlagsData, Type, types};
6use crate::isa::{CallConv, FunctionAlignment};
7use crate::machinst::*;
8use crate::{CodegenError, CodegenResult, settings};
9
10use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
11
12use alloc::string::{String, ToString};
13use alloc::vec::Vec;
14use core::fmt::Write;
15use core::slice;
16use smallvec::{SmallVec, smallvec};
17
18pub(crate) mod regs;
19pub(crate) use self::regs::*;
20pub mod imms;
21pub use self::imms::*;
22pub mod args;
23pub use self::args::*;
24pub mod emit;
25pub(crate) use self::emit::*;
26use crate::isa::aarch64::abi::AArch64MachineDeps;
27
28pub(crate) mod unwind;
29
30#[cfg(test)]
31mod emit_tests;
32
33//=============================================================================
34// Instructions (top level): definition
35
36pub use crate::isa::aarch64::lower::isle::generated_code::{
37    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
38    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
39    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
40    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
41};
42
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
///
/// Each variant carries the right-shift amount as an `FPURightShiftImm`.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift (32-bit form, per the variant name). Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift (64-bit form, per the variant name). Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}
51
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
///
/// Each variant carries the left-shift amount as an `FPULeftShiftImm`.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert (32-bit form, per the variant name). Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert (64-bit form, per the variant name). Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
62
63impl BitOp {
64    /// Get the assembly mnemonic for this opcode.
65    pub fn op_str(&self) -> &'static str {
66        match self {
67            BitOp::RBit => "rbit",
68            BitOp::Clz => "clz",
69            BitOp::Cls => "cls",
70            BitOp::Rev16 => "rev16",
71            BitOp::Rev32 => "rev32",
72            BitOp::Rev64 => "rev64",
73        }
74    }
75}
76
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
///
/// `T` is the type used to describe the call destination (`dest`).
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to.
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
    /// Whether pointer-auth return addresses are signed even without frame setup.
    pub sign_return_address_all: bool,
}
94
/// Counts how many of the lowest `num_half_words` 16-bit half words of
/// `value` are zero.
///
/// Half words beyond the 64-bit width of `value` are treated as zero, so
/// they are counted as well.
fn count_zero_half_words(value: u64, num_half_words: u8) -> usize {
    (0..u32::from(num_half_words))
        .filter(|&index| {
            // A shift of 64 bits or more leaves nothing behind; `checked_shr`
            // reports that as `None`, which we read as an all-zero half word.
            let shifted = value.checked_shr(16 * index).unwrap_or(0);
            shifted & 0xffff == 0
        })
        .count()
}
106
107impl Inst {
108    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
109    /// logical immediate, or constant pool).
110    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
111        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
112        // if modifications are made here before this is deleted after moving to
113        // ISLE then those locations should be updated as well.
114
115        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
116            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
117            smallvec![Inst::MovWide {
118                op: MoveWideOp::MovZ,
119                rd,
120                imm,
121                size: OperandSize::Size64
122            }]
123        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
124            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
125            smallvec![Inst::MovWide {
126                op: MoveWideOp::MovN,
127                rd,
128                imm,
129                size: OperandSize::Size64
130            }]
131        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
132            // Weird logical-instruction immediate in ORI using zero register
133            smallvec![Inst::AluRRImmLogic {
134                alu_op: ALUOp::Orr,
135                size: OperandSize::Size64,
136                rd,
137                rn: zero_reg(),
138                imml,
139            }]
140        } else {
141            let mut insts = smallvec![];
142
143            // If the top 32 bits are zero, use 32-bit `mov` operations.
144            let (num_half_words, size, negated) = if value >> 32 == 0 {
145                (2, OperandSize::Size32, (!value << 32) >> 32)
146            } else {
147                (4, OperandSize::Size64, !value)
148            };
149
150            // If the number of 0xffff half words is greater than the number of 0x0000 half words
151            // it is more efficient to use `movn` for the first instruction.
152            let first_is_inverted = count_zero_half_words(negated, num_half_words)
153                > count_zero_half_words(value, num_half_words);
154
155            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
156            // instruction used.
157            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
158
159            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
160                .filter_map(|i| {
161                    let imm16 = (value >> (16 * i)) & 0xffff;
162                    if imm16 == ignored_halfword {
163                        None
164                    } else {
165                        Some((i, imm16))
166                    }
167                })
168                .collect();
169
170            let mut prev_result = None;
171            for (i, imm16) in halfwords {
172                let shift = i * 16;
173
174                if let Some(rn) = prev_result {
175                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
176                    insts.push(Inst::MovK { rd, rn, imm, size });
177                } else {
178                    if first_is_inverted {
179                        let imm =
180                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
181                                .unwrap();
182                        insts.push(Inst::MovWide {
183                            op: MoveWideOp::MovN,
184                            rd,
185                            imm,
186                            size,
187                        });
188                    } else {
189                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
190                        insts.push(Inst::MovWide {
191                            op: MoveWideOp::MovZ,
192                            rd,
193                            imm,
194                            size,
195                        });
196                    }
197                }
198
199                prev_result = Some(rd.to_reg());
200            }
201
202            assert!(prev_result.is_some());
203
204            insts
205        }
206    }
207
208    /// Generic constructor for a load (zero-extending where appropriate).
209    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlagsData) -> Inst {
210        match ty {
211            I8 => Inst::ULoad8 {
212                rd: into_reg,
213                mem,
214                flags,
215            },
216            I16 => Inst::ULoad16 {
217                rd: into_reg,
218                mem,
219                flags,
220            },
221            I32 => Inst::ULoad32 {
222                rd: into_reg,
223                mem,
224                flags,
225            },
226            I64 => Inst::ULoad64 {
227                rd: into_reg,
228                mem,
229                flags,
230            },
231            _ => {
232                if ty.is_vector() || ty.is_float() {
233                    let bits = ty_bits(ty);
234                    let rd = into_reg;
235
236                    match bits {
237                        128 => Inst::FpuLoad128 { rd, mem, flags },
238                        64 => Inst::FpuLoad64 { rd, mem, flags },
239                        32 => Inst::FpuLoad32 { rd, mem, flags },
240                        16 => Inst::FpuLoad16 { rd, mem, flags },
241                        _ => unimplemented!("gen_load({})", ty),
242                    }
243                } else {
244                    unimplemented!("gen_load({})", ty);
245                }
246            }
247        }
248    }
249
250    /// Generic constructor for a store.
251    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlagsData) -> Inst {
252        match ty {
253            I8 => Inst::Store8 {
254                rd: from_reg,
255                mem,
256                flags,
257            },
258            I16 => Inst::Store16 {
259                rd: from_reg,
260                mem,
261                flags,
262            },
263            I32 => Inst::Store32 {
264                rd: from_reg,
265                mem,
266                flags,
267            },
268            I64 => Inst::Store64 {
269                rd: from_reg,
270                mem,
271                flags,
272            },
273            _ => {
274                if ty.is_vector() || ty.is_float() {
275                    let bits = ty_bits(ty);
276                    let rd = from_reg;
277
278                    match bits {
279                        128 => Inst::FpuStore128 { rd, mem, flags },
280                        64 => Inst::FpuStore64 { rd, mem, flags },
281                        32 => Inst::FpuStore32 { rd, mem, flags },
282                        16 => Inst::FpuStore16 { rd, mem, flags },
283                        _ => unimplemented!("gen_store({})", ty),
284                    }
285                } else {
286                    unimplemented!("gen_store({})", ty);
287                }
288            }
289        }
290    }
291
292    /// What type does this load or store instruction access in memory? When
293    /// uimm12 encoding is used, the size of this type is the amount that
294    /// immediate offsets are scaled by.
295    pub fn mem_type(&self) -> Option<Type> {
296        match self {
297            Inst::ULoad8 { .. } => Some(I8),
298            Inst::SLoad8 { .. } => Some(I8),
299            Inst::ULoad16 { .. } => Some(I16),
300            Inst::SLoad16 { .. } => Some(I16),
301            Inst::ULoad32 { .. } => Some(I32),
302            Inst::SLoad32 { .. } => Some(I32),
303            Inst::ULoad64 { .. } => Some(I64),
304            Inst::FpuLoad16 { .. } => Some(F16),
305            Inst::FpuLoad32 { .. } => Some(F32),
306            Inst::FpuLoad64 { .. } => Some(F64),
307            Inst::FpuLoad128 { .. } => Some(I8X16),
308            Inst::Store8 { .. } => Some(I8),
309            Inst::Store16 { .. } => Some(I16),
310            Inst::Store32 { .. } => Some(I32),
311            Inst::Store64 { .. } => Some(I64),
312            Inst::FpuStore16 { .. } => Some(F16),
313            Inst::FpuStore32 { .. } => Some(F32),
314            Inst::FpuStore64 { .. } => Some(F64),
315            Inst::FpuStore128 { .. } => Some(I8X16),
316            _ => None,
317        }
318    }
319}
320
321//=============================================================================
322// Instructions: get_regs
323
324fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
325    match memarg {
326        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
327            collector.reg_use(rn);
328        }
329        AMode::RegReg { rn, rm, .. }
330        | AMode::RegScaled { rn, rm, .. }
331        | AMode::RegScaledExtended { rn, rm, .. }
332        | AMode::RegExtended { rn, rm, .. } => {
333            collector.reg_use(rn);
334            collector.reg_use(rm);
335        }
336        AMode::Label { .. } => {}
337        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
338        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
339        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
340        AMode::RegOffset { rn, .. } => {
341            collector.reg_use(rn);
342        }
343        AMode::Const { .. } => {}
344    }
345}
346
347fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
348    match pairmemarg {
349        PairAMode::SignedOffset { reg, .. } => {
350            collector.reg_use(reg);
351        }
352        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
353    }
354}
355
356fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
357    match inst {
358        Inst::AluRRR { rd, rn, rm, .. } => {
359            collector.reg_def(rd);
360            collector.reg_use(rn);
361            collector.reg_use(rm);
362        }
363        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
364            collector.reg_def(rd);
365            collector.reg_use(rn);
366            collector.reg_use(rm);
367            collector.reg_use(ra);
368        }
369        Inst::AluRRImm12 { rd, rn, .. } => {
370            collector.reg_def(rd);
371            collector.reg_use(rn);
372        }
373        Inst::AluRRImmLogic { rd, rn, .. } => {
374            collector.reg_def(rd);
375            collector.reg_use(rn);
376        }
377        Inst::AluRRImmShift { rd, rn, .. } => {
378            collector.reg_def(rd);
379            collector.reg_use(rn);
380        }
381        Inst::AluRRRShift { rd, rn, rm, .. } => {
382            collector.reg_def(rd);
383            collector.reg_use(rn);
384            collector.reg_use(rm);
385        }
386        Inst::AluRRRExtend { rd, rn, rm, .. } => {
387            collector.reg_def(rd);
388            collector.reg_use(rn);
389            collector.reg_use(rm);
390        }
391        Inst::BitRR { rd, rn, .. } => {
392            collector.reg_def(rd);
393            collector.reg_use(rn);
394        }
395        Inst::ULoad8 { rd, mem, .. }
396        | Inst::SLoad8 { rd, mem, .. }
397        | Inst::ULoad16 { rd, mem, .. }
398        | Inst::SLoad16 { rd, mem, .. }
399        | Inst::ULoad32 { rd, mem, .. }
400        | Inst::SLoad32 { rd, mem, .. }
401        | Inst::ULoad64 { rd, mem, .. } => {
402            collector.reg_def(rd);
403            memarg_operands(mem, collector);
404        }
405        Inst::Store8 { rd, mem, .. }
406        | Inst::Store16 { rd, mem, .. }
407        | Inst::Store32 { rd, mem, .. }
408        | Inst::Store64 { rd, mem, .. } => {
409            collector.reg_use(rd);
410            memarg_operands(mem, collector);
411        }
412        Inst::StoreP64 { rt, rt2, mem, .. } => {
413            collector.reg_use(rt);
414            collector.reg_use(rt2);
415            pairmemarg_operands(mem, collector);
416        }
417        Inst::LoadP64 { rt, rt2, mem, .. } => {
418            collector.reg_def(rt);
419            collector.reg_def(rt2);
420            pairmemarg_operands(mem, collector);
421        }
422        Inst::Mov { rd, rm, .. } => {
423            collector.reg_def(rd);
424            collector.reg_use(rm);
425        }
426        Inst::MovFromPReg { rd, rm } => {
427            debug_assert!(rd.to_reg().is_virtual());
428            collector.reg_def(rd);
429            collector.reg_fixed_nonallocatable(*rm);
430        }
431        Inst::MovToPReg { rd, rm } => {
432            debug_assert!(rm.is_virtual());
433            collector.reg_fixed_nonallocatable(*rd);
434            collector.reg_use(rm);
435        }
436        Inst::MovK { rd, rn, .. } => {
437            collector.reg_use(rn);
438            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
439        }
440        Inst::MovWide { rd, .. } => {
441            collector.reg_def(rd);
442        }
443        Inst::CSel { rd, rn, rm, .. } => {
444            collector.reg_def(rd);
445            collector.reg_use(rn);
446            collector.reg_use(rm);
447        }
448        Inst::CSNeg { rd, rn, rm, .. } => {
449            collector.reg_def(rd);
450            collector.reg_use(rn);
451            collector.reg_use(rm);
452        }
453        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
454            collector.reg_def(rd);
455        }
456        Inst::CCmp { rn, rm, .. } => {
457            collector.reg_use(rn);
458            collector.reg_use(rm);
459        }
460        Inst::CCmpImm { rn, .. } => {
461            collector.reg_use(rn);
462        }
463        Inst::AtomicRMWLoop {
464            op,
465            addr,
466            operand,
467            oldval,
468            scratch1,
469            scratch2,
470            ..
471        } => {
472            collector.reg_fixed_use(addr, xreg(25));
473            collector.reg_fixed_use(operand, xreg(26));
474            collector.reg_fixed_def(oldval, xreg(27));
475            collector.reg_fixed_def(scratch1, xreg(24));
476            if *op != AtomicRMWLoopOp::Xchg {
477                collector.reg_fixed_def(scratch2, xreg(28));
478            }
479        }
480        Inst::AtomicRMW { rs, rt, rn, .. } => {
481            collector.reg_use(rs);
482            collector.reg_def(rt);
483            collector.reg_use(rn);
484        }
485        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
486            collector.reg_reuse_def(rd, 1); // reuse `rs`.
487            collector.reg_use(rs);
488            collector.reg_use(rt);
489            collector.reg_use(rn);
490        }
491        Inst::AtomicCASLoop {
492            addr,
493            expected,
494            replacement,
495            oldval,
496            scratch,
497            ..
498        } => {
499            collector.reg_fixed_use(addr, xreg(25));
500            collector.reg_fixed_use(expected, xreg(26));
501            collector.reg_fixed_use(replacement, xreg(28));
502            collector.reg_fixed_def(oldval, xreg(27));
503            collector.reg_fixed_def(scratch, xreg(24));
504        }
505        Inst::LoadAcquire { rt, rn, .. } => {
506            collector.reg_use(rn);
507            collector.reg_def(rt);
508        }
509        Inst::StoreRelease { rt, rn, .. } => {
510            collector.reg_use(rn);
511            collector.reg_use(rt);
512        }
513        Inst::Fence {} | Inst::Csdb {} => {}
514        Inst::FpuMove32 { rd, rn } => {
515            collector.reg_def(rd);
516            collector.reg_use(rn);
517        }
518        Inst::FpuMove64 { rd, rn } => {
519            collector.reg_def(rd);
520            collector.reg_use(rn);
521        }
522        Inst::FpuMove128 { rd, rn } => {
523            collector.reg_def(rd);
524            collector.reg_use(rn);
525        }
526        Inst::FpuMoveFromVec { rd, rn, .. } => {
527            collector.reg_def(rd);
528            collector.reg_use(rn);
529        }
530        Inst::FpuExtend { rd, rn, .. } => {
531            collector.reg_def(rd);
532            collector.reg_use(rn);
533        }
534        Inst::FpuRR { rd, rn, .. } => {
535            collector.reg_def(rd);
536            collector.reg_use(rn);
537        }
538        Inst::FpuRRR { rd, rn, rm, .. } => {
539            collector.reg_def(rd);
540            collector.reg_use(rn);
541            collector.reg_use(rm);
542        }
543        Inst::FpuRRI { rd, rn, .. } => {
544            collector.reg_def(rd);
545            collector.reg_use(rn);
546        }
547        Inst::FpuRRIMod { rd, ri, rn, .. } => {
548            collector.reg_reuse_def(rd, 1); // reuse `ri`.
549            collector.reg_use(ri);
550            collector.reg_use(rn);
551        }
552        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
553            collector.reg_def(rd);
554            collector.reg_use(rn);
555            collector.reg_use(rm);
556            collector.reg_use(ra);
557        }
558        Inst::VecMisc { rd, rn, .. } => {
559            collector.reg_def(rd);
560            collector.reg_use(rn);
561        }
562
563        Inst::VecLanes { rd, rn, .. } => {
564            collector.reg_def(rd);
565            collector.reg_use(rn);
566        }
567        Inst::VecShiftImm { rd, rn, .. } => {
568            collector.reg_def(rd);
569            collector.reg_use(rn);
570        }
571        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
572            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
573            collector.reg_use(ri);
574            collector.reg_use(rn);
575        }
576        Inst::VecExtract { rd, rn, rm, .. } => {
577            collector.reg_def(rd);
578            collector.reg_use(rn);
579            collector.reg_use(rm);
580        }
581        Inst::VecTbl { rd, rn, rm } => {
582            collector.reg_use(rn);
583            collector.reg_use(rm);
584            collector.reg_def(rd);
585        }
586        Inst::VecTblExt { rd, ri, rn, rm } => {
587            collector.reg_use(rn);
588            collector.reg_use(rm);
589            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
590            collector.reg_use(ri);
591        }
592
593        Inst::VecTbl2 { rd, rn, rn2, rm } => {
594            // Constrain to v30 / v31 so that we satisfy the "adjacent
595            // registers" constraint without use of pinned vregs in
596            // lowering.
597            collector.reg_fixed_use(rn, vreg(30));
598            collector.reg_fixed_use(rn2, vreg(31));
599            collector.reg_use(rm);
600            collector.reg_def(rd);
601        }
602        Inst::VecTbl2Ext {
603            rd,
604            ri,
605            rn,
606            rn2,
607            rm,
608        } => {
609            // Constrain to v30 / v31 so that we satisfy the "adjacent
610            // registers" constraint without use of pinned vregs in
611            // lowering.
612            collector.reg_fixed_use(rn, vreg(30));
613            collector.reg_fixed_use(rn2, vreg(31));
614            collector.reg_use(rm);
615            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
616            collector.reg_use(ri);
617        }
618        Inst::VecLoadReplicate { rd, rn, .. } => {
619            collector.reg_def(rd);
620            collector.reg_use(rn);
621        }
622        Inst::VecCSel { rd, rn, rm, .. } => {
623            collector.reg_def(rd);
624            collector.reg_use(rn);
625            collector.reg_use(rm);
626        }
627        Inst::FpuCmp { rn, rm, .. } => {
628            collector.reg_use(rn);
629            collector.reg_use(rm);
630        }
631        Inst::FpuLoad16 { rd, mem, .. } => {
632            collector.reg_def(rd);
633            memarg_operands(mem, collector);
634        }
635        Inst::FpuLoad32 { rd, mem, .. } => {
636            collector.reg_def(rd);
637            memarg_operands(mem, collector);
638        }
639        Inst::FpuLoad64 { rd, mem, .. } => {
640            collector.reg_def(rd);
641            memarg_operands(mem, collector);
642        }
643        Inst::FpuLoad128 { rd, mem, .. } => {
644            collector.reg_def(rd);
645            memarg_operands(mem, collector);
646        }
647        Inst::FpuStore16 { rd, mem, .. } => {
648            collector.reg_use(rd);
649            memarg_operands(mem, collector);
650        }
651        Inst::FpuStore32 { rd, mem, .. } => {
652            collector.reg_use(rd);
653            memarg_operands(mem, collector);
654        }
655        Inst::FpuStore64 { rd, mem, .. } => {
656            collector.reg_use(rd);
657            memarg_operands(mem, collector);
658        }
659        Inst::FpuStore128 { rd, mem, .. } => {
660            collector.reg_use(rd);
661            memarg_operands(mem, collector);
662        }
663        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
664            collector.reg_def(rt);
665            collector.reg_def(rt2);
666            pairmemarg_operands(mem, collector);
667        }
668        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
669            collector.reg_use(rt);
670            collector.reg_use(rt2);
671            pairmemarg_operands(mem, collector);
672        }
673        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
674            collector.reg_def(rt);
675            collector.reg_def(rt2);
676            pairmemarg_operands(mem, collector);
677        }
678        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
679            collector.reg_use(rt);
680            collector.reg_use(rt2);
681            pairmemarg_operands(mem, collector);
682        }
683        Inst::FpuToInt { rd, rn, .. } => {
684            collector.reg_def(rd);
685            collector.reg_use(rn);
686        }
687        Inst::IntToFpu { rd, rn, .. } => {
688            collector.reg_def(rd);
689            collector.reg_use(rn);
690        }
691        Inst::FpuCSel16 { rd, rn, rm, .. }
692        | Inst::FpuCSel32 { rd, rn, rm, .. }
693        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
694            collector.reg_def(rd);
695            collector.reg_use(rn);
696            collector.reg_use(rm);
697        }
698        Inst::FpuRound { rd, rn, .. } => {
699            collector.reg_def(rd);
700            collector.reg_use(rn);
701        }
702        Inst::MovToFpu { rd, rn, .. } => {
703            collector.reg_def(rd);
704            collector.reg_use(rn);
705        }
706        Inst::FpuMoveFPImm { rd, .. } => {
707            collector.reg_def(rd);
708        }
709        Inst::MovToVec { rd, ri, rn, .. } => {
710            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
711            collector.reg_use(ri);
712            collector.reg_use(rn);
713        }
714        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
715            collector.reg_def(rd);
716            collector.reg_use(rn);
717        }
718        Inst::VecDup { rd, rn, .. } => {
719            collector.reg_def(rd);
720            collector.reg_use(rn);
721        }
722        Inst::VecDupFromFpu { rd, rn, .. } => {
723            collector.reg_def(rd);
724            collector.reg_use(rn);
725        }
726        Inst::VecDupFPImm { rd, .. } => {
727            collector.reg_def(rd);
728        }
729        Inst::VecDupImm { rd, .. } => {
730            collector.reg_def(rd);
731        }
732        Inst::VecExtend { rd, rn, .. } => {
733            collector.reg_def(rd);
734            collector.reg_use(rn);
735        }
736        Inst::VecMovElement { rd, ri, rn, .. } => {
737            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
738            collector.reg_use(ri);
739            collector.reg_use(rn);
740        }
741        Inst::VecRRLong { rd, rn, .. } => {
742            collector.reg_def(rd);
743            collector.reg_use(rn);
744        }
745        Inst::VecRRNarrowLow { rd, rn, .. } => {
746            collector.reg_use(rn);
747            collector.reg_def(rd);
748        }
749        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
750            collector.reg_use(rn);
751            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
752            collector.reg_use(ri);
753        }
754        Inst::VecRRPair { rd, rn, .. } => {
755            collector.reg_def(rd);
756            collector.reg_use(rn);
757        }
758        Inst::VecRRRLong { rd, rn, rm, .. } => {
759            collector.reg_def(rd);
760            collector.reg_use(rn);
761            collector.reg_use(rm);
762        }
763        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
764            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
765            collector.reg_use(ri);
766            collector.reg_use(rn);
767            collector.reg_use(rm);
768        }
769        Inst::VecRRPairLong { rd, rn, .. } => {
770            collector.reg_def(rd);
771            collector.reg_use(rn);
772        }
773        Inst::VecRRR { rd, rn, rm, .. } => {
774            collector.reg_def(rd);
775            collector.reg_use(rn);
776            collector.reg_use(rm);
777        }
778        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
779            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
780            collector.reg_use(ri);
781            collector.reg_use(rn);
782            collector.reg_use(rm);
783        }
784        Inst::MovToNZCV { rn } => {
785            collector.reg_use(rn);
786        }
787        Inst::MovFromNZCV { rd } => {
788            collector.reg_def(rd);
789        }
790        Inst::Extend { rd, rn, .. } => {
791            collector.reg_def(rd);
792            collector.reg_use(rn);
793        }
794        Inst::Args { args } => {
795            for ArgPair { vreg, preg } in args {
796                collector.reg_fixed_def(vreg, *preg);
797            }
798        }
799        Inst::Rets { rets } => {
800            for RetPair { vreg, preg } in rets {
801                collector.reg_fixed_use(vreg, *preg);
802            }
803        }
804        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
805        Inst::Jump { .. } => {}
806        Inst::Call { info, .. } => {
807            let CallInfo { uses, defs, .. } = &mut **info;
808            for CallArgPair { vreg, preg } in uses {
809                collector.reg_fixed_use(vreg, *preg);
810            }
811            for CallRetPair { vreg, location } in defs {
812                match location {
813                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
814                    RetLocation::Stack(..) => collector.any_def(vreg),
815                }
816            }
817            collector.reg_clobbers(info.clobbers);
818            if let Some(try_call_info) = &mut info.try_call_info {
819                try_call_info.collect_operands(collector);
820            }
821        }
822        Inst::CallInd { info, .. } => {
823            let CallInfo {
824                dest, uses, defs, ..
825            } = &mut **info;
826            collector.reg_use(dest);
827            for CallArgPair { vreg, preg } in uses {
828                collector.reg_fixed_use(vreg, *preg);
829            }
830            for CallRetPair { vreg, location } in defs {
831                match location {
832                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
833                    RetLocation::Stack(..) => collector.any_def(vreg),
834                }
835            }
836            collector.reg_clobbers(info.clobbers);
837            if let Some(try_call_info) = &mut info.try_call_info {
838                try_call_info.collect_operands(collector);
839            }
840        }
841        Inst::ReturnCall { info } => {
842            for CallArgPair { vreg, preg } in &mut info.uses {
843                collector.reg_fixed_use(vreg, *preg);
844            }
845        }
846        Inst::ReturnCallInd { info } => {
847            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
848            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
849            // register that won't be clobbered by the callee-save restore code emitted with a
850            // return_call_indirect.
851            collector.reg_fixed_use(&mut info.dest, xreg(1));
852            for CallArgPair { vreg, preg } in &mut info.uses {
853                collector.reg_fixed_use(vreg, *preg);
854            }
855        }
856        Inst::CondBr { kind, .. } => match kind {
857            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
858            CondBrKind::Cond(_) => {}
859        },
860        Inst::TestBitAndBranch { rn, .. } => {
861            collector.reg_use(rn);
862        }
863        Inst::IndirectBr { rn, .. } => {
864            collector.reg_use(rn);
865        }
866        Inst::Nop0 | Inst::Nop4 => {}
867        Inst::Brk => {}
868        Inst::Udf { .. } => {}
869        Inst::TrapIf { kind, .. } => match kind {
870            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
871            CondBrKind::Cond(_) => {}
872        },
873        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
874            collector.reg_def(rd);
875        }
876        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
877        Inst::JTSequence {
878            ridx, rtmp1, rtmp2, ..
879        } => {
880            collector.reg_use(ridx);
881            collector.reg_early_def(rtmp1);
882            collector.reg_early_def(rtmp2);
883        }
884        Inst::LoadExtNameGot { rd, .. }
885        | Inst::LoadExtNameNear { rd, .. }
886        | Inst::LoadExtNameFar { rd, .. } => {
887            collector.reg_def(rd);
888        }
889        Inst::LoadAddr { rd, mem } => {
890            collector.reg_def(rd);
891            memarg_operands(mem, collector);
892        }
893        Inst::Paci { .. } | Inst::Xpaclri => {
894            // Neither LR nor SP is an allocatable register, so there is no need
895            // to do anything.
896        }
897        Inst::Bti { .. } => {}
898
899        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
            // TLSDESC has a very neat calling convention. It is required to preserve
            // all registers except x0 and x30. X30 is non-allocatable in Cranelift since
            // it's the link register.
903            //
904            // Additionally we need a second register as a temporary register for the
905            // TLSDESC sequence. This register can be any register other than x0 (and x30).
906            collector.reg_fixed_def(rd, regs::xreg(0));
907            collector.reg_early_def(tmp);
908        }
909        Inst::MachOTlsGetAddr { rd, .. } => {
910            collector.reg_fixed_def(rd, regs::xreg(0));
911            let mut clobbers =
912                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
913            clobbers.remove(regs::xreg_preg(0));
914            collector.reg_clobbers(clobbers);
915        }
916        Inst::Unwind { .. } => {}
917        Inst::EmitIsland { .. } => {}
918        Inst::DummyUse { reg } => {
919            collector.reg_use(reg);
920        }
921        Inst::LabelAddress { dst, .. } => {
922            collector.reg_def(dst);
923        }
924        Inst::SequencePoint { .. } => {}
925        Inst::StackProbeLoop { start, end, .. } => {
926            collector.reg_early_def(start);
927            collector.reg_use(end);
928        }
929    }
930}
931
932//=============================================================================
933// Instructions: misc functions and external interface
934
935impl MachInst for Inst {
936    type ABIMachineSpec = AArch64MachineDeps;
937    type LabelUse = LabelUse;
938
    // Byte sequence used as this backend's trap opcode: the `u32` value
    // `0xc11f` serialized in little-endian byte order (four bytes).
    //
    // "CLIF" in hex, to make the trap recognizable during
    // debugging.
    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();
942
    /// Reports this instruction's register operands to `collector`.
    ///
    /// All of the work is delegated to the free function
    /// `aarch64_get_operands`, which matches on the instruction variant.
    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        aarch64_get_operands(self, collector);
    }
946
947    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
948        match self {
949            &Inst::Mov {
950                size: OperandSize::Size64,
951                rd,
952                rm,
953            } => Some((rd, rm)),
954            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
955            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
956            _ => None,
957        }
958    }
959
960    fn is_included_in_clobbers(&self) -> bool {
961        let (caller, callee, is_exception) = match self {
962            Inst::Args { .. } => return false,
963            Inst::Call { info } => (
964                info.caller_conv,
965                info.callee_conv,
966                info.try_call_info.is_some(),
967            ),
968            Inst::CallInd { info } => (
969                info.caller_conv,
970                info.callee_conv,
971                info.try_call_info.is_some(),
972            ),
973            _ => return true,
974        };
975
976        // We exclude call instructions from the clobber-set when they are calls
977        // from caller to callee that both clobber the same register (such as
978        // using the same or similar ABIs). Such calls cannot possibly force any
979        // new registers to be saved in the prologue, because anything that the
980        // callee clobbers, the caller is also allowed to clobber. This both
981        // saves work and enables us to more precisely follow the
982        // half-caller-save, half-callee-save SysV ABI for some vector
983        // registers.
984        //
985        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
986        // more information on this ABI-implementation hack.
987        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
988        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);
989
990        let mut all_clobbers = caller_clobbers;
991        all_clobbers.union_from(callee_clobbers);
992        all_clobbers != caller_clobbers
993    }
994
995    fn is_trap(&self) -> bool {
996        match self {
997            Self::Udf { .. } => true,
998            _ => false,
999        }
1000    }
1001
1002    fn is_args(&self) -> bool {
1003        match self {
1004            Self::Args { .. } => true,
1005            _ => false,
1006        }
1007    }
1008
1009    fn call_type(&self) -> CallType {
1010        match self {
1011            Inst::Call { .. }
1012            | Inst::CallInd { .. }
1013            | Inst::ElfTlsGetAddr { .. }
1014            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1015
1016            Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,
1017
1018            _ => CallType::None,
1019        }
1020    }
1021
1022    fn is_term(&self) -> MachTerminator {
1023        match self {
1024            &Inst::Rets { .. } => MachTerminator::Ret,
1025            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
1026            &Inst::Jump { .. } => MachTerminator::Branch,
1027            &Inst::CondBr { .. } => MachTerminator::Branch,
1028            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
1029            &Inst::IndirectBr { .. } => MachTerminator::Branch,
1030            &Inst::JTSequence { .. } => MachTerminator::Branch,
1031            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1032            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1033            _ => MachTerminator::None,
1034        }
1035    }
1036
1037    fn is_mem_access(&self) -> bool {
1038        match self {
1039            &Inst::ULoad8 { .. }
1040            | &Inst::SLoad8 { .. }
1041            | &Inst::ULoad16 { .. }
1042            | &Inst::SLoad16 { .. }
1043            | &Inst::ULoad32 { .. }
1044            | &Inst::SLoad32 { .. }
1045            | &Inst::ULoad64 { .. }
1046            | &Inst::LoadP64 { .. }
1047            | &Inst::FpuLoad16 { .. }
1048            | &Inst::FpuLoad32 { .. }
1049            | &Inst::FpuLoad64 { .. }
1050            | &Inst::FpuLoad128 { .. }
1051            | &Inst::FpuLoadP64 { .. }
1052            | &Inst::FpuLoadP128 { .. }
1053            | &Inst::Store8 { .. }
1054            | &Inst::Store16 { .. }
1055            | &Inst::Store32 { .. }
1056            | &Inst::Store64 { .. }
1057            | &Inst::StoreP64 { .. }
1058            | &Inst::FpuStore16 { .. }
1059            | &Inst::FpuStore32 { .. }
1060            | &Inst::FpuStore64 { .. }
1061            | &Inst::FpuStore128 { .. } => true,
1062            // TODO: verify this carefully
1063            _ => false,
1064        }
1065    }
1066
1067    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
1068        let bits = ty.bits();
1069
1070        assert!(bits <= 128);
1071        assert!(to_reg.to_reg().class() == from_reg.class());
1072        match from_reg.class() {
1073            RegClass::Int => Inst::Mov {
1074                size: OperandSize::Size64,
1075                rd: to_reg,
1076                rm: from_reg,
1077            },
1078            RegClass::Float => {
1079                if bits > 64 {
1080                    Inst::FpuMove128 {
1081                        rd: to_reg,
1082                        rn: from_reg,
1083                    }
1084                } else {
1085                    Inst::FpuMove64 {
1086                        rd: to_reg,
1087                        rn: from_reg,
1088                    }
1089                }
1090            }
1091            RegClass::Vector => unreachable!(),
1092        }
1093    }
1094
1095    fn is_safepoint(&self) -> bool {
1096        match self {
1097            Inst::Call { .. } | Inst::CallInd { .. } => true,
1098            _ => false,
1099        }
1100    }
1101
1102    fn gen_dummy_use(reg: Reg) -> Inst {
1103        Inst::DummyUse { reg }
1104    }
1105
1106    fn gen_nop(preferred_size: usize) -> Inst {
1107        if preferred_size == 0 {
1108            return Inst::Nop0;
1109        }
1110        // We can't give a NOP (or any insn) < 4 bytes.
1111        assert!(preferred_size >= 4);
1112        Inst::Nop4
1113    }
1114
1115    fn gen_nop_units() -> Vec<Vec<u8>> {
1116        vec![vec![0x1f, 0x20, 0x03, 0xd5]]
1117    }
1118
1119    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1120        match ty {
1121            I8 => Ok((&[RegClass::Int], &[I8])),
1122            I16 => Ok((&[RegClass::Int], &[I16])),
1123            I32 => Ok((&[RegClass::Int], &[I32])),
1124            I64 => Ok((&[RegClass::Int], &[I64])),
1125            F16 => Ok((&[RegClass::Float], &[F16])),
1126            F32 => Ok((&[RegClass::Float], &[F32])),
1127            F64 => Ok((&[RegClass::Float], &[F64])),
1128            F128 => Ok((&[RegClass::Float], &[F128])),
1129            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1130            _ if ty.is_vector() && ty.bits() <= 128 => {
1131                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
1132                Ok((
1133                    &[RegClass::Float],
1134                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1135                ))
1136            }
1137            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
1138            _ => Err(CodegenError::Unsupported(format!(
1139                "Unexpected SSA-value type: {ty}"
1140            ))),
1141        }
1142    }
1143
1144    fn canonical_type_for_rc(rc: RegClass) -> Type {
1145        match rc {
1146            RegClass::Float => types::I8X16,
1147            RegClass::Int => types::I64,
1148            RegClass::Vector => unreachable!(),
1149        }
1150    }
1151
1152    fn gen_jump(target: MachLabel) -> Inst {
1153        Inst::Jump {
1154            dest: BranchTarget::Label(target),
1155        }
1156    }
1157
    /// Returns an upper bound, in bytes, on the code emitted for any single
    /// `Inst`.
    fn worst_case_size() -> CodeOffset {
        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
        // 64-bit f64 constants.
        //
        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
        // feasible for other reasons).
        //
        // NOTE(review): 8 instruction words plus three inline 8-byte constants would be
        // 56 bytes, more than the bound below; presumably the constants are counted as
        // island growth rather than inline code — confirm.
        44
    }
1168
    /// Returns an upper bound, in bytes, on how much a single `Inst` may add
    /// to the buffer's pending-island state.
    fn worst_case_island_growth() -> CodeOffset {
        // A single `Inst` may add to the buffer's pending-island state:
        //
        // - Up to three 8-byte constants (the saturating int-to-float sequence
        //   noted above); count alignment padding into each.
        // - Up to one deferred trap (TrapIf and similar), 4 bytes.
        // - Up to one fixup per emitted instruction word, each contributing at
        //   most `worst_case_veneer_size()` (= 20) bytes of veneer.
        //
        // We pick a conservative bound that comfortably covers these.
        128
    }
1181
    /// Returns the register class used for reference-typed values.
    ///
    /// The settings argument is ignored; the answer is always
    /// `RegClass::Int`.
    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }
1185
1186    fn gen_block_start(
1187        is_indirect_branch_target: bool,
1188        is_forward_edge_cfi_enabled: bool,
1189    ) -> Option<Self> {
1190        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
1191            Some(Inst::Bti {
1192                targets: BranchTargetType::J,
1193            })
1194        } else {
1195            None
1196        }
1197    }
1198
1199    fn function_alignment() -> FunctionAlignment {
1200        // We use 32-byte alignment for performance reasons, but for correctness
1201        // we would only need 4-byte alignment.
1202        FunctionAlignment {
1203            minimum: 4,
1204            preferred: 32,
1205        }
1206    }
1207}
1208
1209//=============================================================================
1210// Pretty-printing of instructions.
1211
1212fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
1213    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
1214    let mut mem_str = mem_insts
1215        .into_iter()
1216        .map(|inst| inst.print_with_state(&mut EmitState::default()))
1217        .collect::<Vec<_>>()
1218        .join(" ; ");
1219    if !mem_str.is_empty() {
1220        mem_str += " ; ";
1221    }
1222
1223    let mem = mem.pretty_print(access_ty.bytes() as u8);
1224    (mem_str, mem)
1225}
1226
1227fn pretty_print_try_call(info: &TryCallInfo) -> String {
1228    format!(
1229        "; b {:?}; catch [{}]",
1230        info.continuation,
1231        info.pretty_print_dests()
1232    )
1233}
1234
1235impl Inst {
1236    fn print_with_state(&self, state: &mut EmitState) -> String {
1237        fn op_name(alu_op: ALUOp) -> &'static str {
1238            match alu_op {
1239                ALUOp::Add => "add",
1240                ALUOp::Sub => "sub",
1241                ALUOp::Orr => "orr",
1242                ALUOp::And => "and",
1243                ALUOp::AndS => "ands",
1244                ALUOp::Eor => "eor",
1245                ALUOp::AddS => "adds",
1246                ALUOp::SubS => "subs",
1247                ALUOp::SMulH => "smulh",
1248                ALUOp::UMulH => "umulh",
1249                ALUOp::SDiv => "sdiv",
1250                ALUOp::UDiv => "udiv",
1251                ALUOp::AndNot => "bic",
1252                ALUOp::OrrNot => "orn",
1253                ALUOp::EorNot => "eon",
1254                ALUOp::Extr => "extr",
1255                ALUOp::Lsr => "lsr",
1256                ALUOp::Asr => "asr",
1257                ALUOp::Lsl => "lsl",
1258                ALUOp::Adc => "adc",
1259                ALUOp::AdcS => "adcs",
1260                ALUOp::Sbc => "sbc",
1261                ALUOp::SbcS => "sbcs",
1262            }
1263        }
1264
1265        match self {
1266            &Inst::Nop0 => "nop-zero-len".to_string(),
1267            &Inst::Nop4 => "nop".to_string(),
1268            &Inst::AluRRR {
1269                alu_op,
1270                size,
1271                rd,
1272                rn,
1273                rm,
1274            } => {
1275                let op = op_name(alu_op);
1276                let rd = pretty_print_ireg(rd.to_reg(), size);
1277                let rn = pretty_print_ireg(rn, size);
1278                let rm = pretty_print_ireg(rm, size);
1279                format!("{op} {rd}, {rn}, {rm}")
1280            }
1281            &Inst::AluRRRR {
1282                alu_op,
1283                size,
1284                rd,
1285                rn,
1286                rm,
1287                ra,
1288            } => {
1289                let (op, da_size) = match alu_op {
1290                    ALUOp3::MAdd => ("madd", size),
1291                    ALUOp3::MSub => ("msub", size),
1292                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1293                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1294                };
1295                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1296                let rn = pretty_print_ireg(rn, size);
1297                let rm = pretty_print_ireg(rm, size);
1298                let ra = pretty_print_ireg(ra, da_size);
1299
1300                format!("{op} {rd}, {rn}, {rm}, {ra}")
1301            }
1302            &Inst::AluRRImm12 {
1303                alu_op,
1304                size,
1305                rd,
1306                rn,
1307                ref imm12,
1308            } => {
1309                let op = op_name(alu_op);
1310                let rd = pretty_print_ireg(rd.to_reg(), size);
1311                let rn = pretty_print_ireg(rn, size);
1312
1313                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1314                    // special-case MOV (used for moving into SP).
1315                    format!("mov {rd}, {rn}")
1316                } else {
1317                    let imm12 = imm12.pretty_print(0);
1318                    format!("{op} {rd}, {rn}, {imm12}")
1319                }
1320            }
1321            &Inst::AluRRImmLogic {
1322                alu_op,
1323                size,
1324                rd,
1325                rn,
1326                ref imml,
1327            } => {
1328                let op = op_name(alu_op);
1329                let rd = pretty_print_ireg(rd.to_reg(), size);
1330                let rn = pretty_print_ireg(rn, size);
1331                let imml = imml.pretty_print(0);
1332                format!("{op} {rd}, {rn}, {imml}")
1333            }
1334            &Inst::AluRRImmShift {
1335                alu_op,
1336                size,
1337                rd,
1338                rn,
1339                ref immshift,
1340            } => {
1341                let op = op_name(alu_op);
1342                let rd = pretty_print_ireg(rd.to_reg(), size);
1343                let rn = pretty_print_ireg(rn, size);
1344                let immshift = immshift.pretty_print(0);
1345                format!("{op} {rd}, {rn}, {immshift}")
1346            }
1347            &Inst::AluRRRShift {
1348                alu_op,
1349                size,
1350                rd,
1351                rn,
1352                rm,
1353                ref shiftop,
1354            } => {
1355                let op = op_name(alu_op);
1356                let rd = pretty_print_ireg(rd.to_reg(), size);
1357                let rn = pretty_print_ireg(rn, size);
1358                let rm = pretty_print_ireg(rm, size);
1359                let shiftop = shiftop.pretty_print(0);
1360                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1361            }
1362            &Inst::AluRRRExtend {
1363                alu_op,
1364                size,
1365                rd,
1366                rn,
1367                rm,
1368                ref extendop,
1369            } => {
1370                let op = op_name(alu_op);
1371                let rd = pretty_print_ireg(rd.to_reg(), size);
1372                let rn = pretty_print_ireg(rn, size);
1373                let rm = pretty_print_ireg(rm, size);
1374                let extendop = extendop.pretty_print(0);
1375                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1376            }
1377            &Inst::BitRR { op, size, rd, rn } => {
1378                let op = op.op_str();
1379                let rd = pretty_print_ireg(rd.to_reg(), size);
1380                let rn = pretty_print_ireg(rn, size);
1381                format!("{op} {rd}, {rn}")
1382            }
1383            &Inst::ULoad8 { rd, ref mem, .. }
1384            | &Inst::SLoad8 { rd, ref mem, .. }
1385            | &Inst::ULoad16 { rd, ref mem, .. }
1386            | &Inst::SLoad16 { rd, ref mem, .. }
1387            | &Inst::ULoad32 { rd, ref mem, .. }
1388            | &Inst::SLoad32 { rd, ref mem, .. }
1389            | &Inst::ULoad64 { rd, ref mem, .. } => {
1390                let is_unscaled = match &mem {
1391                    &AMode::Unscaled { .. } => true,
1392                    _ => false,
1393                };
1394                let (op, size) = match (self, is_unscaled) {
1395                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1396                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1397                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1398                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1399                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1400                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1401                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1402                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1403                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1404                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1405                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1406                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1407                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1408                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1409                    _ => unreachable!(),
1410                };
1411
1412                let rd = pretty_print_ireg(rd.to_reg(), size);
1413                let mem = mem.clone();
1414                let access_ty = self.mem_type().unwrap();
1415                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1416
1417                format!("{mem_str}{op} {rd}, {mem}")
1418            }
1419            &Inst::Store8 { rd, ref mem, .. }
1420            | &Inst::Store16 { rd, ref mem, .. }
1421            | &Inst::Store32 { rd, ref mem, .. }
1422            | &Inst::Store64 { rd, ref mem, .. } => {
1423                let is_unscaled = match &mem {
1424                    &AMode::Unscaled { .. } => true,
1425                    _ => false,
1426                };
1427                let (op, size) = match (self, is_unscaled) {
1428                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1429                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1430                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1431                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1432                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1433                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1434                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1435                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1436                    _ => unreachable!(),
1437                };
1438
1439                let rd = pretty_print_ireg(rd, size);
1440                let mem = mem.clone();
1441                let access_ty = self.mem_type().unwrap();
1442                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1443
1444                format!("{mem_str}{op} {rd}, {mem}")
1445            }
1446            &Inst::StoreP64 {
1447                rt, rt2, ref mem, ..
1448            } => {
1449                let rt = pretty_print_ireg(rt, OperandSize::Size64);
1450                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
1451                let mem = mem.clone();
1452                let mem = mem.pretty_print_default();
1453                format!("stp {rt}, {rt2}, {mem}")
1454            }
1455            &Inst::LoadP64 {
1456                rt, rt2, ref mem, ..
1457            } => {
1458                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
1459                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
1460                let mem = mem.clone();
1461                let mem = mem.pretty_print_default();
1462                format!("ldp {rt}, {rt2}, {mem}")
1463            }
1464            &Inst::Mov { size, rd, rm } => {
1465                let rd = pretty_print_ireg(rd.to_reg(), size);
1466                let rm = pretty_print_ireg(rm, size);
1467                format!("mov {rd}, {rm}")
1468            }
1469            &Inst::MovFromPReg { rd, rm } => {
1470                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1471                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
1472                format!("mov {rd}, {rm}")
1473            }
1474            &Inst::MovToPReg { rd, rm } => {
1475                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
1476                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1477                format!("mov {rd}, {rm}")
1478            }
1479            &Inst::MovWide {
1480                op,
1481                rd,
1482                ref imm,
1483                size,
1484            } => {
1485                let op_str = match op {
1486                    MoveWideOp::MovZ => "movz",
1487                    MoveWideOp::MovN => "movn",
1488                };
1489                let rd = pretty_print_ireg(rd.to_reg(), size);
1490                let imm = imm.pretty_print(0);
1491                format!("{op_str} {rd}, {imm}")
1492            }
1493            &Inst::MovK {
1494                rd,
1495                rn,
1496                ref imm,
1497                size,
1498            } => {
1499                let rn = pretty_print_ireg(rn, size);
1500                let rd = pretty_print_ireg(rd.to_reg(), size);
1501                let imm = imm.pretty_print(0);
1502                format!("movk {rd}, {rn}, {imm}")
1503            }
1504            &Inst::CSel { rd, rn, rm, cond } => {
1505                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1506                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1507                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1508                let cond = cond.pretty_print(0);
1509                format!("csel {rd}, {rn}, {rm}, {cond}")
1510            }
1511            &Inst::CSNeg { rd, rn, rm, cond } => {
1512                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1513                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1514                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1515                let cond = cond.pretty_print(0);
1516                format!("csneg {rd}, {rn}, {rm}, {cond}")
1517            }
1518            &Inst::CSet { rd, cond } => {
1519                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1520                let cond = cond.pretty_print(0);
1521                format!("cset {rd}, {cond}")
1522            }
1523            &Inst::CSetm { rd, cond } => {
1524                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1525                let cond = cond.pretty_print(0);
1526                format!("csetm {rd}, {cond}")
1527            }
1528            &Inst::CCmp {
1529                size,
1530                rn,
1531                rm,
1532                nzcv,
1533                cond,
1534            } => {
1535                let rn = pretty_print_ireg(rn, size);
1536                let rm = pretty_print_ireg(rm, size);
1537                let nzcv = nzcv.pretty_print(0);
1538                let cond = cond.pretty_print(0);
1539                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
1540            }
1541            &Inst::CCmpImm {
1542                size,
1543                rn,
1544                imm,
1545                nzcv,
1546                cond,
1547            } => {
1548                let rn = pretty_print_ireg(rn, size);
1549                let imm = imm.pretty_print(0);
1550                let nzcv = nzcv.pretty_print(0);
1551                let cond = cond.pretty_print(0);
1552                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
1553            }
1554            &Inst::AtomicRMW {
1555                rs, rt, rn, ty, op, ..
1556            } => {
1557                let op = match op {
1558                    AtomicRMWOp::Add => "ldaddal",
1559                    AtomicRMWOp::Clr => "ldclral",
1560                    AtomicRMWOp::Eor => "ldeoral",
1561                    AtomicRMWOp::Set => "ldsetal",
1562                    AtomicRMWOp::Smax => "ldsmaxal",
1563                    AtomicRMWOp::Umax => "ldumaxal",
1564                    AtomicRMWOp::Smin => "ldsminal",
1565                    AtomicRMWOp::Umin => "lduminal",
1566                    AtomicRMWOp::Swp => "swpal",
1567                };
1568
1569                let size = OperandSize::from_ty(ty);
1570                let rs = pretty_print_ireg(rs, size);
1571                let rt = pretty_print_ireg(rt.to_reg(), size);
1572                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1573
1574                let ty_suffix = match ty {
1575                    I8 => "b",
1576                    I16 => "h",
1577                    _ => "",
1578                };
1579                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
1580            }
1581            &Inst::AtomicRMWLoop {
1582                ty,
1583                op,
1584                addr,
1585                operand,
1586                oldval,
1587                scratch1,
1588                scratch2,
1589                ..
1590            } => {
1591                let op = match op {
1592                    AtomicRMWLoopOp::Add => "add",
1593                    AtomicRMWLoopOp::Sub => "sub",
1594                    AtomicRMWLoopOp::Eor => "eor",
1595                    AtomicRMWLoopOp::Orr => "orr",
1596                    AtomicRMWLoopOp::And => "and",
1597                    AtomicRMWLoopOp::Nand => "nand",
1598                    AtomicRMWLoopOp::Smin => "smin",
1599                    AtomicRMWLoopOp::Smax => "smax",
1600                    AtomicRMWLoopOp::Umin => "umin",
1601                    AtomicRMWLoopOp::Umax => "umax",
1602                    AtomicRMWLoopOp::Xchg => "xchg",
1603                };
1604                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1605                let operand = pretty_print_ireg(operand, OperandSize::Size64);
1606                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1607                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
1608                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
1609                format!(
1610                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
1611                    op,
1612                    ty.bits(),
1613                    addr,
1614                    operand,
1615                    oldval,
1616                    scratch1,
1617                    scratch2,
1618                )
1619            }
1620            &Inst::AtomicCAS {
1621                rd, rs, rt, rn, ty, ..
1622            } => {
1623                let op = match ty {
1624                    I8 => "casalb",
1625                    I16 => "casalh",
1626                    I32 | I64 => "casal",
1627                    _ => panic!("Unsupported type: {ty}"),
1628                };
1629                let size = OperandSize::from_ty(ty);
1630                let rd = pretty_print_ireg(rd.to_reg(), size);
1631                let rs = pretty_print_ireg(rs, size);
1632                let rt = pretty_print_ireg(rt, size);
1633                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1634
1635                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
1636            }
1637            &Inst::AtomicCASLoop {
1638                ty,
1639                addr,
1640                expected,
1641                replacement,
1642                oldval,
1643                scratch,
1644                ..
1645            } => {
1646                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1647                let expected = pretty_print_ireg(expected, OperandSize::Size64);
1648                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
1649                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1650                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
1651                format!(
1652                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
1653                    ty.bits(),
1654                    addr,
1655                    expected,
1656                    replacement,
1657                    oldval,
1658                    scratch,
1659                )
1660            }
1661            &Inst::LoadAcquire {
1662                access_ty, rt, rn, ..
1663            } => {
1664                let (op, ty) = match access_ty {
1665                    I8 => ("ldarb", I32),
1666                    I16 => ("ldarh", I32),
1667                    I32 => ("ldar", I32),
1668                    I64 => ("ldar", I64),
1669                    _ => panic!("Unsupported type: {access_ty}"),
1670                };
1671                let size = OperandSize::from_ty(ty);
1672                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1673                let rt = pretty_print_ireg(rt.to_reg(), size);
1674                format!("{op} {rt}, [{rn}]")
1675            }
1676            &Inst::StoreRelease {
1677                access_ty, rt, rn, ..
1678            } => {
1679                let (op, ty) = match access_ty {
1680                    I8 => ("stlrb", I32),
1681                    I16 => ("stlrh", I32),
1682                    I32 => ("stlr", I32),
1683                    I64 => ("stlr", I64),
1684                    _ => panic!("Unsupported type: {access_ty}"),
1685                };
1686                let size = OperandSize::from_ty(ty);
1687                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1688                let rt = pretty_print_ireg(rt, size);
1689                format!("{op} {rt}, [{rn}]")
1690            }
1691            &Inst::Fence {} => {
1692                format!("dmb ish")
1693            }
1694            &Inst::Csdb {} => {
1695                format!("csdb")
1696            }
1697            &Inst::FpuMove32 { rd, rn } => {
1698                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1699                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1700                format!("fmov {rd}, {rn}")
1701            }
1702            &Inst::FpuMove64 { rd, rn } => {
1703                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1704                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1705                format!("fmov {rd}, {rn}")
1706            }
1707            &Inst::FpuMove128 { rd, rn } => {
1708                let rd = pretty_print_reg(rd.to_reg());
1709                let rn = pretty_print_reg(rn);
1710                format!("mov {rd}.16b, {rn}.16b")
1711            }
1712            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1713                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
1714                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
1715                format!("mov {rd}, {rn}")
1716            }
1717            &Inst::FpuExtend { rd, rn, size } => {
1718                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1719                let rn = pretty_print_vreg_scalar(rn, size);
1720                format!("fmov {rd}, {rn}")
1721            }
1722            &Inst::FpuRR {
1723                fpu_op,
1724                size,
1725                rd,
1726                rn,
1727            } => {
1728                let op = match fpu_op {
1729                    FPUOp1::Abs => "fabs",
1730                    FPUOp1::Neg => "fneg",
1731                    FPUOp1::Sqrt => "fsqrt",
1732                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
1733                };
1734                let dst_size = match fpu_op {
1735                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
1736                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
1737                    _ => size,
1738                };
1739                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
1740                let rn = pretty_print_vreg_scalar(rn, size);
1741                format!("{op} {rd}, {rn}")
1742            }
1743            &Inst::FpuRRR {
1744                fpu_op,
1745                size,
1746                rd,
1747                rn,
1748                rm,
1749            } => {
1750                let op = match fpu_op {
1751                    FPUOp2::Add => "fadd",
1752                    FPUOp2::Sub => "fsub",
1753                    FPUOp2::Mul => "fmul",
1754                    FPUOp2::Div => "fdiv",
1755                    FPUOp2::Max => "fmax",
1756                    FPUOp2::Min => "fmin",
1757                };
1758                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1759                let rn = pretty_print_vreg_scalar(rn, size);
1760                let rm = pretty_print_vreg_scalar(rm, size);
1761                format!("{op} {rd}, {rn}, {rm}")
1762            }
1763            &Inst::FpuRRI { fpu_op, rd, rn } => {
1764                let (op, imm, vector) = match fpu_op {
1765                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
1766                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
1767                };
1768
1769                let (rd, rn) = if vector {
1770                    (
1771                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1772                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1773                    )
1774                } else {
1775                    (
1776                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1777                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1778                    )
1779                };
1780                format!("{op} {rd}, {rn}, {imm}")
1781            }
1782            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1783                let (op, imm, vector) = match fpu_op {
1784                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
1785                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
1786                };
1787
1788                let (rd, ri, rn) = if vector {
1789                    (
1790                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1791                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
1792                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1793                    )
1794                } else {
1795                    (
1796                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1797                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
1798                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1799                    )
1800                };
1801                format!("{op} {rd}, {ri}, {rn}, {imm}")
1802            }
1803            &Inst::FpuRRRR {
1804                fpu_op,
1805                size,
1806                rd,
1807                rn,
1808                rm,
1809                ra,
1810            } => {
1811                let op = match fpu_op {
1812                    FPUOp3::MAdd => "fmadd",
1813                    FPUOp3::MSub => "fmsub",
1814                    FPUOp3::NMAdd => "fnmadd",
1815                    FPUOp3::NMSub => "fnmsub",
1816                };
1817                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1818                let rn = pretty_print_vreg_scalar(rn, size);
1819                let rm = pretty_print_vreg_scalar(rm, size);
1820                let ra = pretty_print_vreg_scalar(ra, size);
1821                format!("{op} {rd}, {rn}, {rm}, {ra}")
1822            }
1823            &Inst::FpuCmp { size, rn, rm } => {
1824                let rn = pretty_print_vreg_scalar(rn, size);
1825                let rm = pretty_print_vreg_scalar(rm, size);
1826                format!("fcmp {rn}, {rm}")
1827            }
1828            &Inst::FpuLoad16 { rd, ref mem, .. } => {
1829                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1830                let mem = mem.clone();
1831                let access_ty = self.mem_type().unwrap();
1832                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1833                format!("{mem_str}ldr {rd}, {mem}")
1834            }
1835            &Inst::FpuLoad32 { rd, ref mem, .. } => {
1836                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1837                let mem = mem.clone();
1838                let access_ty = self.mem_type().unwrap();
1839                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1840                format!("{mem_str}ldr {rd}, {mem}")
1841            }
1842            &Inst::FpuLoad64 { rd, ref mem, .. } => {
1843                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1844                let mem = mem.clone();
1845                let access_ty = self.mem_type().unwrap();
1846                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1847                format!("{mem_str}ldr {rd}, {mem}")
1848            }
1849            &Inst::FpuLoad128 { rd, ref mem, .. } => {
1850                let rd = pretty_print_reg(rd.to_reg());
1851                let rd = "q".to_string() + &rd[1..];
1852                let mem = mem.clone();
1853                let access_ty = self.mem_type().unwrap();
1854                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1855                format!("{mem_str}ldr {rd}, {mem}")
1856            }
1857            &Inst::FpuStore16 { rd, ref mem, .. } => {
1858                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
1859                let mem = mem.clone();
1860                let access_ty = self.mem_type().unwrap();
1861                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1862                format!("{mem_str}str {rd}, {mem}")
1863            }
1864            &Inst::FpuStore32 { rd, ref mem, .. } => {
1865                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
1866                let mem = mem.clone();
1867                let access_ty = self.mem_type().unwrap();
1868                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1869                format!("{mem_str}str {rd}, {mem}")
1870            }
1871            &Inst::FpuStore64 { rd, ref mem, .. } => {
1872                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
1873                let mem = mem.clone();
1874                let access_ty = self.mem_type().unwrap();
1875                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1876                format!("{mem_str}str {rd}, {mem}")
1877            }
1878            &Inst::FpuStore128 { rd, ref mem, .. } => {
1879                let rd = pretty_print_reg(rd);
1880                let rd = "q".to_string() + &rd[1..];
1881                let mem = mem.clone();
1882                let access_ty = self.mem_type().unwrap();
1883                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1884                format!("{mem_str}str {rd}, {mem}")
1885            }
1886            &Inst::FpuLoadP64 {
1887                rt, rt2, ref mem, ..
1888            } => {
1889                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
1890                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
1891                let mem = mem.clone();
1892                let mem = mem.pretty_print_default();
1893
1894                format!("ldp {rt}, {rt2}, {mem}")
1895            }
1896            &Inst::FpuStoreP64 {
1897                rt, rt2, ref mem, ..
1898            } => {
1899                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
1900                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
1901                let mem = mem.clone();
1902                let mem = mem.pretty_print_default();
1903
1904                format!("stp {rt}, {rt2}, {mem}")
1905            }
1906            &Inst::FpuLoadP128 {
1907                rt, rt2, ref mem, ..
1908            } => {
1909                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
1910                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
1911                let mem = mem.clone();
1912                let mem = mem.pretty_print_default();
1913
1914                format!("ldp {rt}, {rt2}, {mem}")
1915            }
1916            &Inst::FpuStoreP128 {
1917                rt, rt2, ref mem, ..
1918            } => {
1919                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
1920                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
1921                let mem = mem.clone();
1922                let mem = mem.pretty_print_default();
1923
1924                format!("stp {rt}, {rt2}, {mem}")
1925            }
1926            &Inst::FpuToInt { op, rd, rn } => {
1927                let (op, sizesrc, sizedest) = match op {
1928                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
1929                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
1930                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
1931                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
1932                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
1933                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
1934                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
1935                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
1936                };
1937                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
1938                let rn = pretty_print_vreg_scalar(rn, sizesrc);
1939                format!("{op} {rd}, {rn}")
1940            }
1941            &Inst::IntToFpu { op, rd, rn } => {
1942                let (op, sizesrc, sizedest) = match op {
1943                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
1944                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
1945                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
1946                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
1947                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
1948                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
1949                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
1950                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
1951                };
1952                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
1953                let rn = pretty_print_ireg(rn, sizesrc);
1954                format!("{op} {rd}, {rn}")
1955            }
1956            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
1957                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1958                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
1959                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
1960                let cond = cond.pretty_print(0);
1961                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1962            }
1963            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
1964                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1965                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1966                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
1967                let cond = cond.pretty_print(0);
1968                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1969            }
1970            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
1971                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1972                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1973                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
1974                let cond = cond.pretty_print(0);
1975                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1976            }
1977            &Inst::FpuRound { op, rd, rn } => {
1978                let (inst, size) = match op {
1979                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
1980                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
1981                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
1982                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
1983                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
1984                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
1985                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
1986                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
1987                };
1988                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1989                let rn = pretty_print_vreg_scalar(rn, size);
1990                format!("{inst} {rd}, {rn}")
1991            }
1992            &Inst::MovToFpu { rd, rn, size } => {
1993                let operand_size = size.operand_size();
1994                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1995                let rn = pretty_print_ireg(rn, operand_size);
1996                format!("fmov {rd}, {rn}")
1997            }
1998            &Inst::FpuMoveFPImm { rd, imm, size } => {
1999                let imm = imm.pretty_print(0);
2000                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
2001
2002                format!("fmov {rd}, {imm}")
2003            }
2004            &Inst::MovToVec {
2005                rd,
2006                ri,
2007                rn,
2008                idx,
2009                size,
2010            } => {
2011                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
2012                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
2013                let rn = pretty_print_ireg(rn, size.operand_size());
2014                format!("mov {rd}, {ri}, {rn}")
2015            }
2016            &Inst::MovFromVec { rd, rn, idx, size } => {
2017                let op = match size {
2018                    ScalarSize::Size8 => "umov",
2019                    ScalarSize::Size16 => "umov",
2020                    ScalarSize::Size32 => "mov",
2021                    ScalarSize::Size64 => "mov",
2022                    _ => unimplemented!(),
2023                };
2024                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
2025                let rn = pretty_print_vreg_element(rn, idx as usize, size);
2026                format!("{op} {rd}, {rn}")
2027            }
2028            &Inst::MovFromVecSigned {
2029                rd,
2030                rn,
2031                idx,
2032                size,
2033                scalar_size,
2034            } => {
2035                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
2036                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
2037                format!("smov {rd}, {rn}")
2038            }
2039            &Inst::VecDup { rd, rn, size } => {
2040                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2041                let rn = pretty_print_ireg(rn, size.operand_size());
2042                format!("dup {rd}, {rn}")
2043            }
2044            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
2045                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2046                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
2047                format!("dup {rd}, {rn}")
2048            }
2049            &Inst::VecDupFPImm { rd, imm, size } => {
2050                let imm = imm.pretty_print(0);
2051                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2052
2053                format!("fmov {rd}, {imm}")
2054            }
2055            &Inst::VecDupImm {
2056                rd,
2057                imm,
2058                invert,
2059                size,
2060            } => {
2061                let imm = imm.pretty_print(0);
2062                let op = if invert { "mvni" } else { "movi" };
2063                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2064
2065                format!("{op} {rd}, {imm}")
2066            }
2067            &Inst::VecExtend {
2068                t,
2069                rd,
2070                rn,
2071                high_half,
2072                lane_size,
2073            } => {
2074                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
2075                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
2076                let rd_size = VectorSize::from_lane_size(lane_size, true);
2077                let (op, rn_size) = match (t, high_half) {
2078                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
2079                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
2080                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
2081                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
2082                };
2083                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2084                let rn = pretty_print_vreg_vector(rn, rn_size);
2085                format!("{op} {rd}, {rn}")
2086            }
2087            &Inst::VecMovElement {
2088                rd,
2089                ri,
2090                rn,
2091                dest_idx,
2092                src_idx,
2093                size,
2094            } => {
2095                let rd =
2096                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
2097                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
2098                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
2099                format!("mov {rd}, {ri}, {rn}")
2100            }
2101            &Inst::VecRRLong {
2102                op,
2103                rd,
2104                rn,
2105                high_half,
2106            } => {
2107                let (op, rd_size, size, suffix) = match (op, high_half) {
2108                    (VecRRLongOp::Fcvtl16, false) => {
2109                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
2110                    }
2111                    (VecRRLongOp::Fcvtl16, true) => {
2112                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
2113                    }
2114                    (VecRRLongOp::Fcvtl32, false) => {
2115                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
2116                    }
2117                    (VecRRLongOp::Fcvtl32, true) => {
2118                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
2119                    }
2120                    (VecRRLongOp::Shll8, false) => {
2121                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
2122                    }
2123                    (VecRRLongOp::Shll8, true) => {
2124                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
2125                    }
2126                    (VecRRLongOp::Shll16, false) => {
2127                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
2128                    }
2129                    (VecRRLongOp::Shll16, true) => {
2130                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
2131                    }
2132                    (VecRRLongOp::Shll32, false) => {
2133                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
2134                    }
2135                    (VecRRLongOp::Shll32, true) => {
2136                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
2137                    }
2138                };
2139                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2140                let rn = pretty_print_vreg_vector(rn, size);
2141
2142                format!("{op} {rd}, {rn}{suffix}")
2143            }
2144            &Inst::VecRRNarrowLow {
2145                op,
2146                rd,
2147                rn,
2148                lane_size,
2149                ..
2150            }
2151            | &Inst::VecRRNarrowHigh {
2152                op,
2153                rd,
2154                rn,
2155                lane_size,
2156                ..
2157            } => {
2158                let vec64 = VectorSize::from_lane_size(lane_size, false);
2159                let vec128 = VectorSize::from_lane_size(lane_size, true);
2160                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
2161                let high_half = match self {
2162                    &Inst::VecRRNarrowLow { .. } => false,
2163                    &Inst::VecRRNarrowHigh { .. } => true,
2164                    _ => unreachable!(),
2165                };
2166                let (op, rd_size) = match (op, high_half) {
2167                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
2168                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
2169                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
2170                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
2171                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
2172                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
2173                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
2174                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
2175                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
2176                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
2177                };
2178                let rn = pretty_print_vreg_vector(rn, rn_size);
2179                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2180                let ri = match self {
2181                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
2182                    &Inst::VecRRNarrowHigh { ri, .. } => {
2183                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
2184                    }
2185                    _ => unreachable!(),
2186                };
2187
2188                format!("{op} {rd}, {ri}{rn}")
2189            }
2190            &Inst::VecRRPair { op, rd, rn } => {
2191                let op = match op {
2192                    VecPairOp::Addp => "addp",
2193                };
2194                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
2195                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);
2196
2197                format!("{op} {rd}, {rn}")
2198            }
2199            &Inst::VecRRPairLong { op, rd, rn } => {
2200                let (op, dest, src) = match op {
2201                    VecRRPairLongOp::Saddlp8 => {
2202                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2203                    }
2204                    VecRRPairLongOp::Saddlp16 => {
2205                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2206                    }
2207                    VecRRPairLongOp::Uaddlp8 => {
2208                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2209                    }
2210                    VecRRPairLongOp::Uaddlp16 => {
2211                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2212                    }
2213                };
2214                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
2215                let rn = pretty_print_vreg_vector(rn, src);
2216
2217                format!("{op} {rd}, {rn}")
2218            }
2219            &Inst::VecRRR {
2220                rd,
2221                rn,
2222                rm,
2223                alu_op,
2224                size,
2225            } => {
2226                let (op, size) = match alu_op {
2227                    VecALUOp::Sqadd => ("sqadd", size),
2228                    VecALUOp::Uqadd => ("uqadd", size),
2229                    VecALUOp::Sqsub => ("sqsub", size),
2230                    VecALUOp::Uqsub => ("uqsub", size),
2231                    VecALUOp::Cmeq => ("cmeq", size),
2232                    VecALUOp::Cmge => ("cmge", size),
2233                    VecALUOp::Cmgt => ("cmgt", size),
2234                    VecALUOp::Cmhs => ("cmhs", size),
2235                    VecALUOp::Cmhi => ("cmhi", size),
2236                    VecALUOp::Fcmeq => ("fcmeq", size),
2237                    VecALUOp::Fcmgt => ("fcmgt", size),
2238                    VecALUOp::Fcmge => ("fcmge", size),
2239                    VecALUOp::Umaxp => ("umaxp", size),
2240                    VecALUOp::Add => ("add", size),
2241                    VecALUOp::Sub => ("sub", size),
2242                    VecALUOp::Mul => ("mul", size),
2243                    VecALUOp::Sshl => ("sshl", size),
2244                    VecALUOp::Ushl => ("ushl", size),
2245                    VecALUOp::Umin => ("umin", size),
2246                    VecALUOp::Smin => ("smin", size),
2247                    VecALUOp::Umax => ("umax", size),
2248                    VecALUOp::Smax => ("smax", size),
2249                    VecALUOp::Urhadd => ("urhadd", size),
2250                    VecALUOp::Fadd => ("fadd", size),
2251                    VecALUOp::Fsub => ("fsub", size),
2252                    VecALUOp::Fdiv => ("fdiv", size),
2253                    VecALUOp::Fmax => ("fmax", size),
2254                    VecALUOp::Fmin => ("fmin", size),
2255                    VecALUOp::Fmul => ("fmul", size),
2256                    VecALUOp::Addp => ("addp", size),
2257                    VecALUOp::Zip1 => ("zip1", size),
2258                    VecALUOp::Zip2 => ("zip2", size),
2259                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
2260                    VecALUOp::Uzp1 => ("uzp1", size),
2261                    VecALUOp::Uzp2 => ("uzp2", size),
2262                    VecALUOp::Trn1 => ("trn1", size),
2263                    VecALUOp::Trn2 => ("trn2", size),
2264
2265                    // Lane division does not affect bitwise operations.
2266                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2267                    VecALUOp::And => ("and", size.as_scalar8_vector()),
2268                    VecALUOp::Bic => ("bic", size.as_scalar8_vector()),
2269                    VecALUOp::Orr => ("orr", size.as_scalar8_vector()),
2270                    VecALUOp::Orn => ("orn", size.as_scalar8_vector()),
2271                    VecALUOp::Eor => ("eor", size.as_scalar8_vector()),
2272                };
2273                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2274                let rn = pretty_print_vreg_vector(rn, size);
2275                let rm = pretty_print_vreg_vector(rm, size);
2276                format!("{op} {rd}, {rn}, {rm}")
2277            }
2278            &Inst::VecRRRMod {
2279                rd,
2280                ri,
2281                rn,
2282                rm,
2283                alu_op,
2284                size,
2285            } => {
2286                let (op, size) = match alu_op {
2287                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
2288                    VecALUModOp::Fmla => ("fmla", size),
2289                    VecALUModOp::Fmls => ("fmls", size),
2290                };
2291                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2292                let ri = pretty_print_vreg_vector(ri, size);
2293                let rn = pretty_print_vreg_vector(rn, size);
2294                let rm = pretty_print_vreg_vector(rm, size);
2295                format!("{op} {rd}, {ri}, {rn}, {rm}")
2296            }
2297            &Inst::VecFmlaElem {
2298                rd,
2299                ri,
2300                rn,
2301                rm,
2302                alu_op,
2303                size,
2304                idx,
2305            } => {
2306                let (op, size) = match alu_op {
2307                    VecALUModOp::Fmla => ("fmla", size),
2308                    VecALUModOp::Fmls => ("fmls", size),
2309                    _ => unreachable!(),
2310                };
2311                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2312                let ri = pretty_print_vreg_vector(ri, size);
2313                let rn = pretty_print_vreg_vector(rn, size);
2314                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
2315                format!("{op} {rd}, {ri}, {rn}, {rm}")
2316            }
2317            &Inst::VecRRRLong {
2318                rd,
2319                rn,
2320                rm,
2321                alu_op,
2322                high_half,
2323            } => {
2324                let (op, dest_size, src_size) = match (alu_op, high_half) {
2325                    (VecRRRLongOp::Smull8, false) => {
2326                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
2327                    }
2328                    (VecRRRLongOp::Smull8, true) => {
2329                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
2330                    }
2331                    (VecRRRLongOp::Smull16, false) => {
2332                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
2333                    }
2334                    (VecRRRLongOp::Smull16, true) => {
2335                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
2336                    }
2337                    (VecRRRLongOp::Smull32, false) => {
2338                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
2339                    }
2340                    (VecRRRLongOp::Smull32, true) => {
2341                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
2342                    }
2343                    (VecRRRLongOp::Umull8, false) => {
2344                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
2345                    }
2346                    (VecRRRLongOp::Umull8, true) => {
2347                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
2348                    }
2349                    (VecRRRLongOp::Umull16, false) => {
2350                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
2351                    }
2352                    (VecRRRLongOp::Umull16, true) => {
2353                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
2354                    }
2355                    (VecRRRLongOp::Umull32, false) => {
2356                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
2357                    }
2358                    (VecRRRLongOp::Umull32, true) => {
2359                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
2360                    }
2361                };
2362                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2363                let rn = pretty_print_vreg_vector(rn, src_size);
2364                let rm = pretty_print_vreg_vector(rm, src_size);
2365                format!("{op} {rd}, {rn}, {rm}")
2366            }
2367            &Inst::VecRRRLongMod {
2368                rd,
2369                ri,
2370                rn,
2371                rm,
2372                alu_op,
2373                high_half,
2374            } => {
2375                let (op, dest_size, src_size) = match (alu_op, high_half) {
2376                    (VecRRRLongModOp::Umlal8, false) => {
2377                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
2378                    }
2379                    (VecRRRLongModOp::Umlal8, true) => {
2380                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
2381                    }
2382                    (VecRRRLongModOp::Umlal16, false) => {
2383                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
2384                    }
2385                    (VecRRRLongModOp::Umlal16, true) => {
2386                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
2387                    }
2388                    (VecRRRLongModOp::Umlal32, false) => {
2389                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
2390                    }
2391                    (VecRRRLongModOp::Umlal32, true) => {
2392                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
2393                    }
2394                };
2395                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2396                let ri = pretty_print_vreg_vector(ri, dest_size);
2397                let rn = pretty_print_vreg_vector(rn, src_size);
2398                let rm = pretty_print_vreg_vector(rm, src_size);
2399                format!("{op} {rd}, {ri}, {rn}, {rm}")
2400            }
2401            &Inst::VecMisc { op, rd, rn, size } => {
2402                let (op, size, suffix) = match op {
2403                    VecMisc2::Neg => ("neg", size, ""),
2404                    VecMisc2::Abs => ("abs", size, ""),
2405                    VecMisc2::Fabs => ("fabs", size, ""),
2406                    VecMisc2::Fneg => ("fneg", size, ""),
2407                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
2408                    VecMisc2::Rev16 => ("rev16", size, ""),
2409                    VecMisc2::Rev32 => ("rev32", size, ""),
2410                    VecMisc2::Rev64 => ("rev64", size, ""),
2411                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
2412                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
2413                    VecMisc2::Scvtf => ("scvtf", size, ""),
2414                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
2415                    VecMisc2::Frintn => ("frintn", size, ""),
2416                    VecMisc2::Frintz => ("frintz", size, ""),
2417                    VecMisc2::Frintm => ("frintm", size, ""),
2418                    VecMisc2::Frintp => ("frintp", size, ""),
2419                    VecMisc2::Cnt => ("cnt", size, ""),
2420                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
2421                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
2422                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
2423                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
2424                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
2425                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
2426                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
2427                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
2428                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
2429                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
2430
2431                    // Lane division does not affect bitwise operations.
2432                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2433                    VecMisc2::Not => ("mvn", size.as_scalar8_vector(), ""),
2434                };
2435                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2436                let rn = pretty_print_vreg_vector(rn, size);
2437                format!("{op} {rd}, {rn}{suffix}")
2438            }
2439            &Inst::VecLanes { op, rd, rn, size } => {
2440                let op = match op {
2441                    VecLanesOp::Uminv => "uminv",
2442                    VecLanesOp::Addv => "addv",
2443                };
2444                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
2445                let rn = pretty_print_vreg_vector(rn, size);
2446                format!("{op} {rd}, {rn}")
2447            }
2448            &Inst::VecShiftImm {
2449                op,
2450                rd,
2451                rn,
2452                size,
2453                imm,
2454            } => {
2455                let op = match op {
2456                    VecShiftImmOp::Shl => "shl",
2457                    VecShiftImmOp::Ushr => "ushr",
2458                    VecShiftImmOp::Sshr => "sshr",
2459                };
2460                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2461                let rn = pretty_print_vreg_vector(rn, size);
2462                format!("{op} {rd}, {rn}, #{imm}")
2463            }
2464            &Inst::VecShiftImmMod {
2465                op,
2466                rd,
2467                ri,
2468                rn,
2469                size,
2470                imm,
2471            } => {
2472                let op = match op {
2473                    VecShiftImmModOp::Sli => "sli",
2474                };
2475                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2476                let ri = pretty_print_vreg_vector(ri, size);
2477                let rn = pretty_print_vreg_vector(rn, size);
2478                format!("{op} {rd}, {ri}, {rn}, #{imm}")
2479            }
2480            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2481                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2482                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2483                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2484                format!("ext {rd}, {rn}, {rm}, #{imm4}")
2485            }
2486            &Inst::VecTbl { rd, rn, rm } => {
2487                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2488                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2489                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2490                format!("tbl {rd}, {{ {rn} }}, {rm}")
2491            }
2492            &Inst::VecTblExt { rd, ri, rn, rm } => {
2493                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2494                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2495                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2496                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2497                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
2498            }
2499            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2500                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2501                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2502                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2503                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2504                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
2505            }
2506            &Inst::VecTbl2Ext {
2507                rd,
2508                ri,
2509                rn,
2510                rn2,
2511                rm,
2512            } => {
2513                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2514                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2515                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2516                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2517                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2518                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
2519            }
2520            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
2521                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2522                let rn = pretty_print_reg(rn);
2523
2524                format!("ld1r {{ {rd} }}, [{rn}]")
2525            }
2526            &Inst::VecCSel { rd, rn, rm, cond } => {
2527                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2528                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2529                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2530                let cond = cond.pretty_print(0);
2531                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
2532            }
2533            &Inst::MovToNZCV { rn } => {
2534                let rn = pretty_print_reg(rn);
2535                format!("msr nzcv, {rn}")
2536            }
2537            &Inst::MovFromNZCV { rd } => {
2538                let rd = pretty_print_reg(rd.to_reg());
2539                format!("mrs {rd}, nzcv")
2540            }
2541            &Inst::Extend {
2542                rd,
2543                rn,
2544                signed: false,
2545                from_bits: 1,
2546                ..
2547            } => {
2548                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2549                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2550                format!("and {rd}, {rn}, #1")
2551            }
2552            &Inst::Extend {
2553                rd,
2554                rn,
2555                signed: false,
2556                from_bits: 32,
2557                to_bits: 64,
2558            } => {
2559                // The case of a zero extension from 32 to 64 bits, is implemented
2560                // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
2561                // the top 32 bits.
2562                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2563                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2564                format!("mov {rd}, {rn}")
2565            }
2566            &Inst::Extend {
2567                rd,
2568                rn,
2569                signed,
2570                from_bits,
2571                to_bits,
2572            } => {
2573                assert!(from_bits <= to_bits);
2574                let op = match (signed, from_bits) {
2575                    (false, 8) => "uxtb",
2576                    (true, 8) => "sxtb",
2577                    (false, 16) => "uxth",
2578                    (true, 16) => "sxth",
2579                    (true, 32) => "sxtw",
2580                    (true, _) => "sbfx",
2581                    (false, _) => "ubfx",
2582                };
2583                if op == "sbfx" || op == "ubfx" {
2584                    let dest_size = OperandSize::from_bits(to_bits);
2585                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2586                    let rn = pretty_print_ireg(rn, dest_size);
2587                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
2588                } else {
2589                    let dest_size = if signed {
2590                        OperandSize::from_bits(to_bits)
2591                    } else {
2592                        OperandSize::Size32
2593                    };
2594                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2595                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
2596                    format!("{op} {rd}, {rn}")
2597                }
2598            }
2599            &Inst::Call { ref info } => {
2600                let try_call = info
2601                    .try_call_info
2602                    .as_ref()
2603                    .map(|tci| pretty_print_try_call(tci))
2604                    .unwrap_or_default();
2605                format!("bl 0{try_call}")
2606            }
2607            &Inst::CallInd { ref info } => {
2608                let rn = pretty_print_reg(info.dest);
2609                let try_call = info
2610                    .try_call_info
2611                    .as_ref()
2612                    .map(|tci| pretty_print_try_call(tci))
2613                    .unwrap_or_default();
2614                format!("blr {rn}{try_call}")
2615            }
2616            &Inst::ReturnCall { ref info } => {
2617                let mut s = format!(
2618                    "return_call {:?} new_stack_arg_size:{}",
2619                    info.dest, info.new_stack_arg_size
2620                );
2621                for ret in &info.uses {
2622                    let preg = pretty_print_reg(ret.preg);
2623                    let vreg = pretty_print_reg(ret.vreg);
2624                    write!(&mut s, " {vreg}={preg}").unwrap();
2625                }
2626                s
2627            }
2628            &Inst::ReturnCallInd { ref info } => {
2629                let callee = pretty_print_reg(info.dest);
2630                let mut s = format!(
2631                    "return_call_ind {callee} new_stack_arg_size:{}",
2632                    info.new_stack_arg_size
2633                );
2634                for ret in &info.uses {
2635                    let preg = pretty_print_reg(ret.preg);
2636                    let vreg = pretty_print_reg(ret.vreg);
2637                    write!(&mut s, " {vreg}={preg}").unwrap();
2638                }
2639                s
2640            }
2641            &Inst::Args { ref args } => {
2642                let mut s = "args".to_string();
2643                for arg in args {
2644                    let preg = pretty_print_reg(arg.preg);
2645                    let def = pretty_print_reg(arg.vreg.to_reg());
2646                    write!(&mut s, " {def}={preg}").unwrap();
2647                }
2648                s
2649            }
2650            &Inst::Rets { ref rets } => {
2651                let mut s = "rets".to_string();
2652                for ret in rets {
2653                    let preg = pretty_print_reg(ret.preg);
2654                    let vreg = pretty_print_reg(ret.vreg);
2655                    write!(&mut s, " {vreg}={preg}").unwrap();
2656                }
2657                s
2658            }
2659            &Inst::Ret {} => "ret".to_string(),
2660            &Inst::AuthenticatedRet { key, is_hint } => {
2661                let key = match key {
2662                    APIKey::AZ => "az",
2663                    APIKey::BZ => "bz",
2664                    APIKey::ASP => "asp",
2665                    APIKey::BSP => "bsp",
2666                };
2667                match is_hint {
2668                    false => format!("reta{key}"),
2669                    true => format!("auti{key} ; ret"),
2670                }
2671            }
2672            &Inst::Jump { ref dest } => {
2673                let dest = dest.pretty_print(0);
2674                format!("b {dest}")
2675            }
2676            &Inst::CondBr {
2677                ref taken,
2678                ref not_taken,
2679                ref kind,
2680            } => {
2681                let taken = taken.pretty_print(0);
2682                let not_taken = not_taken.pretty_print(0);
2683                match kind {
2684                    &CondBrKind::Zero(reg, size) => {
2685                        let reg = pretty_print_reg_sized(reg, size);
2686                        format!("cbz {reg}, {taken} ; b {not_taken}")
2687                    }
2688                    &CondBrKind::NotZero(reg, size) => {
2689                        let reg = pretty_print_reg_sized(reg, size);
2690                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2691                    }
2692                    &CondBrKind::Cond(c) => {
2693                        let c = c.pretty_print(0);
2694                        format!("b.{c} {taken} ; b {not_taken}")
2695                    }
2696                }
2697            }
2698            &Inst::TestBitAndBranch {
2699                kind,
2700                ref taken,
2701                ref not_taken,
2702                rn,
2703                bit,
2704            } => {
2705                let cond = match kind {
2706                    TestBitAndBranchKind::Z => "z",
2707                    TestBitAndBranchKind::NZ => "nz",
2708                };
2709                let taken = taken.pretty_print(0);
2710                let not_taken = not_taken.pretty_print(0);
2711                let rn = pretty_print_reg(rn);
2712                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2713            }
2714            &Inst::IndirectBr { rn, .. } => {
2715                let rn = pretty_print_reg(rn);
2716                format!("br {rn}")
2717            }
2718            &Inst::Brk => "brk #0xf000".to_string(),
2719            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2720            &Inst::TrapIf {
2721                ref kind,
2722                trap_code,
2723            } => match kind {
2724                &CondBrKind::Zero(reg, size) => {
2725                    let reg = pretty_print_reg_sized(reg, size);
2726                    format!("cbz {reg}, #trap={trap_code}")
2727                }
2728                &CondBrKind::NotZero(reg, size) => {
2729                    let reg = pretty_print_reg_sized(reg, size);
2730                    format!("cbnz {reg}, #trap={trap_code}")
2731                }
2732                &CondBrKind::Cond(c) => {
2733                    let c = c.pretty_print(0);
2734                    format!("b.{c} #trap={trap_code}")
2735                }
2736            },
2737            &Inst::Adr { rd, off } => {
2738                let rd = pretty_print_reg(rd.to_reg());
2739                format!("adr {rd}, pc+{off}")
2740            }
2741            &Inst::Adrp { rd, off } => {
2742                let rd = pretty_print_reg(rd.to_reg());
2743                // This instruction addresses 4KiB pages, so multiply it by the page size.
2744                let byte_offset = off * 4096;
2745                format!("adrp {rd}, pc+{byte_offset}")
2746            }
2747            &Inst::Word4 { data } => format!("data.i32 {data}"),
2748            &Inst::Word8 { data } => format!("data.i64 {data}"),
2749            &Inst::JTSequence {
2750                default,
2751                ref targets,
2752                ridx,
2753                rtmp1,
2754                rtmp2,
2755                ..
2756            } => {
2757                let ridx = pretty_print_reg(ridx);
2758                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
2759                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
2760                let default_target = BranchTarget::Label(default).pretty_print(0);
2761                format!(
2762                    concat!(
2763                        "b.hs {} ; ",
2764                        "csel {}, xzr, {}, hs ; ",
2765                        "csdb ; ",
2766                        "adr {}, pc+16 ; ",
2767                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
2768                        "add {}, {}, {} ; ",
2769                        "br {} ; ",
2770                        "jt_entries {:?}"
2771                    ),
2772                    default_target,
2773                    rtmp2,
2774                    ridx,
2775                    rtmp1,
2776                    rtmp2,
2777                    rtmp1,
2778                    rtmp2,
2779                    rtmp1,
2780                    rtmp1,
2781                    rtmp2,
2782                    rtmp1,
2783                    targets
2784                )
2785            }
2786            &Inst::LoadExtNameGot { rd, ref name } => {
2787                let rd = pretty_print_reg(rd.to_reg());
2788                format!("load_ext_name_got {rd}, {name:?}")
2789            }
2790            &Inst::LoadExtNameNear {
2791                rd,
2792                ref name,
2793                offset,
2794            } => {
2795                let rd = pretty_print_reg(rd.to_reg());
2796                format!("load_ext_name_near {rd}, {name:?}+{offset}")
2797            }
2798            &Inst::LoadExtNameFar {
2799                rd,
2800                ref name,
2801                offset,
2802            } => {
2803                let rd = pretty_print_reg(rd.to_reg());
2804                format!("load_ext_name_far {rd}, {name:?}+{offset}")
2805            }
2806            &Inst::LoadAddr { rd, ref mem } => {
2807                // TODO: we really should find a better way to avoid duplication of
2808                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
2809                // expansion stage (i.e., legalization, but without the slow edit-in-place
2810                // of the existing legalization framework).
2811                let mem = mem.clone();
2812                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
2813                let mut ret = String::new();
2814                for inst in mem_insts.into_iter() {
2815                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2816                }
2817                let (reg, index_reg, offset) = match mem {
2818                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
2819                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
2820                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
2821                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
2822                };
2823                let abs_offset = if offset < 0 {
2824                    -offset as u64
2825                } else {
2826                    offset as u64
2827                };
2828                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
2829
2830                if let Some((idx, extendop)) = index_reg {
2831                    let add = Inst::AluRRRExtend {
2832                        alu_op: ALUOp::Add,
2833                        size: OperandSize::Size64,
2834                        rd,
2835                        rn: reg,
2836                        rm: idx,
2837                        extendop,
2838                    };
2839
2840                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2841                } else if offset == 0 {
2842                    let mov = Inst::gen_move(rd, reg, I64);
2843                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
2844                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
2845                    let add = Inst::AluRRImm12 {
2846                        alu_op,
2847                        size: OperandSize::Size64,
2848                        rd,
2849                        rn: reg,
2850                        imm12,
2851                    };
2852                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2853                } else {
2854                    let tmp = writable_spilltmp_reg();
2855                    for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
2856                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2857                    }
2858                    let add = Inst::AluRRR {
2859                        alu_op,
2860                        size: OperandSize::Size64,
2861                        rd,
2862                        rn: reg,
2863                        rm: tmp.to_reg(),
2864                    };
2865                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2866                }
2867                ret
2868            }
2869            &Inst::Paci { key } => {
2870                let key = match key {
2871                    APIKey::AZ => "az",
2872                    APIKey::BZ => "bz",
2873                    APIKey::ASP => "asp",
2874                    APIKey::BSP => "bsp",
2875                };
2876
2877                "paci".to_string() + key
2878            }
2879            &Inst::Xpaclri => "xpaclri".to_string(),
2880            &Inst::Bti { targets } => {
2881                let targets = match targets {
2882                    BranchTargetType::None => "",
2883                    BranchTargetType::C => " c",
2884                    BranchTargetType::J => " j",
2885                    BranchTargetType::JC => " jc",
2886                };
2887
2888                "bti".to_string() + targets
2889            }
2890            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),
2891
2892            &Inst::ElfTlsGetAddr {
2893                ref symbol,
2894                rd,
2895                tmp,
2896            } => {
2897                let rd = pretty_print_reg(rd.to_reg());
2898                let tmp = pretty_print_reg(tmp.to_reg());
2899                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
2900            }
2901            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
2902                let rd = pretty_print_reg(rd.to_reg());
2903                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2904            }
2905            &Inst::Unwind { ref inst } => {
2906                format!("unwind {inst:?}")
2907            }
2908            &Inst::DummyUse { reg } => {
2909                let reg = pretty_print_reg(reg);
2910                format!("dummy_use {reg}")
2911            }
2912            &Inst::LabelAddress { dst, label } => {
2913                let dst = pretty_print_reg(dst.to_reg());
2914                format!("label_address {dst}, {label:?}")
2915            }
2916            &Inst::SequencePoint {} => {
2917                format!("sequence_point")
2918            }
2919            &Inst::StackProbeLoop { start, end, step } => {
2920                let start = pretty_print_reg(start.to_reg());
2921                let end = pretty_print_reg(end);
2922                let step = step.pretty_print(0);
2923                format!("stack_probe_loop {start}, {end}, {step}")
2924            }
2925        }
2926    }
2927}
2928
2929//=============================================================================
2930// Label fixups and jump veneers.
2931
/// The kinds of label reference that AArch64 instructions (and jump-table
/// entries) can carry. Each variant names one encoding of a PC-relative
/// offset that is filled in once the label's final position is known.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LabelUse {
    /// `tbz` / `tbnz`: a signed 14-bit immediate stored in bits 18:5. The
    /// PC-relative byte offset is the immediate shifted left by 2.
    Branch14,
    /// `cbz` / `cbnz` / `b.cond`: a signed 19-bit immediate stored in bits
    /// 23:5. The PC-relative byte offset is the immediate shifted left by 2.
    Branch19,
    /// `b` / `bl`: a signed 26-bit immediate stored in bits 25:0. The
    /// PC-relative byte offset is the immediate shifted left by 2.
    Branch26,
    /// `ldr` (load literal): a signed 19-bit immediate stored in bits 23:5.
    /// The PC-relative byte offset is the immediate shifted left by 2.
    Ldr19,
    /// `adr` (address of label): a signed 21-bit immediate that is *not*
    /// shifted. Its upper 19 bits live in bits 23:5 and its lower 2 bits in
    /// bits 30:29.
    Adr21,
    /// A signed 32-bit constant holding an offset relative to the address of
    /// the constant itself. Used in jump tables.
    PCRel32,
}
2954
2955impl MachInstLabelUse for LabelUse {
2956    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
2957    const ALIGN: CodeOffset = 4;
2958
2959    /// Maximum PC-relative range (positive), inclusive.
2960    fn max_pos_range(self) -> CodeOffset {
2961        match self {
2962            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
2963            // range. Signed, so +2^(N+1) from zero. Likewise for two other
2964            // shifted cases below.
2965            LabelUse::Branch14 => (1 << 15) - 1,
2966            LabelUse::Branch19 => (1 << 20) - 1,
2967            LabelUse::Branch26 => (1 << 27) - 1,
2968            LabelUse::Ldr19 => (1 << 20) - 1,
2969            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
2970            // range.
2971            LabelUse::Adr21 => (1 << 20) - 1,
2972            LabelUse::PCRel32 => 0x7fffffff,
2973        }
2974    }
2975
2976    /// Maximum PC-relative range (negative).
2977    fn max_neg_range(self) -> CodeOffset {
2978        // All forms are twos-complement signed offsets, so negative limit is one more than
2979        // positive limit.
2980        self.max_pos_range() + 1
2981    }
2982
2983    /// Size of window into code needed to do the patch.
2984    fn patch_size(self) -> CodeOffset {
2985        // Patch is on one instruction only for all of these label reference types.
2986        4
2987    }
2988
2989    /// Perform the patch.
2990    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
2991        let pc_rel = (label_offset as i64) - (use_offset as i64);
2992        debug_assert!(pc_rel <= self.max_pos_range() as i64);
2993        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
2994        let pc_rel = pc_rel as u32;
2995        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
2996        let mask = match self {
2997            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
2998            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
2999            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
3000            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
3001            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
3002            LabelUse::PCRel32 => 0xffffffff,
3003        };
3004        let pc_rel_shifted = match self {
3005            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
3006            _ => {
3007                debug_assert!(pc_rel & 3 == 0);
3008                pc_rel >> 2
3009            }
3010        };
3011        let pc_rel_inserted = match self {
3012            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
3013            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
3014            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
3015            // Note: the *low* two bits of offset are put in the
3016            // *high* bits (30, 29).
3017            LabelUse::Adr21 => (pc_rel_shifted & 0x1ffffc) << 3 | (pc_rel_shifted & 3) << 29,
3018            LabelUse::PCRel32 => pc_rel_shifted,
3019        };
3020        let is_add = match self {
3021            LabelUse::PCRel32 => true,
3022            _ => false,
3023        };
3024        let insn_word = if is_add {
3025            insn_word.wrapping_add(pc_rel_inserted)
3026        } else {
3027            (insn_word & !mask) | pc_rel_inserted
3028        };
3029        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3030    }
3031
3032    /// Is a veneer supported for this label reference type?
3033    fn supports_veneer(self) -> bool {
3034        match self {
3035            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
3036            LabelUse::Branch26 => true,                      // veneer is a PCRel32
3037            _ => false,
3038        }
3039    }
3040
3041    /// How large is the veneer, if supported?
3042    fn veneer_size(self) -> CodeOffset {
3043        match self {
3044            LabelUse::Branch14 | LabelUse::Branch19 => 4,
3045            LabelUse::Branch26 => 20,
3046            _ => unreachable!(),
3047        }
3048    }
3049
3050    fn worst_case_veneer_size() -> CodeOffset {
3051        20
3052    }
3053
3054    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
3055    /// an offset and label-use for the veneer's use of the original label.
3056    fn generate_veneer(
3057        self,
3058        buffer: &mut [u8],
3059        veneer_offset: CodeOffset,
3060    ) -> (CodeOffset, LabelUse) {
3061        match self {
3062            LabelUse::Branch14 | LabelUse::Branch19 => {
3063                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
3064                // bother with constructing an Inst.
3065                let insn_word = 0b000101 << 26;
3066                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
3067                (veneer_offset, LabelUse::Branch26)
3068            }
3069
3070            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
3071            // get a further range. This jump translates to a jump to a
3072            // relative location based on the address of the constant loaded
3073            // from here.
3074            //
3075            // If this path is taken from a call instruction then caller-saved
3076            // registers are available (minus arguments), so x16/x17 are
3077            // available. Otherwise for intra-function jumps we also reserve
3078            // x16/x17 as spill-style registers. In both cases these are
3079            // available for us to use.
3080            LabelUse::Branch26 => {
3081                let tmp1 = regs::spilltmp_reg();
3082                let tmp1_w = regs::writable_spilltmp_reg();
3083                let tmp2 = regs::tmp2_reg();
3084                let tmp2_w = regs::writable_tmp2_reg();
3085                // ldrsw x16, 16
3086                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
3087                // adr x17, 12
3088                let adr = emit::enc_adr(12, tmp2_w);
3089                // add x16, x16, x17
3090                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
3091                // br x16
3092                let br = emit::enc_br(tmp1);
3093                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
3094                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
3095                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
3096                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
3097                // the 4-byte signed immediate we'll load is after these
3098                // instructions, 16-bytes in.
3099                (veneer_offset + 16, LabelUse::PCRel32)
3100            }
3101
3102            _ => panic!("Unsupported label-reference type for veneer generation!"),
3103        }
3104    }
3105
3106    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
3107        match (reloc, addend) {
3108            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
3109            _ => None,
3110        }
3111    }
3112}
3113
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the in-memory size of `Inst` so that a change which makes the enum
    /// grow is noticed here instead of slipping in unobserved.
    #[test]
    fn inst_size_test() {
        const EXPECTED_INST_BYTES: usize = 32;
        assert_eq!(EXPECTED_INST_BYTES, core::mem::size_of::<Inst>());
    }
}