cranelift_codegen/isa/aarch64/inst/mod.rs

//! This module defines aarch64-specific machine instruction types.

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::types::{F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};
use crate::ir::{MemFlags, Type, types};
use crate::isa::{CallConv, FunctionAlignment};
use crate::machinst::*;
use crate::{CodegenError, CodegenResult, settings};

use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};

use alloc::vec::Vec;
use core::slice;
use smallvec::{SmallVec, smallvec};
use std::fmt::Write;
use std::string::{String, ToString};

pub(crate) mod regs;
pub(crate) use self::regs::*;
pub mod imms;
pub use self::imms::*;
pub mod args;
pub use self::args::*;
pub mod emit;
pub(crate) use self::emit::*;
use crate::isa::aarch64::abi::AArch64MachineDeps;

pub(crate) mod unwind;

#[cfg(test)]
mod emit_tests;

//=============================================================================
// Instructions (top level): definition

pub use crate::isa::aarch64::lower::isle::generated_code::{
    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};

/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}

/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate, that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}

impl BitOp {
    /// Get the assembly mnemonic for this opcode.
    pub fn op_str(&self) -> &'static str {
        match self {
            BitOp::RBit => "rbit",
            BitOp::Clz => "clz",
            BitOp::Cls => "cls",
            BitOp::Rev16 => "rev16",
            BitOp::Rev32 => "rev32",
            BitOp::Rev64 => "rev64",
        }
    }
}

/// Additional information for `return_call[_ind]` instructions, kept out of
/// line to reduce the size of the `Inst` enum.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to.
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
}

fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
    let mut count = 0;
    for _ in 0..num_half_words {
        if value & 0xffff == 0 {
            count += 1;
        }
        value >>= 16;
    }

    count
}
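// Worked example (illustrative, not part of the upstream source): splitting
// 0x0000_1234_0000_5678 into four half-words yields 0x5678, 0x0000, 0x1234
// and 0x0000, so `count_zero_half_words(0x0000_1234_0000_5678, 4)` returns 2.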

impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
        // if modifications are made here before this is deleted after moving to
        // ISLE then those locations should be updated as well.

        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovZ,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovN,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORR using the zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr,
                size: OperandSize::Size64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };

            // If the number of 0xffff half-words is greater than the number of
            // 0x0000 half-words, it is more efficient to use `movn` for the
            // first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);

            // Either 0xffff or 0x0000 half-words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };

            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
                .filter_map(|i| {
                    let imm16 = (value >> (16 * i)) & 0xffff;
                    if imm16 == ignored_halfword {
                        None
                    } else {
                        Some((i, imm16))
                    }
                })
                .collect();

            let mut prev_result = None;
            for (i, imm16) in halfwords {
                let shift = i * 16;

                if let Some(rn) = prev_result {
                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                    insts.push(Inst::MovK { rd, rn, imm, size });
                } else {
                    if first_is_inverted {
                        let imm =
                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
                                .unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovN,
                            rd,
                            imm,
                            size,
                        });
                    } else {
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovZ,
                            rd,
                            imm,
                            size,
                        });
                    }
                }

                prev_result = Some(rd.to_reg());
            }

            assert!(prev_result.is_some());

            insts
        }
    }
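
    // Worked example (illustrative, not part of the upstream source): for
    // `value = 0x0012_3400_0000_5678` no single MOVZ/MOVN or logical
    // immediate applies, so the fallback path emits `movz rd, #0x5678`, then
    // `movk rd, #0x3400, LSL #32` and `movk rd, #0x0012, LSL #48`, skipping
    // the all-zero half-word at bits 16..32.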

    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            I32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            I64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    match bits {
                        128 => Inst::FpuLoad128 { rd, mem, flags },
                        64 => Inst::FpuLoad64 { rd, mem, flags },
                        32 => Inst::FpuLoad32 { rd, mem, flags },
                        16 => Inst::FpuLoad16 { rd, mem, flags },
                        _ => unimplemented!("gen_load({})", ty),
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }
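
    // For example (illustrative): an I8 load becomes `ULoad8`, emitted as
    // `ldrb`, which zero-extends the loaded byte into the destination
    // register.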

    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            I32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            I64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    match bits {
                        128 => Inst::FpuStore128 { rd, mem, flags },
                        64 => Inst::FpuStore64 { rd, mem, flags },
                        32 => Inst::FpuStore32 { rd, mem, flags },
                        16 => Inst::FpuStore16 { rd, mem, flags },
                        _ => unimplemented!("gen_store({})", ty),
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// What type does this load or store instruction access in memory? When
    /// uimm12 encoding is used, the size of this type is the amount that
    /// immediate offsets are scaled by.
    pub fn mem_type(&self) -> Option<Type> {
        match self {
            Inst::ULoad8 { .. } => Some(I8),
            Inst::SLoad8 { .. } => Some(I8),
            Inst::ULoad16 { .. } => Some(I16),
            Inst::SLoad16 { .. } => Some(I16),
            Inst::ULoad32 { .. } => Some(I32),
            Inst::SLoad32 { .. } => Some(I32),
            Inst::ULoad64 { .. } => Some(I64),
            Inst::FpuLoad16 { .. } => Some(F16),
            Inst::FpuLoad32 { .. } => Some(F32),
            Inst::FpuLoad64 { .. } => Some(F64),
            Inst::FpuLoad128 { .. } => Some(I8X16),
            Inst::Store8 { .. } => Some(I8),
            Inst::Store16 { .. } => Some(I16),
            Inst::Store32 { .. } => Some(I32),
            Inst::Store64 { .. } => Some(I64),
            Inst::FpuStore16 { .. } => Some(F16),
            Inst::FpuStore32 { .. } => Some(F32),
            Inst::FpuStore64 { .. } => Some(F64),
            Inst::FpuStore128 { .. } => Some(I8X16),
            _ => None,
        }
    }
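
    // Illustrative example (not part of the upstream source): a
    // uimm12-encoded `ldr x0, [x1, #32]` accesses an I64, so the immediate is
    // scaled by 8 bytes and the encoded offset field holds 32 / 8 == 4.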
}

//=============================================================================
// Instructions: get_regs

fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
    match memarg {
        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
            collector.reg_use(rn);
        }
        AMode::RegReg { rn, rm, .. }
        | AMode::RegScaled { rn, rm, .. }
        | AMode::RegScaledExtended { rn, rm, .. }
        | AMode::RegExtended { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        AMode::Label { .. } => {}
        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
        AMode::RegOffset { rn, .. } => {
            collector.reg_use(rn);
        }
        AMode::Const { .. } => {}
    }
}

fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
    match pairmemarg {
        PairAMode::SignedOffset { reg, .. } => {
            collector.reg_use(reg);
        }
        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
    }
}

fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    match inst {
        Inst::AluRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_use(ra);
        }
        Inst::AluRRImm12 { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRImmLogic { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRImmShift { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRRShift { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::AluRRRExtend { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::BitRR { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::ULoad8 { rd, mem, .. }
        | Inst::SLoad8 { rd, mem, .. }
        | Inst::ULoad16 { rd, mem, .. }
        | Inst::SLoad16 { rd, mem, .. }
        | Inst::ULoad32 { rd, mem, .. }
        | Inst::SLoad32 { rd, mem, .. }
        | Inst::ULoad64 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::Store8 { rd, mem, .. }
        | Inst::Store16 { rd, mem, .. }
        | Inst::Store32 { rd, mem, .. }
        | Inst::Store64 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::StoreP64 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::LoadP64 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::Mov { rd, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rm);
        }
        Inst::MovFromPReg { rd, rm } => {
            debug_assert!(rd.to_reg().is_virtual());
            collector.reg_def(rd);
            collector.reg_fixed_nonallocatable(*rm);
        }
        Inst::MovToPReg { rd, rm } => {
            debug_assert!(rm.is_virtual());
            collector.reg_fixed_nonallocatable(*rd);
            collector.reg_use(rm);
        }
        Inst::MovK { rd, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
        }
        Inst::MovWide { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::CSel { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CSNeg { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::CCmp { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CCmpImm { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::AtomicRMWLoop {
            op,
            addr,
            operand,
            oldval,
            scratch1,
            scratch2,
            ..
        } => {
            collector.reg_fixed_use(addr, xreg(25));
            collector.reg_fixed_use(operand, xreg(26));
            collector.reg_fixed_def(oldval, xreg(27));
            collector.reg_fixed_def(scratch1, xreg(24));
            if *op != AtomicRMWLoopOp::Xchg {
                collector.reg_fixed_def(scratch2, xreg(28));
            }
        }
        Inst::AtomicRMW { rs, rt, rn, .. } => {
            collector.reg_use(rs);
            collector.reg_def(rt);
            collector.reg_use(rn);
        }
        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // reuse `rs`.
            collector.reg_use(rs);
            collector.reg_use(rt);
            collector.reg_use(rn);
        }
        Inst::AtomicCASLoop {
            addr,
            expected,
            replacement,
            oldval,
            scratch,
            ..
        } => {
            collector.reg_fixed_use(addr, xreg(25));
            collector.reg_fixed_use(expected, xreg(26));
            collector.reg_fixed_use(replacement, xreg(28));
            collector.reg_fixed_def(oldval, xreg(27));
            collector.reg_fixed_def(scratch, xreg(24));
        }
        Inst::LoadAcquire { rt, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_def(rt);
        }
        Inst::StoreRelease { rt, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rt);
        }
        Inst::Fence {} | Inst::Csdb {} => {}
        Inst::FpuMove32 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMove64 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMove128 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMoveFromVec { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuExtend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRR { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuRRI { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRRIMod { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // reuse `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_use(ra);
        }
        Inst::VecMisc { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }

        Inst::VecLanes { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecShiftImm { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::VecExtract { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecTbl { rd, rn, rm } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_def(rd);
        }
        Inst::VecTblExt { rd, ri, rn, rm } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
            collector.reg_use(ri);
        }

        Inst::VecTbl2 { rd, rn, rn2, rm } => {
            // Constrain to v30 / v31 so that we satisfy the "adjacent
            // registers" constraint without use of pinned vregs in
            // lowering.
            collector.reg_fixed_use(rn, vreg(30));
            collector.reg_fixed_use(rn2, vreg(31));
            collector.reg_use(rm);
            collector.reg_def(rd);
        }
        Inst::VecTbl2Ext {
            rd,
            ri,
            rn,
            rn2,
            rm,
        } => {
            // Constrain to v30 / v31 so that we satisfy the "adjacent
            // registers" constraint without use of pinned vregs in
            // lowering.
            collector.reg_fixed_use(rn, vreg(30));
            collector.reg_fixed_use(rn2, vreg(31));
            collector.reg_use(rm);
            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
            collector.reg_use(ri);
        }
        Inst::VecLoadReplicate { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecCSel { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuCmp { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuLoad16 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad32 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad64 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad128 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore16 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore32 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore64 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore128 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuToInt { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::IntToFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuCSel16 { rd, rn, rm, .. }
        | Inst::FpuCSel32 { rd, rn, rm, .. }
        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuRound { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::MovToFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMoveFPImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::MovToVec { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDup { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDupFromFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDupFPImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::VecDupImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::VecExtend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecMovElement { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::VecRRLong { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRNarrowLow { rd, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_def(rd);
        }
        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
            collector.reg_use(ri);
        }
        Inst::VecRRPair { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRRLong { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRPairLong { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::MovToNZCV { rn } => {
            collector.reg_use(rn);
        }
        Inst::MovFromNZCV { rd } => {
            collector.reg_def(rd);
        }
        Inst::Extend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }
        Inst::Rets { rets } => {
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
        Inst::Jump { .. } => {}
        Inst::Call { info, .. } => {
            let CallInfo { uses, defs, .. } = &mut **info;
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(info.clobbers);
            if let Some(try_call_info) = &mut info.try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::CallInd { info, .. } => {
            let CallInfo {
                dest, uses, defs, ..
            } = &mut **info;
            collector.reg_use(dest);
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(info.clobbers);
            if let Some(try_call_info) = &mut info.try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::ReturnCall { info } => {
            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::ReturnCallInd { info } => {
            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
            // register that won't be clobbered by the callee-save restore code emitted with a
            // return_call_indirect.
            collector.reg_fixed_use(&mut info.dest, xreg(1));
            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::CondBr { kind, .. } => match kind {
            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
            CondBrKind::Cond(_) => {}
        },
        Inst::TestBitAndBranch { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::IndirectBr { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::Nop0 | Inst::Nop4 => {}
        Inst::Brk => {}
        Inst::Udf { .. } => {}
        Inst::TrapIf { kind, .. } => match kind {
            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
            CondBrKind::Cond(_) => {}
        },
        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
        Inst::JTSequence {
            ridx, rtmp1, rtmp2, ..
        } => {
            collector.reg_use(ridx);
            collector.reg_early_def(rtmp1);
            collector.reg_early_def(rtmp2);
        }
        Inst::LoadExtNameGot { rd, .. }
        | Inst::LoadExtNameNear { rd, .. }
        | Inst::LoadExtNameFar { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::LoadAddr { rd, mem } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::Paci { .. } | Inst::Xpaclri => {
            // Neither LR nor SP is an allocatable register, so there is no need
            // to do anything.
        }
        Inst::Bti { .. } => {}

        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
            // TLSDESC has a very neat calling convention. It is required to preserve
            // all registers except x0 and x30. x30 is not allocatable in Cranelift
            // since it is the link register.
            //
            // Additionally we need a second register as a temporary register for the
            // TLSDESC sequence. This register can be any register other than x0 (and x30).
            collector.reg_fixed_def(rd, regs::xreg(0));
            collector.reg_early_def(tmp);
        }
        Inst::MachOTlsGetAddr { rd, .. } => {
            collector.reg_fixed_def(rd, regs::xreg(0));
            let mut clobbers =
                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
            clobbers.remove(regs::xreg_preg(0));
            collector.reg_clobbers(clobbers);
        }
        Inst::Unwind { .. } => {}
        Inst::EmitIsland { .. } => {}
        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }
        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }
        Inst::SequencePoint { .. } => {}
        Inst::StackProbeLoop { start, end, .. } => {
            collector.reg_early_def(start);
            collector.reg_use(end);
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = AArch64MachineDeps;
    type LabelUse = LabelUse;

    // "CLIF" in hex, to make the trap recognizable during
    // debugging.
    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        aarch64_get_operands(self, collector);
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        match self {
            &Inst::Mov {
                size: OperandSize::Size64,
                rd,
                rm,
            } => Some((rd, rm)),
            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        let (caller, callee, is_exception) = match self {
            Inst::Args { .. } => return false,
            Inst::Call { info } => (
                info.caller_conv,
                info.callee_conv,
                info.try_call_info.is_some(),
            ),
            Inst::CallInd { info } => (
                info.caller_conv,
                info.callee_conv,
                info.try_call_info.is_some(),
            ),
            _ => return true,
        };

        // We exclude a call instruction from the clobber-set when the caller's
        // and callee's conventions clobber the same registers (for example,
        // when they use the same or similar ABIs). Such calls cannot possibly
        // force any new registers to be saved in the prologue, because anything
        // that the callee clobbers, the caller is also allowed to clobber. This
        // both saves work and enables us to more precisely follow the
        // half-caller-save, half-callee-save SysV ABI for some vector
        // registers.
        //
        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
        // more information on this ABI-implementation hack.
        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);

        let mut all_clobbers = caller_clobbers;
        all_clobbers.union_from(callee_clobbers);
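        // Consequence (illustrative): when the callee's clobber set is a
        // subset of the caller's, `all_clobbers == caller_clobbers` and the
        // call is excluded from the clobber list; only a callee that clobbers
        // something extra forces the call's clobbers to be included.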
        all_clobbers != caller_clobbers
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::Udf { .. } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        match self {
            Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            &Inst::Rets { .. } => MachTerminator::Ret,
            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
            &Inst::Jump { .. } => MachTerminator::Branch,
            &Inst::CondBr { .. } => MachTerminator::Branch,
            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
            &Inst::IndirectBr { .. } => MachTerminator::Branch,
            &Inst::JTSequence { .. } => MachTerminator::Branch,
            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            _ => MachTerminator::None,
        }
    }

    fn is_mem_access(&self) -> bool {
        match self {
            &Inst::ULoad8 { .. }
            | &Inst::SLoad8 { .. }
            | &Inst::ULoad16 { .. }
            | &Inst::SLoad16 { .. }
            | &Inst::ULoad32 { .. }
            | &Inst::SLoad32 { .. }
            | &Inst::ULoad64 { .. }
            | &Inst::LoadP64 { .. }
            | &Inst::FpuLoad16 { .. }
            | &Inst::FpuLoad32 { .. }
            | &Inst::FpuLoad64 { .. }
            | &Inst::FpuLoad128 { .. }
            | &Inst::FpuLoadP64 { .. }
            | &Inst::FpuLoadP128 { .. }
            | &Inst::Store8 { .. }
            | &Inst::Store16 { .. }
            | &Inst::Store32 { .. }
            | &Inst::Store64 { .. }
            | &Inst::StoreP64 { .. }
            | &Inst::FpuStore16 { .. }
            | &Inst::FpuStore32 { .. }
            | &Inst::FpuStore64 { .. }
            | &Inst::FpuStore128 { .. } => true,
            // TODO: verify this carefully
            _ => false,
        }
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        let bits = ty.bits();

        assert!(bits <= 128);
        assert!(to_reg.to_reg().class() == from_reg.class());
        match from_reg.class() {
            RegClass::Int => Inst::Mov {
                size: OperandSize::Size64,
                rd: to_reg,
                rm: from_reg,
            },
            RegClass::Float => {
                if bits > 64 {
                    Inst::FpuMove128 {
                        rd: to_reg,
                        rn: from_reg,
                    }
                } else {
                    Inst::FpuMove64 {
                        rd: to_reg,
                        rn: from_reg,
                    }
                }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::Call { .. } | Inst::CallInd { .. } => true,
            _ => false,
        }
    }

    fn gen_dummy_use(reg: Reg) -> Inst {
        Inst::DummyUse { reg }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        if preferred_size == 0 {
            return Inst::Nop0;
        }
        // We can't emit a NOP (or any instruction) smaller than 4 bytes.
        assert!(preferred_size >= 4);
        Inst::Nop4
    }

    fn gen_nop_units() -> Vec<Vec<u8>> {
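        // One nop unit: the AArch64 `nop` encoding 0xD503201F, stored in
        // little-endian byte order.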
        vec![vec![0x1f, 0x20, 0x03, 0xd5]]
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            I8 => Ok((&[RegClass::Int], &[I8])),
            I16 => Ok((&[RegClass::Int], &[I16])),
            I32 => Ok((&[RegClass::Int], &[I32])),
            I64 => Ok((&[RegClass::Int], &[I64])),
            F16 => Ok((&[RegClass::Float], &[F16])),
            F32 => Ok((&[RegClass::Float], &[F32])),
            F64 => Ok((&[RegClass::Float], &[F64])),
            F128 => Ok((&[RegClass::Float], &[F128])),
            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }
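
    // Illustrative example (not part of the upstream source): a 4-byte vector
    // type such as I16X2 has `ty.bytes() == 4` and `ilog2(4) == 2`, so index 1
    // selects `types::I8X4` as its canonical same-size storage type.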

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(target: MachLabel) -> Inst {
        Inst::Jump {
            dest: BranchTarget::Label(target),
        }
    }

    fn worst_case_size() -> CodeOffset {
        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
        // 64-bit f64 constants.
        //
        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
        // feasible for other reasons).
        44
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn gen_block_start(
        is_indirect_branch_target: bool,
        is_forward_edge_cfi_enabled: bool,
    ) -> Option<Self> {
        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
            Some(Inst::Bti {
                targets: BranchTargetType::J,
            })
        } else {
            None
        }
    }

    fn function_alignment() -> FunctionAlignment {
        // We use 32-byte alignment for performance reasons, but for correctness
        // we would only need 4-byte alignment.
        FunctionAlignment {
            minimum: 4,
            preferred: 32,
        }
    }
}

//=============================================================================
// Pretty-printing of instructions.

fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
    let mut mem_str = mem_insts
        .into_iter()
        .map(|inst| inst.print_with_state(&mut EmitState::default()))
        .collect::<Vec<_>>()
        .join(" ; ");
    if !mem_str.is_empty() {
        mem_str += " ; ";
    }

    let mem = mem.pretty_print(access_ty.bytes() as u8);
    (mem_str, mem)
}

fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; b {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl Inst {
    fn print_with_state(&self, state: &mut EmitState) -> String {
        fn op_name(alu_op: ALUOp) -> &'static str {
            match alu_op {
                ALUOp::Add => "add",
                ALUOp::Sub => "sub",
                ALUOp::Orr => "orr",
                ALUOp::And => "and",
                ALUOp::AndS => "ands",
                ALUOp::Eor => "eor",
                ALUOp::AddS => "adds",
                ALUOp::SubS => "subs",
                ALUOp::SMulH => "smulh",
                ALUOp::UMulH => "umulh",
                ALUOp::SDiv => "sdiv",
                ALUOp::UDiv => "udiv",
                ALUOp::AndNot => "bic",
                ALUOp::OrrNot => "orn",
                ALUOp::EorNot => "eon",
                ALUOp::Extr => "extr",
                ALUOp::Lsr => "lsr",
                ALUOp::Asr => "asr",
                ALUOp::Lsl => "lsl",
                ALUOp::Adc => "adc",
                ALUOp::AdcS => "adcs",
                ALUOp::Sbc => "sbc",
                ALUOp::SbcS => "sbcs",
            }
        }
1249
1250        match self {
1251            &Inst::Nop0 => "nop-zero-len".to_string(),
1252            &Inst::Nop4 => "nop".to_string(),
1253            &Inst::AluRRR {
1254                alu_op,
1255                size,
1256                rd,
1257                rn,
1258                rm,
1259            } => {
1260                let op = op_name(alu_op);
1261                let rd = pretty_print_ireg(rd.to_reg(), size);
1262                let rn = pretty_print_ireg(rn, size);
1263                let rm = pretty_print_ireg(rm, size);
1264                format!("{op} {rd}, {rn}, {rm}")
1265            }
1266            &Inst::AluRRRR {
1267                alu_op,
1268                size,
1269                rd,
1270                rn,
1271                rm,
1272                ra,
1273            } => {
1274                let (op, da_size) = match alu_op {
1275                    ALUOp3::MAdd => ("madd", size),
1276                    ALUOp3::MSub => ("msub", size),
1277                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1278                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1279                };
1280                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1281                let rn = pretty_print_ireg(rn, size);
1282                let rm = pretty_print_ireg(rm, size);
1283                let ra = pretty_print_ireg(ra, da_size);
1284
1285                format!("{op} {rd}, {rn}, {rm}, {ra}")
1286            }
1287            &Inst::AluRRImm12 {
1288                alu_op,
1289                size,
1290                rd,
1291                rn,
1292                ref imm12,
1293            } => {
1294                let op = op_name(alu_op);
1295                let rd = pretty_print_ireg(rd.to_reg(), size);
1296                let rn = pretty_print_ireg(rn, size);
1297
1298                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1299                    // special-case MOV (used for moving into SP).
1300                    format!("mov {rd}, {rn}")
1301                } else {
1302                    let imm12 = imm12.pretty_print(0);
1303                    format!("{op} {rd}, {rn}, {imm12}")
1304                }
1305            }
1306            &Inst::AluRRImmLogic {
1307                alu_op,
1308                size,
1309                rd,
1310                rn,
1311                ref imml,
1312            } => {
1313                let op = op_name(alu_op);
1314                let rd = pretty_print_ireg(rd.to_reg(), size);
1315                let rn = pretty_print_ireg(rn, size);
1316                let imml = imml.pretty_print(0);
1317                format!("{op} {rd}, {rn}, {imml}")
1318            }
1319            &Inst::AluRRImmShift {
1320                alu_op,
1321                size,
1322                rd,
1323                rn,
1324                ref immshift,
1325            } => {
1326                let op = op_name(alu_op);
1327                let rd = pretty_print_ireg(rd.to_reg(), size);
1328                let rn = pretty_print_ireg(rn, size);
1329                let immshift = immshift.pretty_print(0);
1330                format!("{op} {rd}, {rn}, {immshift}")
1331            }
1332            &Inst::AluRRRShift {
1333                alu_op,
1334                size,
1335                rd,
1336                rn,
1337                rm,
1338                ref shiftop,
1339            } => {
1340                let op = op_name(alu_op);
1341                let rd = pretty_print_ireg(rd.to_reg(), size);
1342                let rn = pretty_print_ireg(rn, size);
1343                let rm = pretty_print_ireg(rm, size);
1344                let shiftop = shiftop.pretty_print(0);
1345                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1346            }
1347            &Inst::AluRRRExtend {
1348                alu_op,
1349                size,
1350                rd,
1351                rn,
1352                rm,
1353                ref extendop,
1354            } => {
1355                let op = op_name(alu_op);
1356                let rd = pretty_print_ireg(rd.to_reg(), size);
1357                let rn = pretty_print_ireg(rn, size);
1358                let rm = pretty_print_ireg(rm, size);
1359                let extendop = extendop.pretty_print(0);
1360                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1361            }
1362            &Inst::BitRR { op, size, rd, rn } => {
1363                let op = op.op_str();
1364                let rd = pretty_print_ireg(rd.to_reg(), size);
1365                let rn = pretty_print_ireg(rn, size);
1366                format!("{op} {rd}, {rn}")
1367            }
1368            &Inst::ULoad8 { rd, ref mem, .. }
1369            | &Inst::SLoad8 { rd, ref mem, .. }
1370            | &Inst::ULoad16 { rd, ref mem, .. }
1371            | &Inst::SLoad16 { rd, ref mem, .. }
1372            | &Inst::ULoad32 { rd, ref mem, .. }
1373            | &Inst::SLoad32 { rd, ref mem, .. }
1374            | &Inst::ULoad64 { rd, ref mem, .. } => {
1375                let is_unscaled = match &mem {
1376                    &AMode::Unscaled { .. } => true,
1377                    _ => false,
1378                };
1379                let (op, size) = match (self, is_unscaled) {
1380                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1381                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1382                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1383                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1384                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1385                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1386                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1387                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1388                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1389                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1390                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1391                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1392                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1393                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1394                    _ => unreachable!(),
1395                };
1396
1397                let rd = pretty_print_ireg(rd.to_reg(), size);
1398                let mem = mem.clone();
1399                let access_ty = self.mem_type().unwrap();
1400                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1401
1402                format!("{mem_str}{op} {rd}, {mem}")
1403            }
1404            &Inst::Store8 { rd, ref mem, .. }
1405            | &Inst::Store16 { rd, ref mem, .. }
1406            | &Inst::Store32 { rd, ref mem, .. }
1407            | &Inst::Store64 { rd, ref mem, .. } => {
1408                let is_unscaled = match &mem {
1409                    &AMode::Unscaled { .. } => true,
1410                    _ => false,
1411                };
1412                let (op, size) = match (self, is_unscaled) {
1413                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1414                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1415                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1416                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1417                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1418                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1419                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1420                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1421                    _ => unreachable!(),
1422                };
1423
1424                let rd = pretty_print_ireg(rd, size);
1425                let mem = mem.clone();
1426                let access_ty = self.mem_type().unwrap();
1427                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1428
1429                format!("{mem_str}{op} {rd}, {mem}")
1430            }
            &Inst::StoreP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_ireg(rt, OperandSize::Size64);
                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();
                format!("stp {rt}, {rt2}, {mem}")
            }
            &Inst::LoadP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();
                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::Mov { size, rd, rm } => {
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rm = pretty_print_ireg(rm, size);
                format!("mov {rd}, {rm}")
            }
            &Inst::MovFromPReg { rd, rm } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
                format!("mov {rd}, {rm}")
            }
            &Inst::MovToPReg { rd, rm } => {
                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
                let rm = pretty_print_ireg(rm, OperandSize::Size64);
                format!("mov {rd}, {rm}")
            }
            &Inst::MovWide {
                op,
                rd,
                ref imm,
                size,
            } => {
                let op_str = match op {
                    MoveWideOp::MovZ => "movz",
                    MoveWideOp::MovN => "movn",
                };
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let imm = imm.pretty_print(0);
                format!("{op_str} {rd}, {imm}")
            }
            &Inst::MovK {
                rd,
                rn,
                ref imm,
                size,
            } => {
                let rn = pretty_print_ireg(rn, size);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let imm = imm.pretty_print(0);
                format!("movk {rd}, {rn}, {imm}")
            }
            &Inst::CSel { rd, rn, rm, cond } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rm = pretty_print_ireg(rm, OperandSize::Size64);
                let cond = cond.pretty_print(0);
                format!("csel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::CSNeg { rd, rn, rm, cond } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rm = pretty_print_ireg(rm, OperandSize::Size64);
                let cond = cond.pretty_print(0);
                format!("csneg {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::CSet { rd, cond } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
                let cond = cond.pretty_print(0);
                format!("cset {rd}, {cond}")
            }
            &Inst::CSetm { rd, cond } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
                let cond = cond.pretty_print(0);
                format!("csetm {rd}, {cond}")
            }
            &Inst::CCmp {
                size,
                rn,
                rm,
                nzcv,
                cond,
            } => {
                let rn = pretty_print_ireg(rn, size);
                let rm = pretty_print_ireg(rm, size);
                let nzcv = nzcv.pretty_print(0);
                let cond = cond.pretty_print(0);
                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                let rn = pretty_print_ireg(rn, size);
                let imm = imm.pretty_print(0);
                let nzcv = nzcv.pretty_print(0);
                let cond = cond.pretty_print(0);
                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
            }
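            // The single-instruction RMW forms below are the ARMv8.1 LSE
            // atomics; the `al` suffix on each mnemonic denotes
            // acquire-release ordering.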
            &Inst::AtomicRMW {
                rs, rt, rn, ty, op, ..
            } => {
                let op = match op {
                    AtomicRMWOp::Add => "ldaddal",
                    AtomicRMWOp::Clr => "ldclral",
                    AtomicRMWOp::Eor => "ldeoral",
                    AtomicRMWOp::Set => "ldsetal",
                    AtomicRMWOp::Smax => "ldsmaxal",
                    AtomicRMWOp::Umax => "ldumaxal",
                    AtomicRMWOp::Smin => "ldsminal",
                    AtomicRMWOp::Umin => "lduminal",
                    AtomicRMWOp::Swp => "swpal",
                };

                let size = OperandSize::from_ty(ty);
                let rs = pretty_print_ireg(rs, size);
                let rt = pretty_print_ireg(rt.to_reg(), size);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);

                let ty_suffix = match ty {
                    I8 => "b",
                    I16 => "h",
                    _ => "",
                };
                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
            }
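            // `AtomicRMWLoop` is a pseudo-instruction: at emission it expands
            // to a load-exclusive/store-exclusive retry loop, so it is printed
            // symbolically rather than as one mnemonic.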
            &Inst::AtomicRMWLoop {
                ty,
                op,
                addr,
                operand,
                oldval,
                scratch1,
                scratch2,
                ..
            } => {
                let op = match op {
                    AtomicRMWLoopOp::Add => "add",
                    AtomicRMWLoopOp::Sub => "sub",
                    AtomicRMWLoopOp::Eor => "eor",
                    AtomicRMWLoopOp::Orr => "orr",
                    AtomicRMWLoopOp::And => "and",
                    AtomicRMWLoopOp::Nand => "nand",
                    AtomicRMWLoopOp::Smin => "smin",
                    AtomicRMWLoopOp::Smax => "smax",
                    AtomicRMWLoopOp::Umin => "umin",
                    AtomicRMWLoopOp::Umax => "umax",
                    AtomicRMWLoopOp::Xchg => "xchg",
                };
                let addr = pretty_print_ireg(addr, OperandSize::Size64);
                let operand = pretty_print_ireg(operand, OperandSize::Size64);
                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
                format!(
                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
                    op,
                    ty.bits(),
                    addr,
                    operand,
                    oldval,
                    scratch1,
                    scratch2,
                )
            }
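            // `casal{b,h}` is the ARMv8.1 LSE compare-and-swap with
            // acquire-release semantics; targets without LSE use the
            // `AtomicCASLoop` pseudo-instruction below instead.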
            &Inst::AtomicCAS {
                rd, rs, rt, rn, ty, ..
            } => {
                let op = match ty {
                    I8 => "casalb",
                    I16 => "casalh",
                    I32 | I64 => "casal",
                    _ => panic!("Unsupported type: {ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rs = pretty_print_ireg(rs, size);
                let rt = pretty_print_ireg(rt, size);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);

                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
            }
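            // Like `AtomicRMWLoop`, this pseudo-instruction expands to an
            // exclusive load/store retry loop at emission time.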
            &Inst::AtomicCASLoop {
                ty,
                addr,
                expected,
                replacement,
                oldval,
                scratch,
                ..
            } => {
                let addr = pretty_print_ireg(addr, OperandSize::Size64);
                let expected = pretty_print_ireg(expected, OperandSize::Size64);
                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
                format!(
                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
                    ty.bits(),
                    addr,
                    expected,
                    replacement,
                    oldval,
                    scratch,
                )
            }
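            // `ldar`/`stlr` are the baseline ARMv8.0 load-acquire and
            // store-release instructions; sub-word accesses go through a W
            // register, hence the I32 operand size for the I8/I16 cases.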
            &Inst::LoadAcquire {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("ldarb", I32),
                    I16 => ("ldarh", I32),
                    I32 => ("ldar", I32),
                    I64 => ("ldar", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt.to_reg(), size);
                format!("{op} {rt}, [{rn}]")
            }
            &Inst::StoreRelease {
                access_ty, rt, rn, ..
            } => {
                let (op, ty) = match access_ty {
                    I8 => ("stlrb", I32),
                    I16 => ("stlrh", I32),
                    I32 => ("stlr", I32),
                    I64 => ("stlr", I64),
                    _ => panic!("Unsupported type: {access_ty}"),
                };
                let size = OperandSize::from_ty(ty);
                let rn = pretty_print_ireg(rn, OperandSize::Size64);
                let rt = pretty_print_ireg(rt, size);
                format!("{op} {rt}, [{rn}]")
            }
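            // `dmb ish` is a full data memory barrier over the
            // inner-shareable domain.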
            &Inst::Fence {} => {
                format!("dmb ish")
            }
            &Inst::Csdb {} => {
                format!("csdb")
            }
            &Inst::FpuMove32 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuMove64 { rd, rn } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuMove128 { rd, rn } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rn = pretty_print_reg(rn);
                format!("mov {rd}.16b, {rn}.16b")
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("mov {rd}, {rn}")
            }
            &Inst::FpuExtend { rd, rn, size } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuRR {
                fpu_op,
                size,
                rd,
                rn,
            } => {
                let op = match fpu_op {
                    FPUOp1::Abs => "fabs",
                    FPUOp1::Neg => "fneg",
                    FPUOp1::Sqrt => "fsqrt",
                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
                };
                let dst_size = match fpu_op {
                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
                    _ => size,
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{op} {rd}, {rn}")
            }
            &Inst::FpuRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let op = match fpu_op {
                    FPUOp2::Add => "fadd",
                    FPUOp2::Sub => "fsub",
                    FPUOp2::Mul => "fmul",
                    FPUOp2::Div => "fdiv",
                    FPUOp2::Max => "fmax",
                    FPUOp2::Min => "fmin",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
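            // Scalar SIMD `ushr` is only defined for 64-bit (D) operands, so
            // the 32-bit variant is printed using the two-lane 32x2 vector
            // form instead.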
            &Inst::FpuRRI { fpu_op, rd, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
                };

                let (rd, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {rn}, {imm}")
            }
            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                let (op, imm, vector) = match fpu_op {
                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
                };

                let (rd, ri, rn) = if vector {
                    (
                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
                    )
                } else {
                    (
                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
                    )
                };
                format!("{op} {rd}, {ri}, {rn}, {imm}")
            }
            &Inst::FpuRRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let op = match fpu_op {
                    FPUOp3::MAdd => "fmadd",
                    FPUOp3::MSub => "fmsub",
                    FPUOp3::NMAdd => "fnmadd",
                    FPUOp3::NMSub => "fnmsub",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                let ra = pretty_print_vreg_scalar(ra, size);
                format!("{op} {rd}, {rn}, {rm}, {ra}")
            }
            &Inst::FpuCmp { size, rn, rm } => {
                let rn = pretty_print_vreg_scalar(rn, size);
                let rm = pretty_print_vreg_scalar(rm, size);
                format!("fcmp {rn}, {rm}")
            }
            &Inst::FpuLoad16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuLoad64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
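            // For 128-bit accesses the `q` register name is wanted:
            // `pretty_print_reg` yields the `v`-prefixed name, so the leading
            // character is swapped for `q`.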
            &Inst::FpuLoad128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd.to_reg());
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}ldr {rd}, {mem}")
            }
            &Inst::FpuStore16 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore32 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore64 { rd, ref mem, .. } => {
                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuStore128 { rd, ref mem, .. } => {
                let rd = pretty_print_reg(rd);
                let rd = "q".to_string() + &rd[1..];
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
                format!("{mem_str}str {rd}, {mem}")
            }
            &Inst::FpuLoadP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP64 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuLoadP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("ldp {rt}, {rt2}, {mem}")
            }
            &Inst::FpuStoreP128 {
                rt, rt2, ref mem, ..
            } => {
                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
                let mem = mem.clone();
                let mem = mem.pretty_print_default();

                format!("stp {rt}, {rt2}, {mem}")
            }
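            // FP<->int conversions pair an FP scalar size with a
            // general-register size, so the two operands are printed with
            // independent widths. `fcvtzs`/`fcvtzu` convert toward zero;
            // `scvtf`/`ucvtf` are the signed/unsigned int-to-FP converts.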
            &Inst::FpuToInt { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
                };
                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
                let rn = pretty_print_vreg_scalar(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let (op, sizesrc, sizedest) = match op {
                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
                let rn = pretty_print_ireg(rn, sizesrc);
                format!("{op} {rd}, {rn}")
            }
            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
                let cond = cond.pretty_print(0);
                format!("fcsel {rd}, {rn}, {rm}, {cond}")
            }
            &Inst::FpuRound { op, rd, rn } => {
                let (inst, size) = match op {
                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_vreg_scalar(rn, size);
                format!("{inst} {rd}, {rn}")
            }
            &Inst::MovToFpu { rd, rn, size } => {
                let operand_size = size.operand_size();
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, operand_size);
                format!("fmov {rd}, {rn}")
            }
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
                let rn = pretty_print_ireg(rn, size.operand_size());
                format!("mov {rd}, {ri}, {rn}")
            }
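            // Moving a lane to a general-purpose register uses `umov`; for
            // 32- and 64-bit lanes the assembler alias `mov` is preferred.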
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let op = match size {
                    ScalarSize::Size8 => "umov",
                    ScalarSize::Size16 => "umov",
                    ScalarSize::Size32 => "mov",
                    ScalarSize::Size64 => "mov",
                    _ => unimplemented!(),
                };
                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
                let rn = pretty_print_vreg_element(rn, idx as usize, size);
                format!("{op} {rd}, {rn}")
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
                format!("smov {rd}, {rn}")
            }
            &Inst::VecDup { rd, rn, size } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size.operand_size());
                format!("dup {rd}, {rn}")
            }
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
                format!("dup {rd}, {rn}")
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.pretty_print(0);
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("fmov {rd}, {imm}")
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let imm = imm.pretty_print(0);
                let op = if invert { "mvni" } else { "movi" };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);

                format!("{op} {rd}, {imm}")
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
                let rd_size = VectorSize::from_lane_size(lane_size, true);
                let (op, rn_size) = match (t, high_half) {
                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, rn_size);
                format!("{op} {rd}, {rn}")
            }
            &Inst::VecMovElement {
                rd,
                ri,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                let rd =
                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
                format!("mov {rd}, {ri}, {rn}")
            }
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (op, rd_size, size, suffix) = match (op, high_half) {
                    (VecRRLongOp::Fcvtl16, false) => {
                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
                    }
                    (VecRRLongOp::Fcvtl16, true) => {
                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
                    }
                    (VecRRLongOp::Fcvtl32, false) => {
                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
                    }
                    (VecRRLongOp::Fcvtl32, true) => {
                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
                    }
                    (VecRRLongOp::Shll8, false) => {
                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
                    }
                    (VecRRLongOp::Shll8, true) => {
                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
                    }
                    (VecRRLongOp::Shll16, false) => {
                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
                    }
                    (VecRRLongOp::Shll16, true) => {
                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
                    }
                    (VecRRLongOp::Shll32, false) => {
                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
                    }
                    (VecRRLongOp::Shll32, true) => {
                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let rn = pretty_print_vreg_vector(rn, size);

                format!("{op} {rd}, {rn}{suffix}")
            }
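            // Narrowing ops come in low/high pairs: the `2` forms (e.g.
            // `xtn2`) write only the high half of the destination, so the
            // high variant also reads the existing register value (`ri`).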
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
                ..
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let vec64 = VectorSize::from_lane_size(lane_size, false);
                let vec128 = VectorSize::from_lane_size(lane_size, true);
                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };
                let (op, rd_size) = match (op, high_half) {
                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
                };
                let rn = pretty_print_vreg_vector(rn, rn_size);
                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
                let ri = match self {
                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
                    &Inst::VecRRNarrowHigh { ri, .. } => {
                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
                    }
                    _ => unreachable!(),
                };

                format!("{op} {rd}, {ri}{rn}")
            }
            &Inst::VecRRPair { op, rd, rn } => {
                let op = match op {
                    VecPairOp::Addp => "addp",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);

                format!("{op} {rd}, {rn}")
            }
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (op, dest, src) = match op {
                    VecRRPairLongOp::Saddlp8 => {
                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Saddlp16 => {
                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    VecRRPairLongOp::Uaddlp8 => {
                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    VecRRPairLongOp::Uaddlp16 => {
                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
                let rn = pretty_print_vreg_vector(rn, src);

                format!("{op} {rd}, {rn}")
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUOp::Sqadd => ("sqadd", size),
                    VecALUOp::Uqadd => ("uqadd", size),
                    VecALUOp::Sqsub => ("sqsub", size),
                    VecALUOp::Uqsub => ("uqsub", size),
                    VecALUOp::Cmeq => ("cmeq", size),
                    VecALUOp::Cmge => ("cmge", size),
                    VecALUOp::Cmgt => ("cmgt", size),
                    VecALUOp::Cmhs => ("cmhs", size),
                    VecALUOp::Cmhi => ("cmhi", size),
                    VecALUOp::Fcmeq => ("fcmeq", size),
                    VecALUOp::Fcmgt => ("fcmgt", size),
                    VecALUOp::Fcmge => ("fcmge", size),
                    VecALUOp::Umaxp => ("umaxp", size),
                    VecALUOp::Add => ("add", size),
                    VecALUOp::Sub => ("sub", size),
                    VecALUOp::Mul => ("mul", size),
                    VecALUOp::Sshl => ("sshl", size),
                    VecALUOp::Ushl => ("ushl", size),
                    VecALUOp::Umin => ("umin", size),
                    VecALUOp::Smin => ("smin", size),
                    VecALUOp::Umax => ("umax", size),
                    VecALUOp::Smax => ("smax", size),
                    VecALUOp::Urhadd => ("urhadd", size),
                    VecALUOp::Fadd => ("fadd", size),
                    VecALUOp::Fsub => ("fsub", size),
                    VecALUOp::Fdiv => ("fdiv", size),
                    VecALUOp::Fmax => ("fmax", size),
                    VecALUOp::Fmin => ("fmin", size),
                    VecALUOp::Fmul => ("fmul", size),
                    VecALUOp::Addp => ("addp", size),
                    VecALUOp::Zip1 => ("zip1", size),
                    VecALUOp::Zip2 => ("zip2", size),
                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
                    VecALUOp::Uzp1 => ("uzp1", size),
                    VecALUOp::Uzp2 => ("uzp2", size),
                    VecALUOp::Trn1 => ("trn1", size),
                    VecALUOp::Trn2 => ("trn2", size),

                    // Bitwise operations are lane-agnostic: the lane
                    // arrangement does not change their semantics. Print them
                    // with 8-bit lanes to match the standard ARM assembly
                    // syntax.
                    VecALUOp::And => ("and", size.as_scalar8_vector()),
                    VecALUOp::Bic => ("bic", size.as_scalar8_vector()),
                    VecALUOp::Orr => ("orr", size.as_scalar8_vector()),
                    VecALUOp::Orn => ("orn", size.as_scalar8_vector()),
                    VecALUOp::Eor => ("eor", size.as_scalar8_vector()),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
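            // These ops modify their destination: `bsl` reads it as the
            // select mask, while `fmla`/`fmls` accumulate into it, so the
            // incoming value is carried as the separate `ri` operand.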
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (op, size) = match alu_op {
                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
                    VecALUModOp::Fmla => ("fmla", size),
                    VecALUModOp::Fmls => ("fmls", size),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_vector(rm, size);
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                let (op, size) = match alu_op {
                    VecALUModOp::Fmla => ("fmla", size),
                    VecALUModOp::Fmls => ("fmls", size),
                    _ => unreachable!(),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
            &Inst::VecRRRLong {
                rd,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                let (op, dest_size, src_size) = match (alu_op, high_half) {
                    (VecRRRLongOp::Smull8, false) => {
                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecRRRLongOp::Smull8, true) => {
                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecRRRLongOp::Smull16, false) => {
                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecRRRLongOp::Smull16, true) => {
                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecRRRLongOp::Smull32, false) => {
                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecRRRLongOp::Smull32, true) => {
                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                    (VecRRRLongOp::Umull8, false) => {
                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecRRRLongOp::Umull8, true) => {
                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecRRRLongOp::Umull16, false) => {
                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecRRRLongOp::Umull16, true) => {
                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecRRRLongOp::Umull32, false) => {
                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecRRRLongOp::Umull32, true) => {
                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
                let rn = pretty_print_vreg_vector(rn, src_size);
                let rm = pretty_print_vreg_vector(rm, src_size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            &Inst::VecRRRLongMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                let (op, dest_size, src_size) = match (alu_op, high_half) {
                    (VecRRRLongModOp::Umlal8, false) => {
                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecRRRLongModOp::Umlal8, true) => {
                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecRRRLongModOp::Umlal16, false) => {
                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecRRRLongModOp::Umlal16, true) => {
                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecRRRLongModOp::Umlal32, false) => {
                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecRRRLongModOp::Umlal32, true) => {
                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
                let ri = pretty_print_vreg_vector(ri, dest_size);
                let rn = pretty_print_vreg_vector(rn, src_size);
                let rm = pretty_print_vreg_vector(rm, src_size);
                format!("{op} {rd}, {ri}, {rn}, {rm}")
            }
            &Inst::VecMisc { op, rd, rn, size } => {
                let (op, size, suffix) = match op {
                    VecMisc2::Neg => ("neg", size, ""),
                    VecMisc2::Abs => ("abs", size, ""),
                    VecMisc2::Fabs => ("fabs", size, ""),
                    VecMisc2::Fneg => ("fneg", size, ""),
                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
                    VecMisc2::Rev16 => ("rev16", size, ""),
                    VecMisc2::Rev32 => ("rev32", size, ""),
                    VecMisc2::Rev64 => ("rev64", size, ""),
                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
                    VecMisc2::Scvtf => ("scvtf", size, ""),
                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
                    VecMisc2::Frintn => ("frintn", size, ""),
                    VecMisc2::Frintz => ("frintz", size, ""),
                    VecMisc2::Frintm => ("frintm", size, ""),
                    VecMisc2::Frintp => ("frintp", size, ""),
                    VecMisc2::Cnt => ("cnt", size, ""),
                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),

                    // Bitwise NOT is lane-agnostic: the lane arrangement does
                    // not change its semantics. Print it with 8-bit lanes to
                    // match the standard ARM assembly syntax.
                    VecMisc2::Not => ("mvn", size.as_scalar8_vector(), ""),
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_vector(rn, size);
                format!("{op} {rd}, {rn}{suffix}")
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let op = match op {
                    VecLanesOp::Uminv => "uminv",
                    VecLanesOp::Addv => "addv",
                };
                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
                let rn = pretty_print_vreg_vector(rn, size);
                format!("{op} {rd}, {rn}")
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let op = match op {
                    VecShiftImmOp::Shl => "shl",
                    VecShiftImmOp::Ushr => "ushr",
                    VecShiftImmOp::Sshr => "sshr",
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_vreg_vector(rn, size);
                format!("{op} {rd}, {rn}, #{imm}")
            }
            &Inst::VecShiftImmMod {
                op,
                rd,
                ri,
                rn,
                size,
                imm,
            } => {
                let op = match op {
                    VecShiftImmModOp::Sli => "sli",
                };
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let ri = pretty_print_vreg_vector(ri, size);
                let rn = pretty_print_vreg_vector(rn, size);
                format!("{op} {rd}, {ri}, {rn}, #{imm}")
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                format!("ext {rd}, {rn}, {rm}, #{imm4}")
            }
            &Inst::VecTbl { rd, rn, rm } => {
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                format!("tbl {rd}, {{ {rn} }}, {rm}")
            }
            &Inst::VecTblExt { rd, ri, rn, rm } => {
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
            }
            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
            }
            &Inst::VecTbl2Ext {
                rd,
                ri,
                rn,
                rn2,
                rm,
            } => {
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
            }
            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
                let rn = pretty_print_reg(rn);

                format!("ld1r {{ {rd} }}, [{rn}]")
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
                let cond = cond.pretty_print(0);
                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
            }
            &Inst::MovToNZCV { rn } => {
                let rn = pretty_print_reg(rn);
                format!("msr nzcv, {rn}")
            }
            &Inst::MovFromNZCV { rd } => {
                let rd = pretty_print_reg(rd.to_reg());
                format!("mrs {rd}, nzcv")
            }
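            // A 1-bit zero-extend has no dedicated instruction; it is
            // materialized by masking the low bit with `and`.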
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                ..
            } => {
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
                let rn = pretty_print_ireg(rn, OperandSize::Size32);
                format!("and {rd}, {rn}, #1")
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                // A zero-extension from 32 to 64 bits is implemented with a
                // "mov" to a 32-bit (W-reg) destination, because writing a W
                // register zeroes the top 32 bits.
                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
                let rn = pretty_print_ireg(rn, OperandSize::Size32);
                format!("mov {rd}, {rn}")
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
                assert!(from_bits <= to_bits);
                let op = match (signed, from_bits) {
                    (false, 8) => "uxtb",
                    (true, 8) => "sxtb",
                    (false, 16) => "uxth",
                    (true, 16) => "sxth",
                    (true, 32) => "sxtw",
                    (true, _) => "sbfx",
                    (false, _) => "ubfx",
                };
                if op == "sbfx" || op == "ubfx" {
                    let dest_size = OperandSize::from_bits(to_bits);
                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
                    let rn = pretty_print_ireg(rn, dest_size);
                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
                } else {
                    let dest_size = if signed {
                        OperandSize::from_bits(to_bits)
                    } else {
                        OperandSize::Size32
                    };
                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
                    format!("{op} {rd}, {rn}")
                }
            }
            &Inst::Call { ref info } => {
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("bl 0{try_call}")
            }
            &Inst::CallInd { ref info } => {
                let rn = pretty_print_reg(info.dest);
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("blr {rn}{try_call}")
            }
            &Inst::ReturnCall { ref info } => {
                let mut s = format!(
                    "return_call {:?} new_stack_arg_size:{}",
                    info.dest, info.new_stack_arg_size
                );
                for ret in &info.uses {
                    let preg = pretty_print_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &Inst::ReturnCallInd { ref info } => {
                let callee = pretty_print_reg(info.dest);
                let mut s = format!(
                    "return_call_ind {callee} new_stack_arg_size:{}",
                    info.new_stack_arg_size
                );
                for ret in &info.uses {
                    let preg = pretty_print_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &Inst::Args { ref args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = pretty_print_reg(arg.preg);
                    let def = pretty_print_reg(arg.vreg.to_reg());
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }
            &Inst::Rets { ref rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = pretty_print_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }
            &Inst::Ret {} => "ret".to_string(),
2645            &Inst::AuthenticatedRet { key, is_hint } => {
2646                let key = match key {
2647                    APIKey::AZ => "az",
2648                    APIKey::BZ => "bz",
2649                    APIKey::ASP => "asp",
2650                    APIKey::BSP => "bsp",
2651                };
2652                match is_hint {
2653                    false => format!("reta{key}"),
2654                    true => format!("auti{key} ; ret"),
2655                }
2656            }
2657            &Inst::Jump { ref dest } => {
2658                let dest = dest.pretty_print(0);
2659                format!("b {dest}")
2660            }
2661            &Inst::CondBr {
2662                ref taken,
2663                ref not_taken,
2664                ref kind,
2665            } => {
2666                let taken = taken.pretty_print(0);
2667                let not_taken = not_taken.pretty_print(0);
2668                match kind {
2669                    &CondBrKind::Zero(reg, size) => {
2670                        let reg = pretty_print_reg_sized(reg, size);
2671                        format!("cbz {reg}, {taken} ; b {not_taken}")
2672                    }
2673                    &CondBrKind::NotZero(reg, size) => {
2674                        let reg = pretty_print_reg_sized(reg, size);
2675                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2676                    }
2677                    &CondBrKind::Cond(c) => {
2678                        let c = c.pretty_print(0);
2679                        format!("b.{c} {taken} ; b {not_taken}")
2680                    }
2681                }
2682            }
2683            &Inst::TestBitAndBranch {
2684                kind,
2685                ref taken,
2686                ref not_taken,
2687                rn,
2688                bit,
2689            } => {
2690                let cond = match kind {
2691                    TestBitAndBranchKind::Z => "z",
2692                    TestBitAndBranchKind::NZ => "nz",
2693                };
2694                let taken = taken.pretty_print(0);
2695                let not_taken = not_taken.pretty_print(0);
2696                let rn = pretty_print_reg(rn);
2697                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2698            }
2699            &Inst::IndirectBr { rn, .. } => {
2700                let rn = pretty_print_reg(rn);
2701                format!("br {rn}")
2702            }
2703            &Inst::Brk => "brk #0xf000".to_string(),
2704            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2705            &Inst::TrapIf {
2706                ref kind,
2707                trap_code,
2708            } => match kind {
2709                &CondBrKind::Zero(reg, size) => {
2710                    let reg = pretty_print_reg_sized(reg, size);
2711                    format!("cbz {reg}, #trap={trap_code}")
2712                }
2713                &CondBrKind::NotZero(reg, size) => {
2714                    let reg = pretty_print_reg_sized(reg, size);
2715                    format!("cbnz {reg}, #trap={trap_code}")
2716                }
2717                &CondBrKind::Cond(c) => {
2718                    let c = c.pretty_print(0);
2719                    format!("b.{c} #trap={trap_code}")
2720                }
2721            },
2722            &Inst::Adr { rd, off } => {
2723                let rd = pretty_print_reg(rd.to_reg());
2724                format!("adr {rd}, pc+{off}")
2725            }
2726            &Inst::Adrp { rd, off } => {
2727                let rd = pretty_print_reg(rd.to_reg());
2728                // This instruction addresses 4KiB pages, so multiply it by the page size.
2729                let byte_offset = off * 4096;
2730                format!("adrp {rd}, pc+{byte_offset}")
2731            }
2732            &Inst::Word4 { data } => format!("data.i32 {data}"),
2733            &Inst::Word8 { data } => format!("data.i64 {data}"),
2734            &Inst::JTSequence {
2735                default,
2736                ref targets,
2737                ridx,
2738                rtmp1,
2739                rtmp2,
2740                ..
2741            } => {
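                // The printed sequence mirrors the emitted one: a bounds
                // check that branches to the default target, a `csel` plus
                // `csdb` to clamp the index under misspeculation, a
                // PC-relative load of the 32-bit table entry, an `add` to
                // form the absolute target, and an indirect branch.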
                let ridx = pretty_print_reg(ridx);
                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
                let default_target = BranchTarget::Label(default).pretty_print(0);
                format!(
                    concat!(
                        "b.hs {} ; ",
                        "csel {}, xzr, {}, hs ; ",
                        "csdb ; ",
                        "adr {}, pc+16 ; ",
                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
                        "add {}, {}, {} ; ",
                        "br {} ; ",
                        "jt_entries {:?}"
                    ),
                    default_target,
                    rtmp2,
                    ridx,
                    rtmp1,
                    rtmp2,
                    rtmp1,
                    rtmp2,
                    rtmp1,
                    rtmp1,
                    rtmp2,
                    rtmp1,
                    targets
                )
            }
            &Inst::LoadExtNameGot { rd, ref name } => {
                let rd = pretty_print_reg(rd.to_reg());
                format!("load_ext_name_got {rd}, {name:?}")
            }
            &Inst::LoadExtNameNear {
                rd,
                ref name,
                offset,
            } => {
                let rd = pretty_print_reg(rd.to_reg());
                format!("load_ext_name_near {rd}, {name:?}+{offset}")
            }
            &Inst::LoadExtNameFar {
                rd,
                ref name,
                offset,
            } => {
                let rd = pretty_print_reg(rd.to_reg());
                format!("load_ext_name_far {rd}, {name:?}+{offset}")
            }
            &Inst::LoadAddr { rd, ref mem } => {
                // TODO: we really should find a better way to avoid duplication of
                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
                // expansion stage (i.e., legalization, but without the slow edit-in-place
                // of the existing legalization framework).
                let mem = mem.clone();
                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
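                // `mem_finalize` may legalize the amode into a short prefix
                // of real instructions plus a simpler mode; print the prefix
                // first so the output matches what emission would produce.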
                let mut ret = String::new();
                for inst in mem_insts.into_iter() {
                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
                }
                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
                };
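                // Fold the sign of the offset into the opcode choice below
                // so that the printed immediate (or loaded constant) is
                // always the non-negative magnitude.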
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
                } else if offset == 0 {
                    let mov = Inst::gen_move(rd, reg, I64);
                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
                } else {
                    let tmp = writable_spilltmp_reg();
                    for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
                }
                ret
            }
            &Inst::Paci { key } => {
                let key = match key {
                    APIKey::AZ => "az",
                    APIKey::BZ => "bz",
                    APIKey::ASP => "asp",
                    APIKey::BSP => "bsp",
                };

                "paci".to_string() + key
            }
            &Inst::Xpaclri => "xpaclri".to_string(),
            &Inst::Bti { targets } => {
                let targets = match targets {
                    BranchTargetType::None => "",
                    BranchTargetType::C => " c",
                    BranchTargetType::J => " j",
                    BranchTargetType::JC => " jc",
                };

                "bti".to_string() + targets
            }
            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),

            &Inst::ElfTlsGetAddr {
                ref symbol,
                rd,
                tmp,
            } => {
                let rd = pretty_print_reg(rd.to_reg());
                let tmp = pretty_print_reg(tmp.to_reg());
                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
            }
            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
                let rd = pretty_print_reg(rd.to_reg());
                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
            }
            &Inst::Unwind { ref inst } => {
                format!("unwind {inst:?}")
            }
            &Inst::DummyUse { reg } => {
                let reg = pretty_print_reg(reg);
                format!("dummy_use {reg}")
            }
            &Inst::LabelAddress { dst, label } => {
                let dst = pretty_print_reg(dst.to_reg());
                format!("label_address {dst}, {label:?}")
            }
            &Inst::SequencePoint {} => "sequence_point".to_string(),
            &Inst::StackProbeLoop { start, end, step } => {
                let start = pretty_print_reg(start.to_reg());
                let end = pretty_print_reg(end);
                let step = step.pretty_print(0);
                format!("stack_probe_loop {start}, {end}, {step}")
            }
        }
    }
}

//=============================================================================
// Label fixups and jump veneers.

/// Different forms of label references for different instruction formats.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables.
    PCRel32,
}

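// As a worked example of the ranges handled below: `Branch19` has a 19-bit
// signed immediate that is shifted left by 2, giving 21 bits of byte offset,
// so a conditional branch can reach roughly +/-1 MiB from the use site.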
impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
            // range. Signed, so +2^(N+1) from zero. Likewise for the other
            // shifted cases below.
            LabelUse::Branch14 => (1 << 15) - 1,
            LabelUse::Branch19 => (1 << 20) - 1,
            LabelUse::Branch26 => (1 << 27) - 1,
            LabelUse::Ldr19 => (1 << 20) - 1,
            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
            // range.
            LabelUse::Adr21 => (1 << 20) - 1,
            LabelUse::PCRel32 => 0x7fffffff,
        }
    }

    /// Maximum PC-relative range (negative).
    fn max_neg_range(self) -> CodeOffset {
        // All forms are two's-complement signed offsets, so the negative limit is one more than
        // the positive limit.
        self.max_pos_range() + 1
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        // The patch spans a single instruction for all of these label-reference types.
        4
    }

    /// Perform the patch.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
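        // Truncating to u32 keeps the low 32 bits of the two's-complement
        // encoding, which is all the field masks below ever look at, so
        // negative offsets need no special handling.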
        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
        let mask = match self {
            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 23..5 inclusive
            LabelUse::PCRel32 => 0xffffffff,
        };
        let pc_rel_shifted = match self {
            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
            _ => {
                debug_assert!(pc_rel & 3 == 0);
                pc_rel >> 2
            }
        };
        let pc_rel_inserted = match self {
            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
            // Note: the *low* two bits of the offset are put in the
            // *high* bits (30, 29).
            LabelUse::Adr21 => (pc_rel_shifted & 0x1ffffc) << 3 | (pc_rel_shifted & 3) << 29,
            LabelUse::PCRel32 => pc_rel_shifted,
        };
        let is_add = match self {
            LabelUse::PCRel32 => true,
            _ => false,
        };
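        // `PCRel32` patches raw data words (jump-table entries), which may
        // already hold an addend, so the offset is added to the existing
        // word rather than masked into an instruction encoding.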
        let insn_word = if is_add {
            insn_word.wrapping_add(pc_rel_inserted)
        } else {
            (insn_word & !mask) | pc_rel_inserted
        };
        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
            LabelUse::Branch26 => true,                      // veneer is a PCRel32
            _ => false,
        }
    }

    /// How large is the veneer, if supported?
    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => 4,
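            // Four instructions plus a 4-byte PC-relative literal; see
            // `generate_veneer` below.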
            LabelUse::Branch26 => 20,
            _ => unreachable!(),
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        20
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => {
                // The veneer is a Branch26 (unconditional branch). Just encode it directly
                // here -- don't bother with constructing an Inst.
                let insn_word = 0b000101 << 26;
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
                (veneer_offset, LabelUse::Branch26)
            }

            // This promotes a 26-bit call/jump to a 32-bit call/jump to
            // extend the range. The jump translates to a jump to a
            // relative location based on the address of the constant loaded
            // from here.
            //
            // If this path is taken from a call instruction then caller-saved
            // registers are available (minus arguments), so x16/x17 are
            // available. Otherwise for intra-function jumps we also reserve
            // x16/x17 as spill-style registers. In both cases these are
            // available for us to use.
            LabelUse::Branch26 => {
                let tmp1 = regs::spilltmp_reg();
                let tmp1_w = regs::writable_spilltmp_reg();
                let tmp2 = regs::tmp2_reg();
                let tmp2_w = regs::writable_tmp2_reg();
                // ldrsw x16, 16
                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
                // adr x17, 12
                let adr = emit::enc_adr(12, tmp2_w);
                // add x16, x16, x17
                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
                // br x16
                let br = emit::enc_br(tmp1);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
                // The 4-byte signed immediate we'll load is after these
                // instructions, 16 bytes in.
                (veneer_offset + 16, LabelUse::PCRel32)
            }

            _ => panic!("Unsupported label-reference type for veneer generation!"),
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
        match (reloc, addend) {
            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn inst_size_test() {
        // This test helps guard against unintentionally growing the size
        // of the Inst enum.
        let expected = if cfg!(target_pointer_width = "32") && !cfg!(target_arch = "arm") {
            28
        } else {
            32
        };
        assert_eq!(expected, std::mem::size_of::<Inst>());
    }
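
    #[test]
    fn label_use_branch26_patch() {
        // A small sanity sketch (not exhaustive): patch an unconditional
        // branch (`b`, opcode 0b000101 in bits 31:26) to target a label 64
        // bytes ahead, and check that the offset, shifted right by 2, lands
        // in the low 26 bits as `patch` inserts it for `Branch26`.
        let mut buffer = u32::to_le_bytes(0b000101 << 26);
        LabelUse::Branch26.patch(&mut buffer, 0, 64);
        let insn = u32::from_le_bytes(buffer);
        assert_eq!(insn, (0b000101 << 26) | (64 >> 2));
    }

    #[test]
    fn label_use_neg_range_is_one_past_pos_range() {
        // Checks the invariant documented on `max_neg_range`: all forms are
        // two's-complement offsets, so the negative reach exceeds the
        // positive reach by exactly one.
        for label_use in [
            LabelUse::Branch14,
            LabelUse::Branch19,
            LabelUse::Branch26,
            LabelUse::Ldr19,
            LabelUse::Adr21,
            LabelUse::PCRel32,
        ] {
            assert_eq!(label_use.max_neg_range(), label_use.max_pos_range() + 1);
        }
    }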
}