// cranelift_codegen/isa/aarch64/inst/mod.rs
1//! This module defines aarch64-specific machine instruction types.
2
3use crate::binemit::{Addend, CodeOffset, Reloc};
4use crate::ir::types::{F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};
5use crate::ir::{MemFlags, Type, types};
6use crate::isa::{CallConv, FunctionAlignment};
7use crate::machinst::*;
8use crate::{CodegenError, CodegenResult, settings};
9
10use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};
11
12use alloc::string::{String, ToString};
13use alloc::vec::Vec;
14use core::fmt::Write;
15use core::slice;
16use smallvec::{SmallVec, smallvec};
17
18pub(crate) mod regs;
19pub(crate) use self::regs::*;
20pub mod imms;
21pub use self::imms::*;
22pub mod args;
23pub use self::args::*;
24pub mod emit;
25pub(crate) use self::emit::*;
26use crate::isa::aarch64::abi::AArch64MachineDeps;
27
28pub(crate) mod unwind;
29
30#[cfg(test)]
31mod emit_tests;
32
33//=============================================================================
34// Instructions (top level): definition
35
36pub use crate::isa::aarch64::lower::isle::generated_code::{
37    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
38    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
39    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
40    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
41};
42
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift, 32-bit. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift, 64-bit. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}
51
/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert, 32-bit. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert, 64-bit. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}
62
63impl BitOp {
64    /// Get the assembly mnemonic for this opcode.
65    pub fn op_str(&self) -> &'static str {
66        match self {
67            BitOp::RBit => "rbit",
68            BitOp::Clz => "clz",
69            BitOp::Cls => "cls",
70            BitOp::Rev16 => "rev16",
71            BitOp::Rev32 => "rev32",
72            BitOp::Rev64 => "rev64",
73        }
74    }
75}
76
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
///
/// `T` is the call destination (presumably a symbolic name for the direct
/// form; a register for the indirect form, which pins it to x1 — see
/// `aarch64_get_operands` — TODO confirm against the direct-call lowering).
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
    /// Whether pointer-auth return addresses are signed even without frame setup.
    pub sign_return_address_all: bool,
}
94
/// Count how many of the lowest `num_half_words` 16-bit half-words of
/// `value` are exactly zero.
fn count_zero_half_words(value: u64, num_half_words: u8) -> usize {
    let mut zeros = 0;
    let mut remaining = value;
    for _ in 0..num_half_words {
        if remaining & 0xffff == 0 {
            zeros += 1;
        }
        remaining >>= 16;
    }
    zeros
}
106
impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
        // if modifications are made here before this is deleted after moving to
        // ISLE then those locations should be updated as well.

        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovZ,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovN,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr,
                size: OperandSize::Size64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            // General case: build the value 16 bits at a time with one
            // MOVZ/MOVN followed by a chain of MOVKs.
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                // Keep only the low 32 bits of `!value` so the half-word
                // counts below compare like with like.
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };

            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);

            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };

            // Collect the (index, half-word) pairs that actually need an
            // instruction; the skipped half-words are produced implicitly by
            // the first MOVZ/MOVN.
            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
                .filter_map(|i| {
                    let imm16 = (value >> (16 * i)) & 0xffff;
                    if imm16 == ignored_halfword {
                        None
                    } else {
                        Some((i, imm16))
                    }
                })
                .collect();

            let mut prev_result = None;
            for (i, imm16) in halfwords {
                let shift = i * 16;

                if let Some(rn) = prev_result {
                    // Not the first instruction: patch this half-word into
                    // the partially-built value with MOVK.
                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                    insts.push(Inst::MovK { rd, rn, imm, size });
                } else {
                    if first_is_inverted {
                        // MOVN inverts its operand, so pre-invert the
                        // half-word before encoding it.
                        let imm =
                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
                                .unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovN,
                            rd,
                            imm,
                            size,
                        });
                    } else {
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovZ,
                            rd,
                            imm,
                            size,
                        });
                    }
                }

                prev_result = Some(rd.to_reg());
            }

            // The single-instruction cases were handled by the branches
            // above, so the general case must emit at least one instruction.
            assert!(prev_result.is_some());

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            I32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            I64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                // Non-integer types: select an FPU/vector load by bit width.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    match bits {
                        128 => Inst::FpuLoad128 { rd, mem, flags },
                        64 => Inst::FpuLoad64 { rd, mem, flags },
                        32 => Inst::FpuLoad32 { rd, mem, flags },
                        16 => Inst::FpuLoad16 { rd, mem, flags },
                        _ => unimplemented!("gen_load({})", ty),
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            I32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            I64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                // Non-integer types: select an FPU/vector store by bit width.
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    match bits {
                        128 => Inst::FpuStore128 { rd, mem, flags },
                        64 => Inst::FpuStore64 { rd, mem, flags },
                        32 => Inst::FpuStore32 { rd, mem, flags },
                        16 => Inst::FpuStore16 { rd, mem, flags },
                        _ => unimplemented!("gen_store({})", ty),
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// What type does this load or store instruction access in memory? When
    /// uimm12 encoding is used, the size of this type is the amount that
    /// immediate offsets are scaled by.
    pub fn mem_type(&self) -> Option<Type> {
        match self {
            Inst::ULoad8 { .. } => Some(I8),
            Inst::SLoad8 { .. } => Some(I8),
            Inst::ULoad16 { .. } => Some(I16),
            Inst::SLoad16 { .. } => Some(I16),
            Inst::ULoad32 { .. } => Some(I32),
            Inst::SLoad32 { .. } => Some(I32),
            Inst::ULoad64 { .. } => Some(I64),
            Inst::FpuLoad16 { .. } => Some(F16),
            Inst::FpuLoad32 { .. } => Some(F32),
            Inst::FpuLoad64 { .. } => Some(F64),
            // 128-bit accesses are reported as I8X16 regardless of lane type.
            Inst::FpuLoad128 { .. } => Some(I8X16),
            Inst::Store8 { .. } => Some(I8),
            Inst::Store16 { .. } => Some(I16),
            Inst::Store32 { .. } => Some(I32),
            Inst::Store64 { .. } => Some(I64),
            Inst::FpuStore16 { .. } => Some(F16),
            Inst::FpuStore32 { .. } => Some(F32),
            Inst::FpuStore64 { .. } => Some(F64),
            Inst::FpuStore128 { .. } => Some(I8X16),
            // Non-memory instructions.
            _ => None,
        }
    }
}
320
321//=============================================================================
322// Instructions: get_regs
323
324fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
325    match memarg {
326        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
327            collector.reg_use(rn);
328        }
329        AMode::RegReg { rn, rm, .. }
330        | AMode::RegScaled { rn, rm, .. }
331        | AMode::RegScaledExtended { rn, rm, .. }
332        | AMode::RegExtended { rn, rm, .. } => {
333            collector.reg_use(rn);
334            collector.reg_use(rm);
335        }
336        AMode::Label { .. } => {}
337        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
338        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
339        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
340        AMode::RegOffset { rn, .. } => {
341            collector.reg_use(rn);
342        }
343        AMode::Const { .. } => {}
344    }
345}
346
347fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
348    match pairmemarg {
349        PairAMode::SignedOffset { reg, .. } => {
350            collector.reg_use(reg);
351        }
352        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
353    }
354}
355
356fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
357    match inst {
358        Inst::AluRRR { rd, rn, rm, .. } => {
359            collector.reg_def(rd);
360            collector.reg_use(rn);
361            collector.reg_use(rm);
362        }
363        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
364            collector.reg_def(rd);
365            collector.reg_use(rn);
366            collector.reg_use(rm);
367            collector.reg_use(ra);
368        }
369        Inst::AluRRImm12 { rd, rn, .. } => {
370            collector.reg_def(rd);
371            collector.reg_use(rn);
372        }
373        Inst::AluRRImmLogic { rd, rn, .. } => {
374            collector.reg_def(rd);
375            collector.reg_use(rn);
376        }
377        Inst::AluRRImmShift { rd, rn, .. } => {
378            collector.reg_def(rd);
379            collector.reg_use(rn);
380        }
381        Inst::AluRRRShift { rd, rn, rm, .. } => {
382            collector.reg_def(rd);
383            collector.reg_use(rn);
384            collector.reg_use(rm);
385        }
386        Inst::AluRRRExtend { rd, rn, rm, .. } => {
387            collector.reg_def(rd);
388            collector.reg_use(rn);
389            collector.reg_use(rm);
390        }
391        Inst::BitRR { rd, rn, .. } => {
392            collector.reg_def(rd);
393            collector.reg_use(rn);
394        }
395        Inst::ULoad8 { rd, mem, .. }
396        | Inst::SLoad8 { rd, mem, .. }
397        | Inst::ULoad16 { rd, mem, .. }
398        | Inst::SLoad16 { rd, mem, .. }
399        | Inst::ULoad32 { rd, mem, .. }
400        | Inst::SLoad32 { rd, mem, .. }
401        | Inst::ULoad64 { rd, mem, .. } => {
402            collector.reg_def(rd);
403            memarg_operands(mem, collector);
404        }
405        Inst::Store8 { rd, mem, .. }
406        | Inst::Store16 { rd, mem, .. }
407        | Inst::Store32 { rd, mem, .. }
408        | Inst::Store64 { rd, mem, .. } => {
409            collector.reg_use(rd);
410            memarg_operands(mem, collector);
411        }
412        Inst::StoreP64 { rt, rt2, mem, .. } => {
413            collector.reg_use(rt);
414            collector.reg_use(rt2);
415            pairmemarg_operands(mem, collector);
416        }
417        Inst::LoadP64 { rt, rt2, mem, .. } => {
418            collector.reg_def(rt);
419            collector.reg_def(rt2);
420            pairmemarg_operands(mem, collector);
421        }
422        Inst::Mov { rd, rm, .. } => {
423            collector.reg_def(rd);
424            collector.reg_use(rm);
425        }
426        Inst::MovFromPReg { rd, rm } => {
427            debug_assert!(rd.to_reg().is_virtual());
428            collector.reg_def(rd);
429            collector.reg_fixed_nonallocatable(*rm);
430        }
431        Inst::MovToPReg { rd, rm } => {
432            debug_assert!(rm.is_virtual());
433            collector.reg_fixed_nonallocatable(*rd);
434            collector.reg_use(rm);
435        }
436        Inst::MovK { rd, rn, .. } => {
437            collector.reg_use(rn);
438            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
439        }
440        Inst::MovWide { rd, .. } => {
441            collector.reg_def(rd);
442        }
443        Inst::CSel { rd, rn, rm, .. } => {
444            collector.reg_def(rd);
445            collector.reg_use(rn);
446            collector.reg_use(rm);
447        }
448        Inst::CSNeg { rd, rn, rm, .. } => {
449            collector.reg_def(rd);
450            collector.reg_use(rn);
451            collector.reg_use(rm);
452        }
453        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
454            collector.reg_def(rd);
455        }
456        Inst::CCmp { rn, rm, .. } => {
457            collector.reg_use(rn);
458            collector.reg_use(rm);
459        }
460        Inst::CCmpImm { rn, .. } => {
461            collector.reg_use(rn);
462        }
463        Inst::AtomicRMWLoop {
464            op,
465            addr,
466            operand,
467            oldval,
468            scratch1,
469            scratch2,
470            ..
471        } => {
472            collector.reg_fixed_use(addr, xreg(25));
473            collector.reg_fixed_use(operand, xreg(26));
474            collector.reg_fixed_def(oldval, xreg(27));
475            collector.reg_fixed_def(scratch1, xreg(24));
476            if *op != AtomicRMWLoopOp::Xchg {
477                collector.reg_fixed_def(scratch2, xreg(28));
478            }
479        }
480        Inst::AtomicRMW { rs, rt, rn, .. } => {
481            collector.reg_use(rs);
482            collector.reg_def(rt);
483            collector.reg_use(rn);
484        }
485        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
486            collector.reg_reuse_def(rd, 1); // reuse `rs`.
487            collector.reg_use(rs);
488            collector.reg_use(rt);
489            collector.reg_use(rn);
490        }
491        Inst::AtomicCASLoop {
492            addr,
493            expected,
494            replacement,
495            oldval,
496            scratch,
497            ..
498        } => {
499            collector.reg_fixed_use(addr, xreg(25));
500            collector.reg_fixed_use(expected, xreg(26));
501            collector.reg_fixed_use(replacement, xreg(28));
502            collector.reg_fixed_def(oldval, xreg(27));
503            collector.reg_fixed_def(scratch, xreg(24));
504        }
505        Inst::LoadAcquire { rt, rn, .. } => {
506            collector.reg_use(rn);
507            collector.reg_def(rt);
508        }
509        Inst::StoreRelease { rt, rn, .. } => {
510            collector.reg_use(rn);
511            collector.reg_use(rt);
512        }
513        Inst::Fence {} | Inst::Csdb {} => {}
514        Inst::FpuMove32 { rd, rn } => {
515            collector.reg_def(rd);
516            collector.reg_use(rn);
517        }
518        Inst::FpuMove64 { rd, rn } => {
519            collector.reg_def(rd);
520            collector.reg_use(rn);
521        }
522        Inst::FpuMove128 { rd, rn } => {
523            collector.reg_def(rd);
524            collector.reg_use(rn);
525        }
526        Inst::FpuMoveFromVec { rd, rn, .. } => {
527            collector.reg_def(rd);
528            collector.reg_use(rn);
529        }
530        Inst::FpuExtend { rd, rn, .. } => {
531            collector.reg_def(rd);
532            collector.reg_use(rn);
533        }
534        Inst::FpuRR { rd, rn, .. } => {
535            collector.reg_def(rd);
536            collector.reg_use(rn);
537        }
538        Inst::FpuRRR { rd, rn, rm, .. } => {
539            collector.reg_def(rd);
540            collector.reg_use(rn);
541            collector.reg_use(rm);
542        }
543        Inst::FpuRRI { rd, rn, .. } => {
544            collector.reg_def(rd);
545            collector.reg_use(rn);
546        }
547        Inst::FpuRRIMod { rd, ri, rn, .. } => {
548            collector.reg_reuse_def(rd, 1); // reuse `ri`.
549            collector.reg_use(ri);
550            collector.reg_use(rn);
551        }
552        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
553            collector.reg_def(rd);
554            collector.reg_use(rn);
555            collector.reg_use(rm);
556            collector.reg_use(ra);
557        }
558        Inst::VecMisc { rd, rn, .. } => {
559            collector.reg_def(rd);
560            collector.reg_use(rn);
561        }
562
563        Inst::VecLanes { rd, rn, .. } => {
564            collector.reg_def(rd);
565            collector.reg_use(rn);
566        }
567        Inst::VecShiftImm { rd, rn, .. } => {
568            collector.reg_def(rd);
569            collector.reg_use(rn);
570        }
571        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
572            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
573            collector.reg_use(ri);
574            collector.reg_use(rn);
575        }
576        Inst::VecExtract { rd, rn, rm, .. } => {
577            collector.reg_def(rd);
578            collector.reg_use(rn);
579            collector.reg_use(rm);
580        }
581        Inst::VecTbl { rd, rn, rm } => {
582            collector.reg_use(rn);
583            collector.reg_use(rm);
584            collector.reg_def(rd);
585        }
586        Inst::VecTblExt { rd, ri, rn, rm } => {
587            collector.reg_use(rn);
588            collector.reg_use(rm);
589            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
590            collector.reg_use(ri);
591        }
592
593        Inst::VecTbl2 { rd, rn, rn2, rm } => {
594            // Constrain to v30 / v31 so that we satisfy the "adjacent
595            // registers" constraint without use of pinned vregs in
596            // lowering.
597            collector.reg_fixed_use(rn, vreg(30));
598            collector.reg_fixed_use(rn2, vreg(31));
599            collector.reg_use(rm);
600            collector.reg_def(rd);
601        }
602        Inst::VecTbl2Ext {
603            rd,
604            ri,
605            rn,
606            rn2,
607            rm,
608        } => {
609            // Constrain to v30 / v31 so that we satisfy the "adjacent
610            // registers" constraint without use of pinned vregs in
611            // lowering.
612            collector.reg_fixed_use(rn, vreg(30));
613            collector.reg_fixed_use(rn2, vreg(31));
614            collector.reg_use(rm);
615            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
616            collector.reg_use(ri);
617        }
618        Inst::VecLoadReplicate { rd, rn, .. } => {
619            collector.reg_def(rd);
620            collector.reg_use(rn);
621        }
622        Inst::VecCSel { rd, rn, rm, .. } => {
623            collector.reg_def(rd);
624            collector.reg_use(rn);
625            collector.reg_use(rm);
626        }
627        Inst::FpuCmp { rn, rm, .. } => {
628            collector.reg_use(rn);
629            collector.reg_use(rm);
630        }
631        Inst::FpuLoad16 { rd, mem, .. } => {
632            collector.reg_def(rd);
633            memarg_operands(mem, collector);
634        }
635        Inst::FpuLoad32 { rd, mem, .. } => {
636            collector.reg_def(rd);
637            memarg_operands(mem, collector);
638        }
639        Inst::FpuLoad64 { rd, mem, .. } => {
640            collector.reg_def(rd);
641            memarg_operands(mem, collector);
642        }
643        Inst::FpuLoad128 { rd, mem, .. } => {
644            collector.reg_def(rd);
645            memarg_operands(mem, collector);
646        }
647        Inst::FpuStore16 { rd, mem, .. } => {
648            collector.reg_use(rd);
649            memarg_operands(mem, collector);
650        }
651        Inst::FpuStore32 { rd, mem, .. } => {
652            collector.reg_use(rd);
653            memarg_operands(mem, collector);
654        }
655        Inst::FpuStore64 { rd, mem, .. } => {
656            collector.reg_use(rd);
657            memarg_operands(mem, collector);
658        }
659        Inst::FpuStore128 { rd, mem, .. } => {
660            collector.reg_use(rd);
661            memarg_operands(mem, collector);
662        }
663        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
664            collector.reg_def(rt);
665            collector.reg_def(rt2);
666            pairmemarg_operands(mem, collector);
667        }
668        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
669            collector.reg_use(rt);
670            collector.reg_use(rt2);
671            pairmemarg_operands(mem, collector);
672        }
673        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
674            collector.reg_def(rt);
675            collector.reg_def(rt2);
676            pairmemarg_operands(mem, collector);
677        }
678        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
679            collector.reg_use(rt);
680            collector.reg_use(rt2);
681            pairmemarg_operands(mem, collector);
682        }
683        Inst::FpuToInt { rd, rn, .. } => {
684            collector.reg_def(rd);
685            collector.reg_use(rn);
686        }
687        Inst::IntToFpu { rd, rn, .. } => {
688            collector.reg_def(rd);
689            collector.reg_use(rn);
690        }
691        Inst::FpuCSel16 { rd, rn, rm, .. }
692        | Inst::FpuCSel32 { rd, rn, rm, .. }
693        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
694            collector.reg_def(rd);
695            collector.reg_use(rn);
696            collector.reg_use(rm);
697        }
698        Inst::FpuRound { rd, rn, .. } => {
699            collector.reg_def(rd);
700            collector.reg_use(rn);
701        }
702        Inst::MovToFpu { rd, rn, .. } => {
703            collector.reg_def(rd);
704            collector.reg_use(rn);
705        }
706        Inst::FpuMoveFPImm { rd, .. } => {
707            collector.reg_def(rd);
708        }
709        Inst::MovToVec { rd, ri, rn, .. } => {
710            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
711            collector.reg_use(ri);
712            collector.reg_use(rn);
713        }
714        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
715            collector.reg_def(rd);
716            collector.reg_use(rn);
717        }
718        Inst::VecDup { rd, rn, .. } => {
719            collector.reg_def(rd);
720            collector.reg_use(rn);
721        }
722        Inst::VecDupFromFpu { rd, rn, .. } => {
723            collector.reg_def(rd);
724            collector.reg_use(rn);
725        }
726        Inst::VecDupFPImm { rd, .. } => {
727            collector.reg_def(rd);
728        }
729        Inst::VecDupImm { rd, .. } => {
730            collector.reg_def(rd);
731        }
732        Inst::VecExtend { rd, rn, .. } => {
733            collector.reg_def(rd);
734            collector.reg_use(rn);
735        }
736        Inst::VecMovElement { rd, ri, rn, .. } => {
737            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
738            collector.reg_use(ri);
739            collector.reg_use(rn);
740        }
741        Inst::VecRRLong { rd, rn, .. } => {
742            collector.reg_def(rd);
743            collector.reg_use(rn);
744        }
745        Inst::VecRRNarrowLow { rd, rn, .. } => {
746            collector.reg_use(rn);
747            collector.reg_def(rd);
748        }
749        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
750            collector.reg_use(rn);
751            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
752            collector.reg_use(ri);
753        }
754        Inst::VecRRPair { rd, rn, .. } => {
755            collector.reg_def(rd);
756            collector.reg_use(rn);
757        }
758        Inst::VecRRRLong { rd, rn, rm, .. } => {
759            collector.reg_def(rd);
760            collector.reg_use(rn);
761            collector.reg_use(rm);
762        }
763        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
764            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
765            collector.reg_use(ri);
766            collector.reg_use(rn);
767            collector.reg_use(rm);
768        }
769        Inst::VecRRPairLong { rd, rn, .. } => {
770            collector.reg_def(rd);
771            collector.reg_use(rn);
772        }
773        Inst::VecRRR { rd, rn, rm, .. } => {
774            collector.reg_def(rd);
775            collector.reg_use(rn);
776            collector.reg_use(rm);
777        }
778        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
779            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
780            collector.reg_use(ri);
781            collector.reg_use(rn);
782            collector.reg_use(rm);
783        }
784        Inst::MovToNZCV { rn } => {
785            collector.reg_use(rn);
786        }
787        Inst::MovFromNZCV { rd } => {
788            collector.reg_def(rd);
789        }
790        Inst::Extend { rd, rn, .. } => {
791            collector.reg_def(rd);
792            collector.reg_use(rn);
793        }
794        Inst::Args { args } => {
795            for ArgPair { vreg, preg } in args {
796                collector.reg_fixed_def(vreg, *preg);
797            }
798        }
799        Inst::Rets { rets } => {
800            for RetPair { vreg, preg } in rets {
801                collector.reg_fixed_use(vreg, *preg);
802            }
803        }
804        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
805        Inst::Jump { .. } => {}
806        Inst::Call { info, .. } => {
807            let CallInfo { uses, defs, .. } = &mut **info;
808            for CallArgPair { vreg, preg } in uses {
809                collector.reg_fixed_use(vreg, *preg);
810            }
811            for CallRetPair { vreg, location } in defs {
812                match location {
813                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
814                    RetLocation::Stack(..) => collector.any_def(vreg),
815                }
816            }
817            collector.reg_clobbers(info.clobbers);
818            if let Some(try_call_info) = &mut info.try_call_info {
819                try_call_info.collect_operands(collector);
820            }
821        }
822        Inst::CallInd { info, .. } => {
823            let CallInfo {
824                dest, uses, defs, ..
825            } = &mut **info;
826            collector.reg_use(dest);
827            for CallArgPair { vreg, preg } in uses {
828                collector.reg_fixed_use(vreg, *preg);
829            }
830            for CallRetPair { vreg, location } in defs {
831                match location {
832                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
833                    RetLocation::Stack(..) => collector.any_def(vreg),
834                }
835            }
836            collector.reg_clobbers(info.clobbers);
837            if let Some(try_call_info) = &mut info.try_call_info {
838                try_call_info.collect_operands(collector);
839            }
840        }
841        Inst::ReturnCall { info } => {
842            for CallArgPair { vreg, preg } in &mut info.uses {
843                collector.reg_fixed_use(vreg, *preg);
844            }
845        }
846        Inst::ReturnCallInd { info } => {
847            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
848            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
849            // register that won't be clobbered by the callee-save restore code emitted with a
850            // return_call_indirect.
851            collector.reg_fixed_use(&mut info.dest, xreg(1));
852            for CallArgPair { vreg, preg } in &mut info.uses {
853                collector.reg_fixed_use(vreg, *preg);
854            }
855        }
856        Inst::CondBr { kind, .. } => match kind {
857            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
858            CondBrKind::Cond(_) => {}
859        },
860        Inst::TestBitAndBranch { rn, .. } => {
861            collector.reg_use(rn);
862        }
863        Inst::IndirectBr { rn, .. } => {
864            collector.reg_use(rn);
865        }
866        Inst::Nop0 | Inst::Nop4 => {}
867        Inst::Brk => {}
868        Inst::Udf { .. } => {}
869        Inst::TrapIf { kind, .. } => match kind {
870            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
871            CondBrKind::Cond(_) => {}
872        },
873        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
874            collector.reg_def(rd);
875        }
876        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
877        Inst::JTSequence {
878            ridx, rtmp1, rtmp2, ..
879        } => {
880            collector.reg_use(ridx);
881            collector.reg_early_def(rtmp1);
882            collector.reg_early_def(rtmp2);
883        }
884        Inst::LoadExtNameGot { rd, .. }
885        | Inst::LoadExtNameNear { rd, .. }
886        | Inst::LoadExtNameFar { rd, .. } => {
887            collector.reg_def(rd);
888        }
889        Inst::LoadAddr { rd, mem } => {
890            collector.reg_def(rd);
891            memarg_operands(mem, collector);
892        }
893        Inst::Paci { .. } | Inst::Xpaclri => {
894            // Neither LR nor SP is an allocatable register, so there is no need
895            // to do anything.
896        }
897        Inst::Bti { .. } => {}
898
899        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
900            // TLSDESC has a very neat calling convention. It is required to preserve
901            // all registers except x0 and x30. X30 is non allocatable in cranelift since
902            // its the link register.
903            //
904            // Additionally we need a second register as a temporary register for the
905            // TLSDESC sequence. This register can be any register other than x0 (and x30).
906            collector.reg_fixed_def(rd, regs::xreg(0));
907            collector.reg_early_def(tmp);
908        }
909        Inst::MachOTlsGetAddr { rd, .. } => {
910            collector.reg_fixed_def(rd, regs::xreg(0));
911            let mut clobbers =
912                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
913            clobbers.remove(regs::xreg_preg(0));
914            collector.reg_clobbers(clobbers);
915        }
916        Inst::Unwind { .. } => {}
917        Inst::EmitIsland { .. } => {}
918        Inst::DummyUse { reg } => {
919            collector.reg_use(reg);
920        }
921        Inst::LabelAddress { dst, .. } => {
922            collector.reg_def(dst);
923        }
924        Inst::SequencePoint { .. } => {}
925        Inst::StackProbeLoop { start, end, .. } => {
926            collector.reg_early_def(start);
927            collector.reg_use(end);
928        }
929    }
930}
931
932//=============================================================================
933// Instructions: misc functions and external interface
934
935impl MachInst for Inst {
936    type ABIMachineSpec = AArch64MachineDeps;
937    type LabelUse = LabelUse;
938
939    // "CLIF" in hex, to make the trap recognizable during
940    // debugging.
941    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();
942
943    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
944        aarch64_get_operands(self, collector);
945    }
946
947    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
948        match self {
949            &Inst::Mov {
950                size: OperandSize::Size64,
951                rd,
952                rm,
953            } => Some((rd, rm)),
954            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
955            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
956            _ => None,
957        }
958    }
959
960    fn is_included_in_clobbers(&self) -> bool {
961        let (caller, callee, is_exception) = match self {
962            Inst::Args { .. } => return false,
963            Inst::Call { info } => (
964                info.caller_conv,
965                info.callee_conv,
966                info.try_call_info.is_some(),
967            ),
968            Inst::CallInd { info } => (
969                info.caller_conv,
970                info.callee_conv,
971                info.try_call_info.is_some(),
972            ),
973            _ => return true,
974        };
975
976        // We exclude call instructions from the clobber-set when they are calls
977        // from caller to callee that both clobber the same register (such as
978        // using the same or similar ABIs). Such calls cannot possibly force any
979        // new registers to be saved in the prologue, because anything that the
980        // callee clobbers, the caller is also allowed to clobber. This both
981        // saves work and enables us to more precisely follow the
982        // half-caller-save, half-callee-save SysV ABI for some vector
983        // registers.
984        //
985        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
986        // more information on this ABI-implementation hack.
987        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
988        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);
989
990        let mut all_clobbers = caller_clobbers;
991        all_clobbers.union_from(callee_clobbers);
992        all_clobbers != caller_clobbers
993    }
994
995    fn is_trap(&self) -> bool {
996        match self {
997            Self::Udf { .. } => true,
998            _ => false,
999        }
1000    }
1001
1002    fn is_args(&self) -> bool {
1003        match self {
1004            Self::Args { .. } => true,
1005            _ => false,
1006        }
1007    }
1008
1009    fn call_type(&self) -> CallType {
1010        match self {
1011            Inst::Call { .. }
1012            | Inst::CallInd { .. }
1013            | Inst::ElfTlsGetAddr { .. }
1014            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,
1015
1016            Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,
1017
1018            _ => CallType::None,
1019        }
1020    }
1021
1022    fn is_term(&self) -> MachTerminator {
1023        match self {
1024            &Inst::Rets { .. } => MachTerminator::Ret,
1025            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
1026            &Inst::Jump { .. } => MachTerminator::Branch,
1027            &Inst::CondBr { .. } => MachTerminator::Branch,
1028            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
1029            &Inst::IndirectBr { .. } => MachTerminator::Branch,
1030            &Inst::JTSequence { .. } => MachTerminator::Branch,
1031            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1032            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
1033            _ => MachTerminator::None,
1034        }
1035    }
1036
1037    fn is_mem_access(&self) -> bool {
1038        match self {
1039            &Inst::ULoad8 { .. }
1040            | &Inst::SLoad8 { .. }
1041            | &Inst::ULoad16 { .. }
1042            | &Inst::SLoad16 { .. }
1043            | &Inst::ULoad32 { .. }
1044            | &Inst::SLoad32 { .. }
1045            | &Inst::ULoad64 { .. }
1046            | &Inst::LoadP64 { .. }
1047            | &Inst::FpuLoad16 { .. }
1048            | &Inst::FpuLoad32 { .. }
1049            | &Inst::FpuLoad64 { .. }
1050            | &Inst::FpuLoad128 { .. }
1051            | &Inst::FpuLoadP64 { .. }
1052            | &Inst::FpuLoadP128 { .. }
1053            | &Inst::Store8 { .. }
1054            | &Inst::Store16 { .. }
1055            | &Inst::Store32 { .. }
1056            | &Inst::Store64 { .. }
1057            | &Inst::StoreP64 { .. }
1058            | &Inst::FpuStore16 { .. }
1059            | &Inst::FpuStore32 { .. }
1060            | &Inst::FpuStore64 { .. }
1061            | &Inst::FpuStore128 { .. } => true,
1062            // TODO: verify this carefully
1063            _ => false,
1064        }
1065    }
1066
1067    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
1068        let bits = ty.bits();
1069
1070        assert!(bits <= 128);
1071        assert!(to_reg.to_reg().class() == from_reg.class());
1072        match from_reg.class() {
1073            RegClass::Int => Inst::Mov {
1074                size: OperandSize::Size64,
1075                rd: to_reg,
1076                rm: from_reg,
1077            },
1078            RegClass::Float => {
1079                if bits > 64 {
1080                    Inst::FpuMove128 {
1081                        rd: to_reg,
1082                        rn: from_reg,
1083                    }
1084                } else {
1085                    Inst::FpuMove64 {
1086                        rd: to_reg,
1087                        rn: from_reg,
1088                    }
1089                }
1090            }
1091            RegClass::Vector => unreachable!(),
1092        }
1093    }
1094
1095    fn is_safepoint(&self) -> bool {
1096        match self {
1097            Inst::Call { .. } | Inst::CallInd { .. } => true,
1098            _ => false,
1099        }
1100    }
1101
1102    fn gen_dummy_use(reg: Reg) -> Inst {
1103        Inst::DummyUse { reg }
1104    }
1105
1106    fn gen_nop(preferred_size: usize) -> Inst {
1107        if preferred_size == 0 {
1108            return Inst::Nop0;
1109        }
1110        // We can't give a NOP (or any insn) < 4 bytes.
1111        assert!(preferred_size >= 4);
1112        Inst::Nop4
1113    }
1114
1115    fn gen_nop_units() -> Vec<Vec<u8>> {
1116        vec![vec![0x1f, 0x20, 0x03, 0xd5]]
1117    }
1118
1119    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
1120        match ty {
1121            I8 => Ok((&[RegClass::Int], &[I8])),
1122            I16 => Ok((&[RegClass::Int], &[I16])),
1123            I32 => Ok((&[RegClass::Int], &[I32])),
1124            I64 => Ok((&[RegClass::Int], &[I64])),
1125            F16 => Ok((&[RegClass::Float], &[F16])),
1126            F32 => Ok((&[RegClass::Float], &[F32])),
1127            F64 => Ok((&[RegClass::Float], &[F64])),
1128            F128 => Ok((&[RegClass::Float], &[F128])),
1129            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
1130            _ if ty.is_vector() && ty.bits() <= 128 => {
1131                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
1132                Ok((
1133                    &[RegClass::Float],
1134                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
1135                ))
1136            }
1137            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
1138            _ => Err(CodegenError::Unsupported(format!(
1139                "Unexpected SSA-value type: {ty}"
1140            ))),
1141        }
1142    }
1143
1144    fn canonical_type_for_rc(rc: RegClass) -> Type {
1145        match rc {
1146            RegClass::Float => types::I8X16,
1147            RegClass::Int => types::I64,
1148            RegClass::Vector => unreachable!(),
1149        }
1150    }
1151
1152    fn gen_jump(target: MachLabel) -> Inst {
1153        Inst::Jump {
1154            dest: BranchTarget::Label(target),
1155        }
1156    }
1157
1158    fn worst_case_size() -> CodeOffset {
1159        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
1160        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
1161        // 64-bit f64 constants.
1162        //
1163        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
1164        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
1165        // feasible for other reasons).
1166        44
1167    }
1168
1169    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
1170        RegClass::Int
1171    }
1172
1173    fn gen_block_start(
1174        is_indirect_branch_target: bool,
1175        is_forward_edge_cfi_enabled: bool,
1176    ) -> Option<Self> {
1177        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
1178            Some(Inst::Bti {
1179                targets: BranchTargetType::J,
1180            })
1181        } else {
1182            None
1183        }
1184    }
1185
1186    fn function_alignment() -> FunctionAlignment {
1187        // We use 32-byte alignment for performance reasons, but for correctness
1188        // we would only need 4-byte alignment.
1189        FunctionAlignment {
1190            minimum: 4,
1191            preferred: 32,
1192        }
1193    }
1194}
1195
1196//=============================================================================
1197// Pretty-printing of instructions.
1198
1199fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
1200    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
1201    let mut mem_str = mem_insts
1202        .into_iter()
1203        .map(|inst| inst.print_with_state(&mut EmitState::default()))
1204        .collect::<Vec<_>>()
1205        .join(" ; ");
1206    if !mem_str.is_empty() {
1207        mem_str += " ; ";
1208    }
1209
1210    let mem = mem.pretty_print(access_ty.bytes() as u8);
1211    (mem_str, mem)
1212}
1213
1214fn pretty_print_try_call(info: &TryCallInfo) -> String {
1215    format!(
1216        "; b {:?}; catch [{}]",
1217        info.continuation,
1218        info.pretty_print_dests()
1219    )
1220}
1221
1222impl Inst {
1223    fn print_with_state(&self, state: &mut EmitState) -> String {
1224        fn op_name(alu_op: ALUOp) -> &'static str {
1225            match alu_op {
1226                ALUOp::Add => "add",
1227                ALUOp::Sub => "sub",
1228                ALUOp::Orr => "orr",
1229                ALUOp::And => "and",
1230                ALUOp::AndS => "ands",
1231                ALUOp::Eor => "eor",
1232                ALUOp::AddS => "adds",
1233                ALUOp::SubS => "subs",
1234                ALUOp::SMulH => "smulh",
1235                ALUOp::UMulH => "umulh",
1236                ALUOp::SDiv => "sdiv",
1237                ALUOp::UDiv => "udiv",
1238                ALUOp::AndNot => "bic",
1239                ALUOp::OrrNot => "orn",
1240                ALUOp::EorNot => "eon",
1241                ALUOp::Extr => "extr",
1242                ALUOp::Lsr => "lsr",
1243                ALUOp::Asr => "asr",
1244                ALUOp::Lsl => "lsl",
1245                ALUOp::Adc => "adc",
1246                ALUOp::AdcS => "adcs",
1247                ALUOp::Sbc => "sbc",
1248                ALUOp::SbcS => "sbcs",
1249            }
1250        }
1251
1252        match self {
1253            &Inst::Nop0 => "nop-zero-len".to_string(),
1254            &Inst::Nop4 => "nop".to_string(),
1255            &Inst::AluRRR {
1256                alu_op,
1257                size,
1258                rd,
1259                rn,
1260                rm,
1261            } => {
1262                let op = op_name(alu_op);
1263                let rd = pretty_print_ireg(rd.to_reg(), size);
1264                let rn = pretty_print_ireg(rn, size);
1265                let rm = pretty_print_ireg(rm, size);
1266                format!("{op} {rd}, {rn}, {rm}")
1267            }
1268            &Inst::AluRRRR {
1269                alu_op,
1270                size,
1271                rd,
1272                rn,
1273                rm,
1274                ra,
1275            } => {
1276                let (op, da_size) = match alu_op {
1277                    ALUOp3::MAdd => ("madd", size),
1278                    ALUOp3::MSub => ("msub", size),
1279                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
1280                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
1281                };
1282                let rd = pretty_print_ireg(rd.to_reg(), da_size);
1283                let rn = pretty_print_ireg(rn, size);
1284                let rm = pretty_print_ireg(rm, size);
1285                let ra = pretty_print_ireg(ra, da_size);
1286
1287                format!("{op} {rd}, {rn}, {rm}, {ra}")
1288            }
1289            &Inst::AluRRImm12 {
1290                alu_op,
1291                size,
1292                rd,
1293                rn,
1294                ref imm12,
1295            } => {
1296                let op = op_name(alu_op);
1297                let rd = pretty_print_ireg(rd.to_reg(), size);
1298                let rn = pretty_print_ireg(rn, size);
1299
1300                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
1301                    // special-case MOV (used for moving into SP).
1302                    format!("mov {rd}, {rn}")
1303                } else {
1304                    let imm12 = imm12.pretty_print(0);
1305                    format!("{op} {rd}, {rn}, {imm12}")
1306                }
1307            }
1308            &Inst::AluRRImmLogic {
1309                alu_op,
1310                size,
1311                rd,
1312                rn,
1313                ref imml,
1314            } => {
1315                let op = op_name(alu_op);
1316                let rd = pretty_print_ireg(rd.to_reg(), size);
1317                let rn = pretty_print_ireg(rn, size);
1318                let imml = imml.pretty_print(0);
1319                format!("{op} {rd}, {rn}, {imml}")
1320            }
1321            &Inst::AluRRImmShift {
1322                alu_op,
1323                size,
1324                rd,
1325                rn,
1326                ref immshift,
1327            } => {
1328                let op = op_name(alu_op);
1329                let rd = pretty_print_ireg(rd.to_reg(), size);
1330                let rn = pretty_print_ireg(rn, size);
1331                let immshift = immshift.pretty_print(0);
1332                format!("{op} {rd}, {rn}, {immshift}")
1333            }
1334            &Inst::AluRRRShift {
1335                alu_op,
1336                size,
1337                rd,
1338                rn,
1339                rm,
1340                ref shiftop,
1341            } => {
1342                let op = op_name(alu_op);
1343                let rd = pretty_print_ireg(rd.to_reg(), size);
1344                let rn = pretty_print_ireg(rn, size);
1345                let rm = pretty_print_ireg(rm, size);
1346                let shiftop = shiftop.pretty_print(0);
1347                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
1348            }
1349            &Inst::AluRRRExtend {
1350                alu_op,
1351                size,
1352                rd,
1353                rn,
1354                rm,
1355                ref extendop,
1356            } => {
1357                let op = op_name(alu_op);
1358                let rd = pretty_print_ireg(rd.to_reg(), size);
1359                let rn = pretty_print_ireg(rn, size);
1360                let rm = pretty_print_ireg(rm, size);
1361                let extendop = extendop.pretty_print(0);
1362                format!("{op} {rd}, {rn}, {rm}, {extendop}")
1363            }
1364            &Inst::BitRR { op, size, rd, rn } => {
1365                let op = op.op_str();
1366                let rd = pretty_print_ireg(rd.to_reg(), size);
1367                let rn = pretty_print_ireg(rn, size);
1368                format!("{op} {rd}, {rn}")
1369            }
1370            &Inst::ULoad8 { rd, ref mem, .. }
1371            | &Inst::SLoad8 { rd, ref mem, .. }
1372            | &Inst::ULoad16 { rd, ref mem, .. }
1373            | &Inst::SLoad16 { rd, ref mem, .. }
1374            | &Inst::ULoad32 { rd, ref mem, .. }
1375            | &Inst::SLoad32 { rd, ref mem, .. }
1376            | &Inst::ULoad64 { rd, ref mem, .. } => {
1377                let is_unscaled = match &mem {
1378                    &AMode::Unscaled { .. } => true,
1379                    _ => false,
1380                };
1381                let (op, size) = match (self, is_unscaled) {
1382                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
1383                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
1384                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
1385                    (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
1386                    (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
1387                    (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
1388                    (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
1389                    (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
1390                    (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
1391                    (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
1392                    (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
1393                    (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
1394                    (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
1395                    (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
1396                    _ => unreachable!(),
1397                };
1398
1399                let rd = pretty_print_ireg(rd.to_reg(), size);
1400                let mem = mem.clone();
1401                let access_ty = self.mem_type().unwrap();
1402                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1403
1404                format!("{mem_str}{op} {rd}, {mem}")
1405            }
1406            &Inst::Store8 { rd, ref mem, .. }
1407            | &Inst::Store16 { rd, ref mem, .. }
1408            | &Inst::Store32 { rd, ref mem, .. }
1409            | &Inst::Store64 { rd, ref mem, .. } => {
1410                let is_unscaled = match &mem {
1411                    &AMode::Unscaled { .. } => true,
1412                    _ => false,
1413                };
1414                let (op, size) = match (self, is_unscaled) {
1415                    (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
1416                    (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
1417                    (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
1418                    (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
1419                    (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
1420                    (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
1421                    (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
1422                    (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
1423                    _ => unreachable!(),
1424                };
1425
1426                let rd = pretty_print_ireg(rd, size);
1427                let mem = mem.clone();
1428                let access_ty = self.mem_type().unwrap();
1429                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1430
1431                format!("{mem_str}{op} {rd}, {mem}")
1432            }
1433            &Inst::StoreP64 {
1434                rt, rt2, ref mem, ..
1435            } => {
1436                let rt = pretty_print_ireg(rt, OperandSize::Size64);
1437                let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);
1438                let mem = mem.clone();
1439                let mem = mem.pretty_print_default();
1440                format!("stp {rt}, {rt2}, {mem}")
1441            }
1442            &Inst::LoadP64 {
1443                rt, rt2, ref mem, ..
1444            } => {
1445                let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);
1446                let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);
1447                let mem = mem.clone();
1448                let mem = mem.pretty_print_default();
1449                format!("ldp {rt}, {rt2}, {mem}")
1450            }
1451            &Inst::Mov { size, rd, rm } => {
1452                let rd = pretty_print_ireg(rd.to_reg(), size);
1453                let rm = pretty_print_ireg(rm, size);
1454                format!("mov {rd}, {rm}")
1455            }
1456            &Inst::MovFromPReg { rd, rm } => {
1457                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1458                let rm = show_ireg_sized(rm.into(), OperandSize::Size64);
1459                format!("mov {rd}, {rm}")
1460            }
1461            &Inst::MovToPReg { rd, rm } => {
1462                let rd = show_ireg_sized(rd.into(), OperandSize::Size64);
1463                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1464                format!("mov {rd}, {rm}")
1465            }
1466            &Inst::MovWide {
1467                op,
1468                rd,
1469                ref imm,
1470                size,
1471            } => {
1472                let op_str = match op {
1473                    MoveWideOp::MovZ => "movz",
1474                    MoveWideOp::MovN => "movn",
1475                };
1476                let rd = pretty_print_ireg(rd.to_reg(), size);
1477                let imm = imm.pretty_print(0);
1478                format!("{op_str} {rd}, {imm}")
1479            }
1480            &Inst::MovK {
1481                rd,
1482                rn,
1483                ref imm,
1484                size,
1485            } => {
1486                let rn = pretty_print_ireg(rn, size);
1487                let rd = pretty_print_ireg(rd.to_reg(), size);
1488                let imm = imm.pretty_print(0);
1489                format!("movk {rd}, {rn}, {imm}")
1490            }
1491            &Inst::CSel { rd, rn, rm, cond } => {
1492                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1493                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1494                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1495                let cond = cond.pretty_print(0);
1496                format!("csel {rd}, {rn}, {rm}, {cond}")
1497            }
1498            &Inst::CSNeg { rd, rn, rm, cond } => {
1499                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1500                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1501                let rm = pretty_print_ireg(rm, OperandSize::Size64);
1502                let cond = cond.pretty_print(0);
1503                format!("csneg {rd}, {rn}, {rm}, {cond}")
1504            }
1505            &Inst::CSet { rd, cond } => {
1506                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1507                let cond = cond.pretty_print(0);
1508                format!("cset {rd}, {cond}")
1509            }
1510            &Inst::CSetm { rd, cond } => {
1511                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);
1512                let cond = cond.pretty_print(0);
1513                format!("csetm {rd}, {cond}")
1514            }
1515            &Inst::CCmp {
1516                size,
1517                rn,
1518                rm,
1519                nzcv,
1520                cond,
1521            } => {
1522                let rn = pretty_print_ireg(rn, size);
1523                let rm = pretty_print_ireg(rm, size);
1524                let nzcv = nzcv.pretty_print(0);
1525                let cond = cond.pretty_print(0);
1526                format!("ccmp {rn}, {rm}, {nzcv}, {cond}")
1527            }
1528            &Inst::CCmpImm {
1529                size,
1530                rn,
1531                imm,
1532                nzcv,
1533                cond,
1534            } => {
1535                let rn = pretty_print_ireg(rn, size);
1536                let imm = imm.pretty_print(0);
1537                let nzcv = nzcv.pretty_print(0);
1538                let cond = cond.pretty_print(0);
1539                format!("ccmp {rn}, {imm}, {nzcv}, {cond}")
1540            }
1541            &Inst::AtomicRMW {
1542                rs, rt, rn, ty, op, ..
1543            } => {
1544                let op = match op {
1545                    AtomicRMWOp::Add => "ldaddal",
1546                    AtomicRMWOp::Clr => "ldclral",
1547                    AtomicRMWOp::Eor => "ldeoral",
1548                    AtomicRMWOp::Set => "ldsetal",
1549                    AtomicRMWOp::Smax => "ldsmaxal",
1550                    AtomicRMWOp::Umax => "ldumaxal",
1551                    AtomicRMWOp::Smin => "ldsminal",
1552                    AtomicRMWOp::Umin => "lduminal",
1553                    AtomicRMWOp::Swp => "swpal",
1554                };
1555
1556                let size = OperandSize::from_ty(ty);
1557                let rs = pretty_print_ireg(rs, size);
1558                let rt = pretty_print_ireg(rt.to_reg(), size);
1559                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1560
1561                let ty_suffix = match ty {
1562                    I8 => "b",
1563                    I16 => "h",
1564                    _ => "",
1565                };
1566                format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")
1567            }
1568            &Inst::AtomicRMWLoop {
1569                ty,
1570                op,
1571                addr,
1572                operand,
1573                oldval,
1574                scratch1,
1575                scratch2,
1576                ..
1577            } => {
1578                let op = match op {
1579                    AtomicRMWLoopOp::Add => "add",
1580                    AtomicRMWLoopOp::Sub => "sub",
1581                    AtomicRMWLoopOp::Eor => "eor",
1582                    AtomicRMWLoopOp::Orr => "orr",
1583                    AtomicRMWLoopOp::And => "and",
1584                    AtomicRMWLoopOp::Nand => "nand",
1585                    AtomicRMWLoopOp::Smin => "smin",
1586                    AtomicRMWLoopOp::Smax => "smax",
1587                    AtomicRMWLoopOp::Umin => "umin",
1588                    AtomicRMWLoopOp::Umax => "umax",
1589                    AtomicRMWLoopOp::Xchg => "xchg",
1590                };
1591                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1592                let operand = pretty_print_ireg(operand, OperandSize::Size64);
1593                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1594                let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);
1595                let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);
1596                format!(
1597                    "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",
1598                    op,
1599                    ty.bits(),
1600                    addr,
1601                    operand,
1602                    oldval,
1603                    scratch1,
1604                    scratch2,
1605                )
1606            }
1607            &Inst::AtomicCAS {
1608                rd, rs, rt, rn, ty, ..
1609            } => {
1610                let op = match ty {
1611                    I8 => "casalb",
1612                    I16 => "casalh",
1613                    I32 | I64 => "casal",
1614                    _ => panic!("Unsupported type: {ty}"),
1615                };
1616                let size = OperandSize::from_ty(ty);
1617                let rd = pretty_print_ireg(rd.to_reg(), size);
1618                let rs = pretty_print_ireg(rs, size);
1619                let rt = pretty_print_ireg(rt, size);
1620                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1621
1622                format!("{op} {rd}, {rs}, {rt}, [{rn}]")
1623            }
1624            &Inst::AtomicCASLoop {
1625                ty,
1626                addr,
1627                expected,
1628                replacement,
1629                oldval,
1630                scratch,
1631                ..
1632            } => {
1633                let addr = pretty_print_ireg(addr, OperandSize::Size64);
1634                let expected = pretty_print_ireg(expected, OperandSize::Size64);
1635                let replacement = pretty_print_ireg(replacement, OperandSize::Size64);
1636                let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);
1637                let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);
1638                format!(
1639                    "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",
1640                    ty.bits(),
1641                    addr,
1642                    expected,
1643                    replacement,
1644                    oldval,
1645                    scratch,
1646                )
1647            }
1648            &Inst::LoadAcquire {
1649                access_ty, rt, rn, ..
1650            } => {
1651                let (op, ty) = match access_ty {
1652                    I8 => ("ldarb", I32),
1653                    I16 => ("ldarh", I32),
1654                    I32 => ("ldar", I32),
1655                    I64 => ("ldar", I64),
1656                    _ => panic!("Unsupported type: {access_ty}"),
1657                };
1658                let size = OperandSize::from_ty(ty);
1659                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1660                let rt = pretty_print_ireg(rt.to_reg(), size);
1661                format!("{op} {rt}, [{rn}]")
1662            }
1663            &Inst::StoreRelease {
1664                access_ty, rt, rn, ..
1665            } => {
1666                let (op, ty) = match access_ty {
1667                    I8 => ("stlrb", I32),
1668                    I16 => ("stlrh", I32),
1669                    I32 => ("stlr", I32),
1670                    I64 => ("stlr", I64),
1671                    _ => panic!("Unsupported type: {access_ty}"),
1672                };
1673                let size = OperandSize::from_ty(ty);
1674                let rn = pretty_print_ireg(rn, OperandSize::Size64);
1675                let rt = pretty_print_ireg(rt, size);
1676                format!("{op} {rt}, [{rn}]")
1677            }
1678            &Inst::Fence {} => {
1679                format!("dmb ish")
1680            }
1681            &Inst::Csdb {} => {
1682                format!("csdb")
1683            }
1684            &Inst::FpuMove32 { rd, rn } => {
1685                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1686                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1687                format!("fmov {rd}, {rn}")
1688            }
1689            &Inst::FpuMove64 { rd, rn } => {
1690                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1691                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1692                format!("fmov {rd}, {rn}")
1693            }
1694            &Inst::FpuMove128 { rd, rn } => {
1695                let rd = pretty_print_reg(rd.to_reg());
1696                let rn = pretty_print_reg(rn);
1697                format!("mov {rd}.16b, {rn}.16b")
1698            }
1699            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1700                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
1701                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
1702                format!("mov {rd}, {rn}")
1703            }
1704            &Inst::FpuExtend { rd, rn, size } => {
1705                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1706                let rn = pretty_print_vreg_scalar(rn, size);
1707                format!("fmov {rd}, {rn}")
1708            }
1709            &Inst::FpuRR {
1710                fpu_op,
1711                size,
1712                rd,
1713                rn,
1714            } => {
1715                let op = match fpu_op {
1716                    FPUOp1::Abs => "fabs",
1717                    FPUOp1::Neg => "fneg",
1718                    FPUOp1::Sqrt => "fsqrt",
1719                    FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
1720                };
1721                let dst_size = match fpu_op {
1722                    FPUOp1::Cvt32To64 => ScalarSize::Size64,
1723                    FPUOp1::Cvt64To32 => ScalarSize::Size32,
1724                    _ => size,
1725                };
1726                let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);
1727                let rn = pretty_print_vreg_scalar(rn, size);
1728                format!("{op} {rd}, {rn}")
1729            }
1730            &Inst::FpuRRR {
1731                fpu_op,
1732                size,
1733                rd,
1734                rn,
1735                rm,
1736            } => {
1737                let op = match fpu_op {
1738                    FPUOp2::Add => "fadd",
1739                    FPUOp2::Sub => "fsub",
1740                    FPUOp2::Mul => "fmul",
1741                    FPUOp2::Div => "fdiv",
1742                    FPUOp2::Max => "fmax",
1743                    FPUOp2::Min => "fmin",
1744                };
1745                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1746                let rn = pretty_print_vreg_scalar(rn, size);
1747                let rm = pretty_print_vreg_scalar(rm, size);
1748                format!("{op} {rd}, {rn}, {rm}")
1749            }
1750            &Inst::FpuRRI { fpu_op, rd, rn } => {
1751                let (op, imm, vector) = match fpu_op {
1752                    FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),
1753                    FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),
1754                };
1755
1756                let (rd, rn) = if vector {
1757                    (
1758                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1759                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1760                    )
1761                } else {
1762                    (
1763                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1764                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1765                    )
1766                };
1767                format!("{op} {rd}, {rn}, {imm}")
1768            }
1769            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1770                let (op, imm, vector) = match fpu_op {
1771                    FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0), true),
1772                    FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),
1773                };
1774
1775                let (rd, ri, rn) = if vector {
1776                    (
1777                        pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),
1778                        pretty_print_vreg_vector(ri, VectorSize::Size32x2),
1779                        pretty_print_vreg_vector(rn, VectorSize::Size32x2),
1780                    )
1781                } else {
1782                    (
1783                        pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),
1784                        pretty_print_vreg_scalar(ri, ScalarSize::Size64),
1785                        pretty_print_vreg_scalar(rn, ScalarSize::Size64),
1786                    )
1787                };
1788                format!("{op} {rd}, {ri}, {rn}, {imm}")
1789            }
1790            &Inst::FpuRRRR {
1791                fpu_op,
1792                size,
1793                rd,
1794                rn,
1795                rm,
1796                ra,
1797            } => {
1798                let op = match fpu_op {
1799                    FPUOp3::MAdd => "fmadd",
1800                    FPUOp3::MSub => "fmsub",
1801                    FPUOp3::NMAdd => "fnmadd",
1802                    FPUOp3::NMSub => "fnmsub",
1803                };
1804                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1805                let rn = pretty_print_vreg_scalar(rn, size);
1806                let rm = pretty_print_vreg_scalar(rm, size);
1807                let ra = pretty_print_vreg_scalar(ra, size);
1808                format!("{op} {rd}, {rn}, {rm}, {ra}")
1809            }
1810            &Inst::FpuCmp { size, rn, rm } => {
1811                let rn = pretty_print_vreg_scalar(rn, size);
1812                let rm = pretty_print_vreg_scalar(rm, size);
1813                format!("fcmp {rn}, {rm}")
1814            }
1815            &Inst::FpuLoad16 { rd, ref mem, .. } => {
1816                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1817                let mem = mem.clone();
1818                let access_ty = self.mem_type().unwrap();
1819                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1820                format!("{mem_str}ldr {rd}, {mem}")
1821            }
1822            &Inst::FpuLoad32 { rd, ref mem, .. } => {
1823                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1824                let mem = mem.clone();
1825                let access_ty = self.mem_type().unwrap();
1826                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1827                format!("{mem_str}ldr {rd}, {mem}")
1828            }
1829            &Inst::FpuLoad64 { rd, ref mem, .. } => {
1830                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1831                let mem = mem.clone();
1832                let access_ty = self.mem_type().unwrap();
1833                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1834                format!("{mem_str}ldr {rd}, {mem}")
1835            }
1836            &Inst::FpuLoad128 { rd, ref mem, .. } => {
1837                let rd = pretty_print_reg(rd.to_reg());
1838                let rd = "q".to_string() + &rd[1..];
1839                let mem = mem.clone();
1840                let access_ty = self.mem_type().unwrap();
1841                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1842                format!("{mem_str}ldr {rd}, {mem}")
1843            }
1844            &Inst::FpuStore16 { rd, ref mem, .. } => {
1845                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
1846                let mem = mem.clone();
1847                let access_ty = self.mem_type().unwrap();
1848                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1849                format!("{mem_str}str {rd}, {mem}")
1850            }
1851            &Inst::FpuStore32 { rd, ref mem, .. } => {
1852                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
1853                let mem = mem.clone();
1854                let access_ty = self.mem_type().unwrap();
1855                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1856                format!("{mem_str}str {rd}, {mem}")
1857            }
1858            &Inst::FpuStore64 { rd, ref mem, .. } => {
1859                let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);
1860                let mem = mem.clone();
1861                let access_ty = self.mem_type().unwrap();
1862                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1863                format!("{mem_str}str {rd}, {mem}")
1864            }
1865            &Inst::FpuStore128 { rd, ref mem, .. } => {
1866                let rd = pretty_print_reg(rd);
1867                let rd = "q".to_string() + &rd[1..];
1868                let mem = mem.clone();
1869                let access_ty = self.mem_type().unwrap();
1870                let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
1871                format!("{mem_str}str {rd}, {mem}")
1872            }
1873            &Inst::FpuLoadP64 {
1874                rt, rt2, ref mem, ..
1875            } => {
1876                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);
1877                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);
1878                let mem = mem.clone();
1879                let mem = mem.pretty_print_default();
1880
1881                format!("ldp {rt}, {rt2}, {mem}")
1882            }
1883            &Inst::FpuStoreP64 {
1884                rt, rt2, ref mem, ..
1885            } => {
1886                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);
1887                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);
1888                let mem = mem.clone();
1889                let mem = mem.pretty_print_default();
1890
1891                format!("stp {rt}, {rt2}, {mem}")
1892            }
1893            &Inst::FpuLoadP128 {
1894                rt, rt2, ref mem, ..
1895            } => {
1896                let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);
1897                let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);
1898                let mem = mem.clone();
1899                let mem = mem.pretty_print_default();
1900
1901                format!("ldp {rt}, {rt2}, {mem}")
1902            }
1903            &Inst::FpuStoreP128 {
1904                rt, rt2, ref mem, ..
1905            } => {
1906                let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);
1907                let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);
1908                let mem = mem.clone();
1909                let mem = mem.pretty_print_default();
1910
1911                format!("stp {rt}, {rt2}, {mem}")
1912            }
1913            &Inst::FpuToInt { op, rd, rn } => {
1914                let (op, sizesrc, sizedest) = match op {
1915                    FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
1916                    FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
1917                    FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
1918                    FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
1919                    FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
1920                    FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
1921                    FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
1922                    FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
1923                };
1924                let rd = pretty_print_ireg(rd.to_reg(), sizedest);
1925                let rn = pretty_print_vreg_scalar(rn, sizesrc);
1926                format!("{op} {rd}, {rn}")
1927            }
1928            &Inst::IntToFpu { op, rd, rn } => {
1929                let (op, sizesrc, sizedest) = match op {
1930                    IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
1931                    IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
1932                    IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
1933                    IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
1934                    IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
1935                    IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
1936                    IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
1937                    IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
1938                };
1939                let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);
1940                let rn = pretty_print_ireg(rn, sizesrc);
1941                format!("{op} {rd}, {rn}")
1942            }
1943            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
1944                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
1945                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
1946                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
1947                let cond = cond.pretty_print(0);
1948                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1949            }
1950            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
1951                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
1952                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);
1953                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);
1954                let cond = cond.pretty_print(0);
1955                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1956            }
1957            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
1958                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
1959                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);
1960                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);
1961                let cond = cond.pretty_print(0);
1962                format!("fcsel {rd}, {rn}, {rm}, {cond}")
1963            }
1964            &Inst::FpuRound { op, rd, rn } => {
1965                let (inst, size) = match op {
1966                    FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
1967                    FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
1968                    FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
1969                    FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
1970                    FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
1971                    FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
1972                    FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
1973                    FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
1974                };
1975                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1976                let rn = pretty_print_vreg_scalar(rn, size);
1977                format!("{inst} {rd}, {rn}")
1978            }
1979            &Inst::MovToFpu { rd, rn, size } => {
1980                let operand_size = size.operand_size();
1981                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1982                let rn = pretty_print_ireg(rn, operand_size);
1983                format!("fmov {rd}, {rn}")
1984            }
1985            &Inst::FpuMoveFPImm { rd, imm, size } => {
1986                let imm = imm.pretty_print(0);
1987                let rd = pretty_print_vreg_scalar(rd.to_reg(), size);
1988
1989                format!("fmov {rd}, {imm}")
1990            }
1991            &Inst::MovToVec {
1992                rd,
1993                ri,
1994                rn,
1995                idx,
1996                size,
1997            } => {
1998                let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());
1999                let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());
2000                let rn = pretty_print_ireg(rn, size.operand_size());
2001                format!("mov {rd}, {ri}, {rn}")
2002            }
2003            &Inst::MovFromVec { rd, rn, idx, size } => {
2004                let op = match size {
2005                    ScalarSize::Size8 => "umov",
2006                    ScalarSize::Size16 => "umov",
2007                    ScalarSize::Size32 => "mov",
2008                    ScalarSize::Size64 => "mov",
2009                    _ => unimplemented!(),
2010                };
2011                let rd = pretty_print_ireg(rd.to_reg(), size.operand_size());
2012                let rn = pretty_print_vreg_element(rn, idx as usize, size);
2013                format!("{op} {rd}, {rn}")
2014            }
2015            &Inst::MovFromVecSigned {
2016                rd,
2017                rn,
2018                idx,
2019                size,
2020                scalar_size,
2021            } => {
2022                let rd = pretty_print_ireg(rd.to_reg(), scalar_size);
2023                let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());
2024                format!("smov {rd}, {rn}")
2025            }
2026            &Inst::VecDup { rd, rn, size } => {
2027                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2028                let rn = pretty_print_ireg(rn, size.operand_size());
2029                format!("dup {rd}, {rn}")
2030            }
2031            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
2032                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2033                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());
2034                format!("dup {rd}, {rn}")
2035            }
2036            &Inst::VecDupFPImm { rd, imm, size } => {
2037                let imm = imm.pretty_print(0);
2038                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2039
2040                format!("fmov {rd}, {imm}")
2041            }
2042            &Inst::VecDupImm {
2043                rd,
2044                imm,
2045                invert,
2046                size,
2047            } => {
2048                let imm = imm.pretty_print(0);
2049                let op = if invert { "mvni" } else { "movi" };
2050                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2051
2052                format!("{op} {rd}, {imm}")
2053            }
2054            &Inst::VecExtend {
2055                t,
2056                rd,
2057                rn,
2058                high_half,
2059                lane_size,
2060            } => {
2061                let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
2062                let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
2063                let rd_size = VectorSize::from_lane_size(lane_size, true);
2064                let (op, rn_size) = match (t, high_half) {
2065                    (VecExtendOp::Sxtl, false) => ("sxtl", vec64),
2066                    (VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
2067                    (VecExtendOp::Uxtl, false) => ("uxtl", vec64),
2068                    (VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
2069                };
2070                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2071                let rn = pretty_print_vreg_vector(rn, rn_size);
2072                format!("{op} {rd}, {rn}")
2073            }
2074            &Inst::VecMovElement {
2075                rd,
2076                ri,
2077                rn,
2078                dest_idx,
2079                src_idx,
2080                size,
2081            } => {
2082                let rd =
2083                    pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());
2084                let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());
2085                let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());
2086                format!("mov {rd}, {ri}, {rn}")
2087            }
2088            &Inst::VecRRLong {
2089                op,
2090                rd,
2091                rn,
2092                high_half,
2093            } => {
2094                let (op, rd_size, size, suffix) = match (op, high_half) {
2095                    (VecRRLongOp::Fcvtl16, false) => {
2096                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
2097                    }
2098                    (VecRRLongOp::Fcvtl16, true) => {
2099                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
2100                    }
2101                    (VecRRLongOp::Fcvtl32, false) => {
2102                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
2103                    }
2104                    (VecRRLongOp::Fcvtl32, true) => {
2105                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
2106                    }
2107                    (VecRRLongOp::Shll8, false) => {
2108                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
2109                    }
2110                    (VecRRLongOp::Shll8, true) => {
2111                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
2112                    }
2113                    (VecRRLongOp::Shll16, false) => {
2114                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
2115                    }
2116                    (VecRRLongOp::Shll16, true) => {
2117                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
2118                    }
2119                    (VecRRLongOp::Shll32, false) => {
2120                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
2121                    }
2122                    (VecRRLongOp::Shll32, true) => {
2123                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
2124                    }
2125                };
2126                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2127                let rn = pretty_print_vreg_vector(rn, size);
2128
2129                format!("{op} {rd}, {rn}{suffix}")
2130            }
2131            &Inst::VecRRNarrowLow {
2132                op,
2133                rd,
2134                rn,
2135                lane_size,
2136                ..
2137            }
2138            | &Inst::VecRRNarrowHigh {
2139                op,
2140                rd,
2141                rn,
2142                lane_size,
2143                ..
2144            } => {
2145                let vec64 = VectorSize::from_lane_size(lane_size, false);
2146                let vec128 = VectorSize::from_lane_size(lane_size, true);
2147                let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
2148                let high_half = match self {
2149                    &Inst::VecRRNarrowLow { .. } => false,
2150                    &Inst::VecRRNarrowHigh { .. } => true,
2151                    _ => unreachable!(),
2152                };
2153                let (op, rd_size) = match (op, high_half) {
2154                    (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
2155                    (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
2156                    (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
2157                    (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
2158                    (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
2159                    (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
2160                    (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
2161                    (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
2162                    (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
2163                    (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
2164                };
2165                let rn = pretty_print_vreg_vector(rn, rn_size);
2166                let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);
2167                let ri = match self {
2168                    &Inst::VecRRNarrowLow { .. } => "".to_string(),
2169                    &Inst::VecRRNarrowHigh { ri, .. } => {
2170                        format!("{}, ", pretty_print_vreg_vector(ri, rd_size))
2171                    }
2172                    _ => unreachable!(),
2173                };
2174
2175                format!("{op} {rd}, {ri}{rn}")
2176            }
2177            &Inst::VecRRPair { op, rd, rn } => {
2178                let op = match op {
2179                    VecPairOp::Addp => "addp",
2180                };
2181                let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);
2182                let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);
2183
2184                format!("{op} {rd}, {rn}")
2185            }
2186            &Inst::VecRRPairLong { op, rd, rn } => {
2187                let (op, dest, src) = match op {
2188                    VecRRPairLongOp::Saddlp8 => {
2189                        ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2190                    }
2191                    VecRRPairLongOp::Saddlp16 => {
2192                        ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2193                    }
2194                    VecRRPairLongOp::Uaddlp8 => {
2195                        ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)
2196                    }
2197                    VecRRPairLongOp::Uaddlp16 => {
2198                        ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)
2199                    }
2200                };
2201                let rd = pretty_print_vreg_vector(rd.to_reg(), dest);
2202                let rn = pretty_print_vreg_vector(rn, src);
2203
2204                format!("{op} {rd}, {rn}")
2205            }
2206            &Inst::VecRRR {
2207                rd,
2208                rn,
2209                rm,
2210                alu_op,
2211                size,
2212            } => {
2213                let (op, size) = match alu_op {
2214                    VecALUOp::Sqadd => ("sqadd", size),
2215                    VecALUOp::Uqadd => ("uqadd", size),
2216                    VecALUOp::Sqsub => ("sqsub", size),
2217                    VecALUOp::Uqsub => ("uqsub", size),
2218                    VecALUOp::Cmeq => ("cmeq", size),
2219                    VecALUOp::Cmge => ("cmge", size),
2220                    VecALUOp::Cmgt => ("cmgt", size),
2221                    VecALUOp::Cmhs => ("cmhs", size),
2222                    VecALUOp::Cmhi => ("cmhi", size),
2223                    VecALUOp::Fcmeq => ("fcmeq", size),
2224                    VecALUOp::Fcmgt => ("fcmgt", size),
2225                    VecALUOp::Fcmge => ("fcmge", size),
2226                    VecALUOp::Umaxp => ("umaxp", size),
2227                    VecALUOp::Add => ("add", size),
2228                    VecALUOp::Sub => ("sub", size),
2229                    VecALUOp::Mul => ("mul", size),
2230                    VecALUOp::Sshl => ("sshl", size),
2231                    VecALUOp::Ushl => ("ushl", size),
2232                    VecALUOp::Umin => ("umin", size),
2233                    VecALUOp::Smin => ("smin", size),
2234                    VecALUOp::Umax => ("umax", size),
2235                    VecALUOp::Smax => ("smax", size),
2236                    VecALUOp::Urhadd => ("urhadd", size),
2237                    VecALUOp::Fadd => ("fadd", size),
2238                    VecALUOp::Fsub => ("fsub", size),
2239                    VecALUOp::Fdiv => ("fdiv", size),
2240                    VecALUOp::Fmax => ("fmax", size),
2241                    VecALUOp::Fmin => ("fmin", size),
2242                    VecALUOp::Fmul => ("fmul", size),
2243                    VecALUOp::Addp => ("addp", size),
2244                    VecALUOp::Zip1 => ("zip1", size),
2245                    VecALUOp::Zip2 => ("zip2", size),
2246                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
2247                    VecALUOp::Uzp1 => ("uzp1", size),
2248                    VecALUOp::Uzp2 => ("uzp2", size),
2249                    VecALUOp::Trn1 => ("trn1", size),
2250                    VecALUOp::Trn2 => ("trn2", size),
2251
2252                    // Lane division does not affect bitwise operations.
2253                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2254                    VecALUOp::And => ("and", size.as_scalar8_vector()),
2255                    VecALUOp::Bic => ("bic", size.as_scalar8_vector()),
2256                    VecALUOp::Orr => ("orr", size.as_scalar8_vector()),
2257                    VecALUOp::Orn => ("orn", size.as_scalar8_vector()),
2258                    VecALUOp::Eor => ("eor", size.as_scalar8_vector()),
2259                };
2260                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2261                let rn = pretty_print_vreg_vector(rn, size);
2262                let rm = pretty_print_vreg_vector(rm, size);
2263                format!("{op} {rd}, {rn}, {rm}")
2264            }
2265            &Inst::VecRRRMod {
2266                rd,
2267                ri,
2268                rn,
2269                rm,
2270                alu_op,
2271                size,
2272            } => {
2273                let (op, size) = match alu_op {
2274                    VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),
2275                    VecALUModOp::Fmla => ("fmla", size),
2276                    VecALUModOp::Fmls => ("fmls", size),
2277                };
2278                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2279                let ri = pretty_print_vreg_vector(ri, size);
2280                let rn = pretty_print_vreg_vector(rn, size);
2281                let rm = pretty_print_vreg_vector(rm, size);
2282                format!("{op} {rd}, {ri}, {rn}, {rm}")
2283            }
2284            &Inst::VecFmlaElem {
2285                rd,
2286                ri,
2287                rn,
2288                rm,
2289                alu_op,
2290                size,
2291                idx,
2292            } => {
2293                let (op, size) = match alu_op {
2294                    VecALUModOp::Fmla => ("fmla", size),
2295                    VecALUModOp::Fmls => ("fmls", size),
2296                    _ => unreachable!(),
2297                };
2298                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2299                let ri = pretty_print_vreg_vector(ri, size);
2300                let rn = pretty_print_vreg_vector(rn, size);
2301                let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());
2302                format!("{op} {rd}, {ri}, {rn}, {rm}")
2303            }
2304            &Inst::VecRRRLong {
2305                rd,
2306                rn,
2307                rm,
2308                alu_op,
2309                high_half,
2310            } => {
2311                let (op, dest_size, src_size) = match (alu_op, high_half) {
2312                    (VecRRRLongOp::Smull8, false) => {
2313                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
2314                    }
2315                    (VecRRRLongOp::Smull8, true) => {
2316                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
2317                    }
2318                    (VecRRRLongOp::Smull16, false) => {
2319                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
2320                    }
2321                    (VecRRRLongOp::Smull16, true) => {
2322                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
2323                    }
2324                    (VecRRRLongOp::Smull32, false) => {
2325                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
2326                    }
2327                    (VecRRRLongOp::Smull32, true) => {
2328                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
2329                    }
2330                    (VecRRRLongOp::Umull8, false) => {
2331                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
2332                    }
2333                    (VecRRRLongOp::Umull8, true) => {
2334                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
2335                    }
2336                    (VecRRRLongOp::Umull16, false) => {
2337                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
2338                    }
2339                    (VecRRRLongOp::Umull16, true) => {
2340                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
2341                    }
2342                    (VecRRRLongOp::Umull32, false) => {
2343                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
2344                    }
2345                    (VecRRRLongOp::Umull32, true) => {
2346                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
2347                    }
2348                };
2349                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2350                let rn = pretty_print_vreg_vector(rn, src_size);
2351                let rm = pretty_print_vreg_vector(rm, src_size);
2352                format!("{op} {rd}, {rn}, {rm}")
2353            }
2354            &Inst::VecRRRLongMod {
2355                rd,
2356                ri,
2357                rn,
2358                rm,
2359                alu_op,
2360                high_half,
2361            } => {
2362                let (op, dest_size, src_size) = match (alu_op, high_half) {
2363                    (VecRRRLongModOp::Umlal8, false) => {
2364                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
2365                    }
2366                    (VecRRRLongModOp::Umlal8, true) => {
2367                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
2368                    }
2369                    (VecRRRLongModOp::Umlal16, false) => {
2370                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
2371                    }
2372                    (VecRRRLongModOp::Umlal16, true) => {
2373                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
2374                    }
2375                    (VecRRRLongModOp::Umlal32, false) => {
2376                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
2377                    }
2378                    (VecRRRLongModOp::Umlal32, true) => {
2379                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
2380                    }
2381                };
2382                let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);
2383                let ri = pretty_print_vreg_vector(ri, dest_size);
2384                let rn = pretty_print_vreg_vector(rn, src_size);
2385                let rm = pretty_print_vreg_vector(rm, src_size);
2386                format!("{op} {rd}, {ri}, {rn}, {rm}")
2387            }
2388            &Inst::VecMisc { op, rd, rn, size } => {
2389                let (op, size, suffix) = match op {
2390                    VecMisc2::Neg => ("neg", size, ""),
2391                    VecMisc2::Abs => ("abs", size, ""),
2392                    VecMisc2::Fabs => ("fabs", size, ""),
2393                    VecMisc2::Fneg => ("fneg", size, ""),
2394                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
2395                    VecMisc2::Rev16 => ("rev16", size, ""),
2396                    VecMisc2::Rev32 => ("rev32", size, ""),
2397                    VecMisc2::Rev64 => ("rev64", size, ""),
2398                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
2399                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
2400                    VecMisc2::Scvtf => ("scvtf", size, ""),
2401                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
2402                    VecMisc2::Frintn => ("frintn", size, ""),
2403                    VecMisc2::Frintz => ("frintz", size, ""),
2404                    VecMisc2::Frintm => ("frintm", size, ""),
2405                    VecMisc2::Frintp => ("frintp", size, ""),
2406                    VecMisc2::Cnt => ("cnt", size, ""),
2407                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
2408                    VecMisc2::Cmge0 => ("cmge", size, ", #0"),
2409                    VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
2410                    VecMisc2::Cmle0 => ("cmle", size, ", #0"),
2411                    VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
2412                    VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
2413                    VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
2414                    VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
2415                    VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
2416                    VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
2417
2418                    // Lane division does not affect bitwise operations.
2419                    // However, when printing, use 8-bit lane division to conform to ARM formatting.
2420                    VecMisc2::Not => ("mvn", size.as_scalar8_vector(), ""),
2421                };
2422                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2423                let rn = pretty_print_vreg_vector(rn, size);
2424                format!("{op} {rd}, {rn}{suffix}")
2425            }
2426            &Inst::VecLanes { op, rd, rn, size } => {
2427                let op = match op {
2428                    VecLanesOp::Uminv => "uminv",
2429                    VecLanesOp::Addv => "addv",
2430                };
2431                let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());
2432                let rn = pretty_print_vreg_vector(rn, size);
2433                format!("{op} {rd}, {rn}")
2434            }
2435            &Inst::VecShiftImm {
2436                op,
2437                rd,
2438                rn,
2439                size,
2440                imm,
2441            } => {
2442                let op = match op {
2443                    VecShiftImmOp::Shl => "shl",
2444                    VecShiftImmOp::Ushr => "ushr",
2445                    VecShiftImmOp::Sshr => "sshr",
2446                };
2447                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2448                let rn = pretty_print_vreg_vector(rn, size);
2449                format!("{op} {rd}, {rn}, #{imm}")
2450            }
2451            &Inst::VecShiftImmMod {
2452                op,
2453                rd,
2454                ri,
2455                rn,
2456                size,
2457                imm,
2458            } => {
2459                let op = match op {
2460                    VecShiftImmModOp::Sli => "sli",
2461                };
2462                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2463                let ri = pretty_print_vreg_vector(ri, size);
2464                let rn = pretty_print_vreg_vector(rn, size);
2465                format!("{op} {rd}, {ri}, {rn}, #{imm}")
2466            }
2467            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2468                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2469                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2470                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2471                format!("ext {rd}, {rn}, {rm}, #{imm4}")
2472            }
2473            &Inst::VecTbl { rd, rn, rm } => {
2474                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2475                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2476                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2477                format!("tbl {rd}, {{ {rn} }}, {rm}")
2478            }
2479            &Inst::VecTblExt { rd, ri, rn, rm } => {
2480                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2481                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2482                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2483                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2484                format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")
2485            }
2486            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2487                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2488                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2489                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2490                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2491                format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")
2492            }
2493            &Inst::VecTbl2Ext {
2494                rd,
2495                ri,
2496                rn,
2497                rn2,
2498                rm,
2499            } => {
2500                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2501                let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);
2502                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2503                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2504                let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);
2505                format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")
2506            }
2507            &Inst::VecLoadReplicate { rd, rn, size, .. } => {
2508                let rd = pretty_print_vreg_vector(rd.to_reg(), size);
2509                let rn = pretty_print_reg(rn);
2510
2511                format!("ld1r {{ {rd} }}, [{rn}]")
2512            }
2513            &Inst::VecCSel { rd, rn, rm, cond } => {
2514                let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);
2515                let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);
2516                let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);
2517                let cond = cond.pretty_print(0);
2518                format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")
2519            }
2520            &Inst::MovToNZCV { rn } => {
2521                let rn = pretty_print_reg(rn);
2522                format!("msr nzcv, {rn}")
2523            }
2524            &Inst::MovFromNZCV { rd } => {
2525                let rd = pretty_print_reg(rd.to_reg());
2526                format!("mrs {rd}, nzcv")
2527            }
2528            &Inst::Extend {
2529                rd,
2530                rn,
2531                signed: false,
2532                from_bits: 1,
2533                ..
2534            } => {
2535                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2536                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2537                format!("and {rd}, {rn}, #1")
2538            }
2539            &Inst::Extend {
2540                rd,
2541                rn,
2542                signed: false,
2543                from_bits: 32,
2544                to_bits: 64,
2545            } => {
2546                // The case of a zero extension from 32 to 64 bits, is implemented
2547                // with a "mov" to a 32-bit (W-reg) dest, because this zeroes
2548                // the top 32 bits.
2549                let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);
2550                let rn = pretty_print_ireg(rn, OperandSize::Size32);
2551                format!("mov {rd}, {rn}")
2552            }
2553            &Inst::Extend {
2554                rd,
2555                rn,
2556                signed,
2557                from_bits,
2558                to_bits,
2559            } => {
2560                assert!(from_bits <= to_bits);
2561                let op = match (signed, from_bits) {
2562                    (false, 8) => "uxtb",
2563                    (true, 8) => "sxtb",
2564                    (false, 16) => "uxth",
2565                    (true, 16) => "sxth",
2566                    (true, 32) => "sxtw",
2567                    (true, _) => "sbfx",
2568                    (false, _) => "ubfx",
2569                };
2570                if op == "sbfx" || op == "ubfx" {
2571                    let dest_size = OperandSize::from_bits(to_bits);
2572                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2573                    let rn = pretty_print_ireg(rn, dest_size);
2574                    format!("{op} {rd}, {rn}, #0, #{from_bits}")
2575                } else {
2576                    let dest_size = if signed {
2577                        OperandSize::from_bits(to_bits)
2578                    } else {
2579                        OperandSize::Size32
2580                    };
2581                    let rd = pretty_print_ireg(rd.to_reg(), dest_size);
2582                    let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));
2583                    format!("{op} {rd}, {rn}")
2584                }
2585            }
2586            &Inst::Call { ref info } => {
2587                let try_call = info
2588                    .try_call_info
2589                    .as_ref()
2590                    .map(|tci| pretty_print_try_call(tci))
2591                    .unwrap_or_default();
2592                format!("bl 0{try_call}")
2593            }
2594            &Inst::CallInd { ref info } => {
2595                let rn = pretty_print_reg(info.dest);
2596                let try_call = info
2597                    .try_call_info
2598                    .as_ref()
2599                    .map(|tci| pretty_print_try_call(tci))
2600                    .unwrap_or_default();
2601                format!("blr {rn}{try_call}")
2602            }
2603            &Inst::ReturnCall { ref info } => {
2604                let mut s = format!(
2605                    "return_call {:?} new_stack_arg_size:{}",
2606                    info.dest, info.new_stack_arg_size
2607                );
2608                for ret in &info.uses {
2609                    let preg = pretty_print_reg(ret.preg);
2610                    let vreg = pretty_print_reg(ret.vreg);
2611                    write!(&mut s, " {vreg}={preg}").unwrap();
2612                }
2613                s
2614            }
2615            &Inst::ReturnCallInd { ref info } => {
2616                let callee = pretty_print_reg(info.dest);
2617                let mut s = format!(
2618                    "return_call_ind {callee} new_stack_arg_size:{}",
2619                    info.new_stack_arg_size
2620                );
2621                for ret in &info.uses {
2622                    let preg = pretty_print_reg(ret.preg);
2623                    let vreg = pretty_print_reg(ret.vreg);
2624                    write!(&mut s, " {vreg}={preg}").unwrap();
2625                }
2626                s
2627            }
2628            &Inst::Args { ref args } => {
2629                let mut s = "args".to_string();
2630                for arg in args {
2631                    let preg = pretty_print_reg(arg.preg);
2632                    let def = pretty_print_reg(arg.vreg.to_reg());
2633                    write!(&mut s, " {def}={preg}").unwrap();
2634                }
2635                s
2636            }
2637            &Inst::Rets { ref rets } => {
2638                let mut s = "rets".to_string();
2639                for ret in rets {
2640                    let preg = pretty_print_reg(ret.preg);
2641                    let vreg = pretty_print_reg(ret.vreg);
2642                    write!(&mut s, " {vreg}={preg}").unwrap();
2643                }
2644                s
2645            }
2646            &Inst::Ret {} => "ret".to_string(),
2647            &Inst::AuthenticatedRet { key, is_hint } => {
2648                let key = match key {
2649                    APIKey::AZ => "az",
2650                    APIKey::BZ => "bz",
2651                    APIKey::ASP => "asp",
2652                    APIKey::BSP => "bsp",
2653                };
2654                match is_hint {
2655                    false => format!("reta{key}"),
2656                    true => format!("auti{key} ; ret"),
2657                }
2658            }
2659            &Inst::Jump { ref dest } => {
2660                let dest = dest.pretty_print(0);
2661                format!("b {dest}")
2662            }
2663            &Inst::CondBr {
2664                ref taken,
2665                ref not_taken,
2666                ref kind,
2667            } => {
2668                let taken = taken.pretty_print(0);
2669                let not_taken = not_taken.pretty_print(0);
2670                match kind {
2671                    &CondBrKind::Zero(reg, size) => {
2672                        let reg = pretty_print_reg_sized(reg, size);
2673                        format!("cbz {reg}, {taken} ; b {not_taken}")
2674                    }
2675                    &CondBrKind::NotZero(reg, size) => {
2676                        let reg = pretty_print_reg_sized(reg, size);
2677                        format!("cbnz {reg}, {taken} ; b {not_taken}")
2678                    }
2679                    &CondBrKind::Cond(c) => {
2680                        let c = c.pretty_print(0);
2681                        format!("b.{c} {taken} ; b {not_taken}")
2682                    }
2683                }
2684            }
2685            &Inst::TestBitAndBranch {
2686                kind,
2687                ref taken,
2688                ref not_taken,
2689                rn,
2690                bit,
2691            } => {
2692                let cond = match kind {
2693                    TestBitAndBranchKind::Z => "z",
2694                    TestBitAndBranchKind::NZ => "nz",
2695                };
2696                let taken = taken.pretty_print(0);
2697                let not_taken = not_taken.pretty_print(0);
2698                let rn = pretty_print_reg(rn);
2699                format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")
2700            }
2701            &Inst::IndirectBr { rn, .. } => {
2702                let rn = pretty_print_reg(rn);
2703                format!("br {rn}")
2704            }
2705            &Inst::Brk => "brk #0xf000".to_string(),
2706            &Inst::Udf { .. } => "udf #0xc11f".to_string(),
2707            &Inst::TrapIf {
2708                ref kind,
2709                trap_code,
2710            } => match kind {
2711                &CondBrKind::Zero(reg, size) => {
2712                    let reg = pretty_print_reg_sized(reg, size);
2713                    format!("cbz {reg}, #trap={trap_code}")
2714                }
2715                &CondBrKind::NotZero(reg, size) => {
2716                    let reg = pretty_print_reg_sized(reg, size);
2717                    format!("cbnz {reg}, #trap={trap_code}")
2718                }
2719                &CondBrKind::Cond(c) => {
2720                    let c = c.pretty_print(0);
2721                    format!("b.{c} #trap={trap_code}")
2722                }
2723            },
2724            &Inst::Adr { rd, off } => {
2725                let rd = pretty_print_reg(rd.to_reg());
2726                format!("adr {rd}, pc+{off}")
2727            }
2728            &Inst::Adrp { rd, off } => {
2729                let rd = pretty_print_reg(rd.to_reg());
2730                // This instruction addresses 4KiB pages, so multiply it by the page size.
2731                let byte_offset = off * 4096;
2732                format!("adrp {rd}, pc+{byte_offset}")
2733            }
2734            &Inst::Word4 { data } => format!("data.i32 {data}"),
2735            &Inst::Word8 { data } => format!("data.i64 {data}"),
2736            &Inst::JTSequence {
2737                default,
2738                ref targets,
2739                ridx,
2740                rtmp1,
2741                rtmp2,
2742                ..
2743            } => {
2744                let ridx = pretty_print_reg(ridx);
2745                let rtmp1 = pretty_print_reg(rtmp1.to_reg());
2746                let rtmp2 = pretty_print_reg(rtmp2.to_reg());
2747                let default_target = BranchTarget::Label(default).pretty_print(0);
2748                format!(
2749                    concat!(
2750                        "b.hs {} ; ",
2751                        "csel {}, xzr, {}, hs ; ",
2752                        "csdb ; ",
2753                        "adr {}, pc+16 ; ",
2754                        "ldrsw {}, [{}, {}, uxtw #2] ; ",
2755                        "add {}, {}, {} ; ",
2756                        "br {} ; ",
2757                        "jt_entries {:?}"
2758                    ),
2759                    default_target,
2760                    rtmp2,
2761                    ridx,
2762                    rtmp1,
2763                    rtmp2,
2764                    rtmp1,
2765                    rtmp2,
2766                    rtmp1,
2767                    rtmp1,
2768                    rtmp2,
2769                    rtmp1,
2770                    targets
2771                )
2772            }
2773            &Inst::LoadExtNameGot { rd, ref name } => {
2774                let rd = pretty_print_reg(rd.to_reg());
2775                format!("load_ext_name_got {rd}, {name:?}")
2776            }
2777            &Inst::LoadExtNameNear {
2778                rd,
2779                ref name,
2780                offset,
2781            } => {
2782                let rd = pretty_print_reg(rd.to_reg());
2783                format!("load_ext_name_near {rd}, {name:?}+{offset}")
2784            }
2785            &Inst::LoadExtNameFar {
2786                rd,
2787                ref name,
2788                offset,
2789            } => {
2790                let rd = pretty_print_reg(rd.to_reg());
2791                format!("load_ext_name_far {rd}, {name:?}+{offset}")
2792            }
2793            &Inst::LoadAddr { rd, ref mem } => {
2794                // TODO: we really should find a better way to avoid duplication of
2795                // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
2796                // expansion stage (i.e., legalization, but without the slow edit-in-place
2797                // of the existing legalization framework).
2798                let mem = mem.clone();
2799                let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);
2800                let mut ret = String::new();
2801                for inst in mem_insts.into_iter() {
2802                    ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2803                }
2804                let (reg, index_reg, offset) = match mem {
2805                    AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
2806                    AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
2807                    AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
2808                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
2809                };
2810                let abs_offset = if offset < 0 {
2811                    -offset as u64
2812                } else {
2813                    offset as u64
2814                };
2815                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
2816
2817                if let Some((idx, extendop)) = index_reg {
2818                    let add = Inst::AluRRRExtend {
2819                        alu_op: ALUOp::Add,
2820                        size: OperandSize::Size64,
2821                        rd,
2822                        rn: reg,
2823                        rm: idx,
2824                        extendop,
2825                    };
2826
2827                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2828                } else if offset == 0 {
2829                    let mov = Inst::gen_move(rd, reg, I64);
2830                    ret.push_str(&mov.print_with_state(&mut EmitState::default()));
2831                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
2832                    let add = Inst::AluRRImm12 {
2833                        alu_op,
2834                        size: OperandSize::Size64,
2835                        rd,
2836                        rn: reg,
2837                        imm12,
2838                    };
2839                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2840                } else {
2841                    let tmp = writable_spilltmp_reg();
2842                    for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
2843                        ret.push_str(&inst.print_with_state(&mut EmitState::default()));
2844                    }
2845                    let add = Inst::AluRRR {
2846                        alu_op,
2847                        size: OperandSize::Size64,
2848                        rd,
2849                        rn: reg,
2850                        rm: tmp.to_reg(),
2851                    };
2852                    ret.push_str(&add.print_with_state(&mut EmitState::default()));
2853                }
2854                ret
2855            }
2856            &Inst::Paci { key } => {
2857                let key = match key {
2858                    APIKey::AZ => "az",
2859                    APIKey::BZ => "bz",
2860                    APIKey::ASP => "asp",
2861                    APIKey::BSP => "bsp",
2862                };
2863
2864                "paci".to_string() + key
2865            }
2866            &Inst::Xpaclri => "xpaclri".to_string(),
2867            &Inst::Bti { targets } => {
2868                let targets = match targets {
2869                    BranchTargetType::None => "",
2870                    BranchTargetType::C => " c",
2871                    BranchTargetType::J => " j",
2872                    BranchTargetType::JC => " jc",
2873                };
2874
2875                "bti".to_string() + targets
2876            }
2877            &Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),
2878
2879            &Inst::ElfTlsGetAddr {
2880                ref symbol,
2881                rd,
2882                tmp,
2883            } => {
2884                let rd = pretty_print_reg(rd.to_reg());
2885                let tmp = pretty_print_reg(tmp.to_reg());
2886                format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))
2887            }
2888            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
2889                let rd = pretty_print_reg(rd.to_reg());
2890                format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))
2891            }
2892            &Inst::Unwind { ref inst } => {
2893                format!("unwind {inst:?}")
2894            }
2895            &Inst::DummyUse { reg } => {
2896                let reg = pretty_print_reg(reg);
2897                format!("dummy_use {reg}")
2898            }
2899            &Inst::LabelAddress { dst, label } => {
2900                let dst = pretty_print_reg(dst.to_reg());
2901                format!("label_address {dst}, {label:?}")
2902            }
2903            &Inst::SequencePoint {} => {
2904                format!("sequence_point")
2905            }
2906            &Inst::StackProbeLoop { start, end, step } => {
2907                let start = pretty_print_reg(start.to_reg());
2908                let end = pretty_print_reg(end);
2909                let step = step.pretty_print(0);
2910                format!("stack_probe_loop {start}, {end}, {step}")
2911            }
2912        }
2913    }
2914}
2915
2916//=============================================================================
2917// Label fixups and jump veneers.
2918
/// Different forms of label references for different instruction formats.
///
/// Each variant names the PC-relative immediate field of one AArch64
/// instruction encoding family; the bit positions below describe where the
/// (sign-extended) immediate lives inside the 32-bit instruction word.
/// Branch-style immediates are encoded as `offset >> 2` since all AArch64
/// instructions are 4-byte aligned.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables.
    PCRel32,
}
2941
impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
            // range. Signed, so +2^(N+1) from zero. Likewise for two other
            // shifted cases below.
            LabelUse::Branch14 => (1 << 15) - 1,
            LabelUse::Branch19 => (1 << 20) - 1,
            LabelUse::Branch26 => (1 << 27) - 1,
            LabelUse::Ldr19 => (1 << 20) - 1,
            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
            // range.
            LabelUse::Adr21 => (1 << 20) - 1,
            LabelUse::PCRel32 => 0x7fffffff,
        }
    }

    /// Maximum PC-relative range (negative).
    fn max_neg_range(self) -> CodeOffset {
        // All forms are twos-complement signed offsets, so negative limit is one more than
        // positive limit.
        self.max_pos_range() + 1
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        // Patch is on one instruction only for all of these label reference types.
        4
    }

    /// Perform the patch: insert the resolved PC-relative offset
    /// (`label_offset - use_offset`) into the 32-bit little-endian word at
    /// `buffer[0..4]`.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        // The caller is responsible for only requesting in-range patches.
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        // Truncating to u32 preserves the twos-complement bit pattern of the
        // (already range-checked) signed offset.
        let pc_rel = pc_rel as u32;
        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
        // Bits of the word occupied by the immediate field for each form.
        let mask = match self {
            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29, 25..5 inclusive
            LabelUse::PCRel32 => 0xffffffff,
        };
        // All forms except Adr21 and PCRel32 encode a word offset, i.e. the
        // byte offset shifted right by 2 (hence must be 4-byte-aligned).
        let pc_rel_shifted = match self {
            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
            _ => {
                debug_assert!(pc_rel & 3 == 0);
                pc_rel >> 2
            }
        };
        // Move the (shifted) offset into the immediate field's bit positions.
        let pc_rel_inserted = match self {
            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
            // Note: the *low* two bits of offset are put in the
            // *high* bits (30, 29).
            LabelUse::Adr21 => (pc_rel_shifted & 0x1ffffc) << 3 | (pc_rel_shifted & 3) << 29,
            LabelUse::PCRel32 => pc_rel_shifted,
        };
        // PCRel32 patches a plain 32-bit data word by *adding* the offset, so
        // any pre-existing value in the word acts as an addend; instruction
        // forms instead clear the immediate field and OR in the new value.
        let is_add = match self {
            LabelUse::PCRel32 => true,
            _ => false,
        };
        let insn_word = if is_add {
            insn_word.wrapping_add(pc_rel_inserted)
        } else {
            (insn_word & !mask) | pc_rel_inserted
        };
        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
            LabelUse::Branch26 => true,                      // veneer is a PCRel32
            _ => false,
        }
    }

    /// How large is the veneer, if supported?
    fn veneer_size(self) -> CodeOffset {
        match self {
            // One unconditional-branch instruction.
            LabelUse::Branch14 | LabelUse::Branch19 => 4,
            // Four instructions plus a 4-byte inline constant; see
            // `generate_veneer` below.
            LabelUse::Branch26 => 20,
            _ => unreachable!(),
        }
    }

    /// Size of the largest supported veneer (the Branch26 -> PCRel32 form).
    fn worst_case_veneer_size() -> CodeOffset {
        20
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => {
                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
                // bother with constructing an Inst.
                // 0b000101 in bits 31..26 is the `b` (unconditional branch)
                // opcode with a zero offset, patched later via Branch26.
                let insn_word = 0b000101 << 26;
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
                (veneer_offset, LabelUse::Branch26)
            }

            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
            // get a further range. This jump translates to a jump to a
            // relative location based on the address of the constant loaded
            // from here.
            //
            // If this path is taken from a call instruction then caller-saved
            // registers are available (minus arguments), so x16/x17 are
            // available. Otherwise for intra-function jumps we also reserve
            // x16/x17 as spill-style registers. In both cases these are
            // available for us to use.
            LabelUse::Branch26 => {
                let tmp1 = regs::spilltmp_reg();
                let tmp1_w = regs::writable_spilltmp_reg();
                let tmp2 = regs::tmp2_reg();
                let tmp2_w = regs::writable_tmp2_reg();
                // ldrsw x16, 16
                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
                // adr x17, 12
                let adr = emit::enc_adr(12, tmp2_w);
                // add x16, x16, x17
                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
                // br x16
                let br = emit::enc_br(tmp1);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
                // the 4-byte signed immediate we'll load is after these
                // instructions, 16-bytes in.
                (veneer_offset + 16, LabelUse::PCRel32)
            }

            _ => panic!("Unsupported label-reference type for veneer generation!"),
        }
    }

    /// Map an external relocation to a label-use kind, when one of our kinds
    /// can represent it. Only `Arm64Call` with a zero addend maps (to
    /// `Branch26`); everything else must remain a relocation.
    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
        match (reloc, addend) {
            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
            _ => None,
        }
    }
}
3100
#[cfg(test)]
mod tests {
    use super::*;

    /// Guard test: fails when the size of the `Inst` enum changes, so that
    /// unintentional growth is caught at review time.
    #[test]
    fn inst_size_test() {
        // 32-bit targets pack `Inst` into 28 bytes, except arm, which (like
        // all 64-bit targets) uses 32.
        let expected = match (cfg!(target_pointer_width = "32"), cfg!(target_arch = "arm")) {
            (true, false) => 28,
            _ => 32,
        };
        assert_eq!(expected, core::mem::size_of::<Inst>());
    }
}