Skip to main content

cranelift_codegen/isa/aarch64/inst/
emit.rs

1//! AArch64 ISA: binary code emission.
2
3use cranelift_control::ControlPlane;
4
5use crate::ir::{self, types::*};
6use crate::isa::aarch64;
7use crate::isa::aarch64::inst::*;
8use crate::trace;
9
/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(
    sink: Option<&mut MachBuffer<Inst>>,
    mem: &AMode,
    access_ty: Type,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset { off, .. }
        | &AMode::SPOffset { off }
        | &AMode::FPOffset { off }
        | &AMode::IncomingArg { off }
        | &AMode::SlotOffset { off } => {
            // Pick the base register implied by the pseudo-mode.
            let basereg = match mem {
                &AMode::RegOffset { rn, .. } => rn,
                &AMode::SPOffset { .. }
                | &AMode::SlotOffset { .. }
                | &AMode::IncomingArg { .. } => stack_reg(),
                &AMode::FPOffset { .. } => fp_reg(),
                _ => unreachable!(),
            };
            // Translate the pseudo-offset into a concrete offset from the
            // base register, consulting the frame layout where needed.
            let off = match mem {
                &AMode::IncomingArg { .. } => {
                    // Incoming args are addressed downward from the top of
                    // the frame: sum of all frame regions, minus `off`.
                    let frame_layout = state.frame_layout();
                    i64::from(
                        frame_layout.setup_area_size
                            + frame_layout.tail_args_size
                            + frame_layout.clobber_size
                            + frame_layout.fixed_frame_storage_size
                            + frame_layout.outgoing_args_size,
                    ) - off
                }
                &AMode::SlotOffset { .. } => {
                    // Stack slots sit above the outgoing-args area, so shift
                    // the slot offset up by that region's size.
                    let adj = i64::from(state.frame_layout().outgoing_args_size);
                    trace!(
                        "mem_finalize: slot offset {} + adj {} -> {}",
                        off,
                        adj,
                        off + adj
                    );
                    off + adj
                }
                _ => off,
            };

            // Prefer the compact immediate forms (9-bit signed, then 12-bit
            // scaled unsigned); otherwise materialize the offset into the
            // spill temp and use register-extended addressing.
            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled { rn: basereg, simm9 };
                (smallvec![], mem)
            } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
                let mem = AMode::UnsignedOffset {
                    rn: basereg,
                    uimm12,
                };
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                (
                    Inst::load_constant(tmp, off as u64),
                    AMode::RegExtended {
                        rn: basereg,
                        rm: tmp.to_reg(),
                        extendop: ExtendOp::SXTX,
                    },
                )
            }
        }

        AMode::Const { addr } => {
            // Without a sink we cannot allocate a constant-pool label, so
            // return the mode unchanged.
            let sink = match sink {
                Some(sink) => sink,
                None => return (smallvec![], mem.clone()),
            };
            let label = sink.get_label_for_constant(*addr);
            let label = MemLabel::Mach(label);
            (smallvec![], AMode::Label { label })
        }

        // All other modes are already concrete.
        _ => (smallvec![], mem.clone()),
    }
}
93
94//=============================================================================
95// Instructions and subcomponents: emission
96
97pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
98    assert_eq!(m.class(), RegClass::Int);
99    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
100}
101
102pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
103    assert_eq!(m.class(), RegClass::Float);
104    u32::from(m.to_real_reg().unwrap().hw_enc())
105}
106
107fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
108    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
109}
110
/// Encode a 3-register arithmetic instruction.
///
/// `bits_31_21` and `bits_15_10` carry the opcode fields; rd, rn, rm are
/// placed in the standard register fields (bits 0..5, 5..10, 16..21).
pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}
119
/// Encode a 2-register arithmetic instruction with a 12-bit immediate;
/// `immshift` (bit 22) selects the optional shift of the immediate.
fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}
133
/// Encode a 2-register instruction with a pre-encoded logical immediate
/// (`imm_bits`, placed at bit 10).
fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}
137
/// Encode a 4-register arithmetic instruction (multiply-add family):
/// rm at bit 16, `bit15` variant flag, ra at bit 10, rn at bit 5, rd at 0.
fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}
146
/// Encode an unconditional branch with a 26-bit offset field.
fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    // The offset must fit in the 26-bit field.
    assert!(off_26_0 < (1 << 26));
    off_26_0 | (op_31_26 << 26)
}
151
/// Encode a compare-and-branch (CBZ/CBNZ-shaped) instruction: 19-bit
/// offset at bit 5, register at bit 0.
fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}
156
/// Encode a conditional branch: 19-bit offset at bit 5, `op_4` at bit 4,
/// and the 4-bit condition code in bits 0..4.
fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    // Both the offset and condition must fit their fields.
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    cond | (op_4 << 4) | (off_18_0 << 5) | (op_31_24 << 24)
}
162
163/// Set the size bit of an instruction.
164fn enc_op_size(op: u32, size: OperandSize) -> u32 {
165    (op & !(1 << 31)) | (size.sf_bit() << 31)
166}
167
/// Encode a conditional branch: compare-and-branch on a register
/// (zero / not-zero) or a condition-code branch.
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        // Branch if register is zero; sf bit set from `size`.
        CondBrKind::Zero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
            size,
        ),
        // Branch if register is non-zero.
        CondBrKind::NotZero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
            size,
        ),
        // Branch on condition code.
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}
181
/// Encode a test-bit-and-branch (TBZ/TBNZ-shaped) instruction. The tested
/// bit index is split across bit 31 (high bit) and bits 19..24 (low five
/// bits); the 14-bit branch offset sits at bit 5.
fn enc_test_bit_and_branch(
    kind: TestBitAndBranchKind,
    taken: BranchTarget,
    reg: Reg,
    bit: u8,
) -> u32 {
    // Bit indices only go up to 63 (64-bit registers).
    assert!(bit < 64);
    let op_31 = u32::from(bit >> 5);
    let op_23_19 = u32::from(bit & 0b11111);
    // Low bit of the opcode field selects the Z vs. NZ variant.
    let op_30_24 = 0b0110110
        | match kind {
            TestBitAndBranchKind::Z => 0,
            TestBitAndBranchKind::NZ => 1,
        };
    (op_31 << 31)
        | (op_30_24 << 24)
        | (op_23_19 << 19)
        | (taken.as_offset14_or_zero() << 5)
        | machreg_to_gpr(reg)
}
202
/// Encode a move-wide instruction.
pub fn enc_move_wide(
    op: MoveWideOp,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    // `shift` selects which 16-bit chunk the immediate lands in (0..=3).
    assert!(imm.shift <= 0b11);
    // opc field (bits 29..31): MOVN vs. MOVZ.
    let op = match op {
        MoveWideOp::MovN => 0b00,
        MoveWideOp::MovZ => 0b10,
    };
    0x12800000
        | size.sf_bit() << 31
        | op << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}
222
/// Encode a move-keep immediate instruction.
pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    // `shift` selects which 16-bit chunk is replaced (0..=3).
    assert!(imm.shift <= 0b11);
    0x72800000
        | size.sf_bit() << 31
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}
232
/// Encode a load/store-pair instruction: scaled 7-bit signed immediate at
/// bit 15, second register `rt2` at bit 10.
fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}
240
/// Encode a load/store with a 9-bit signed (unscaled) immediate at bit 12;
/// `op_11_10` selects the index/writeback variant.
fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}
248
/// Encode a load/store with a scaled 12-bit unsigned immediate at bit 10;
/// bit 24 marks the unsigned-offset form.
fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}
256
/// Encode a register-offset load/store: index register `rm` at bit 16,
/// with an optional extend operation applied to it and an optional scale
/// (`s_bit`).
fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    // Option field (bits 13..16): only UXTW/SXTW/SXTX are valid extends
    // for register-offset addressing; `None` means plain LSL.
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}
282
/// Encode a load/store with a 19-bit immediate at bit 5 (literal forms).
pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}
286
/// Encode a vector-structure load: `q` (bit 30) selects the 128-bit form,
/// `size` is the 2-bit element-size field at bit 10.
fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}
296
/// Encode a vector load/store-pair: `opc` at bit 30, addressing-mode field
/// at bit 23, load/store selector at bit 22, scaled 7-bit immediate at 15.
fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}
318
/// Encode a 3-register vector instruction: opcode in `top11` and
/// `bit15_10`, vector registers in the standard fields.
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}
326
/// Encode a 3-register long/wide vector instruction: `q` (bit 30),
/// `u` (bit 29), 2-bit element `size` (bit 22), and variant `bit14`.
fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}
350
/// Encode a 2-register bit-manipulation instruction: `size` (sf) at bit 31,
/// `opcode2` at bit 16, `opcode1` at bit 10.
fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}
359
360pub(crate) fn enc_br(rn: Reg) -> u32 {
361    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
362}
363
364pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
365    let off = u32::try_from(off).unwrap();
366    let immlo = off & 3;
367    let immhi = (off >> 2) & ((1 << 19) - 1);
368    opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
369}
370
371pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
372    let opcode = 0b00010000 << 24;
373    enc_adr_inst(opcode, off, rd)
374}
375
376pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
377    let opcode = 0b10010000 << 24;
378    enc_adr_inst(opcode, off, rd)
379}
380
/// Encode a conditional-select instruction; `op` (bit 30) and `o2`
/// (bit 10) select the variant within the CSEL family.
fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
    debug_assert_eq!(op & 0b1, op);
    debug_assert_eq!(o2 & 0b1, o2);
    0b100_11010100_00000_0000_00_00000_00000
        | (op << 30)
        | (machreg_to_gpr(rm) << 16)
        | (cond.bits() << 12)
        | (o2 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}
392
/// Encode a floating-point conditional select: scalar type field at
/// bit 22, condition code at bit 12.
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}
401
/// Encode a conditional compare (register): NZCV flags in bits 0..4,
/// condition at bit 12.
fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_00_00000_0_0000
        | size.sf_bit() << 31
        | machreg_to_gpr(rm) << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}
410
/// Encode a conditional compare (immediate): 5-bit immediate at bit 16,
/// NZCV flags in bits 0..4, condition at bit 12.
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}
419
420fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
421    match size {
422        OperandSize::Size64 => {
423            debug_assert!(immr <= 63);
424            debug_assert!(imms <= 63);
425        }
426        OperandSize::Size32 => {
427            debug_assert!(immr <= 31);
428            debug_assert!(imms <= 31);
429        }
430    }
431    debug_assert_eq!(opc & 0b11, opc);
432    let n_bit = size.sf_bit();
433    0b0_00_100110_0_000000_000000_00000_00000
434        | size.sf_bit() << 31
435        | u32::from(opc) << 29
436        | n_bit << 22
437        | u32::from(immr) << 16
438        | u32::from(imms) << 10
439        | machreg_to_gpr(rn) << 5
440        | machreg_to_gpr(rd.to_reg())
441}
442
/// Encode a whole-vector move; `rn` is placed in both source register
/// fields (bits 5 and 16), and `is_16b` selects the 128-bit form.
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}
450
/// Encode a 2-register FPU instruction; `top22` carries the full opcode.
fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}
454
/// Encode a 3-register FPU instruction; `top22` carries the opcode.
fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}
461
/// Encode a 4-register FPU instruction (fused multiply-add shape);
/// `top17` carries the opcode, `ra` sits at bit 10.
fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}
469
/// Encode a floating-point compare of `rn` and `rm` (no destination
/// register); the scalar type field sits at bit 22.
fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}
476
/// Encode an FPU-to-integer conversion: vector source `rn`, GPR dest `rd`.
fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}
480
/// Encode an integer-to-FPU conversion: GPR source `rn`, vector dest `rd`.
fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}
484
/// Encode an FP rounding instruction; same field layout as `enc_fpurr`.
fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}
488
489fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
490    debug_assert_eq!(qu & 0b11, qu);
491    debug_assert_eq!(size & 0b11, size);
492    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
493    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
494    bits | qu << 29
495        | size << 22
496        | bits_12_16 << 12
497        | machreg_to_vec(rn) << 5
498        | machreg_to_vec(rd.to_reg())
499}
500
/// Encode a 2-register pairwise vector instruction; `bits_12_16` is the
/// 5-bit opcode field at bit 12.
fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}
509
/// Encode a 2-register pairwise-long vector instruction: `u` at bit 29,
/// single-bit element size at bit 22.
fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}
520
/// Encode an across-lanes vector instruction: `q` at bit 30, `u` at 29,
/// 2-bit element `size` at 22, and the 5-bit opcode at 12.
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}
534
/// Encode a table-lookup vector instruction: `len` (bits 13..15) is the
/// table-length field, `is_extension` (bit 12) selects the extension form.
fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}
544
/// Encode a `DMB ISH` barrier; a fixed encoding with no operand fields.
fn enc_dmb_ish() -> u32 {
    0xD503_3BBF
}
548
/// Encode an atomic read-modify-write instruction: access size at bit 30,
/// operand register `rs` at bit 16, operation selector in bits 12..15.
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
    // Encoding 31 for rt would select a different instruction form.
    assert!(machreg_to_gpr(rt.to_reg()) != 31);
    // 2-bit access size from the access type.
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    // Swap is distinguished by bit 15; its low opcode bits are zero.
    let bit15 = match op {
        AtomicRMWOp::Swp => 0b1,
        _ => 0b0,
    };
    let op = match op {
        AtomicRMWOp::Add => 0b000,
        AtomicRMWOp::Clr => 0b001,
        AtomicRMWOp::Eor => 0b010,
        AtomicRMWOp::Set => 0b011,
        AtomicRMWOp::Smax => 0b100,
        AtomicRMWOp::Smin => 0b101,
        AtomicRMWOp::Umax => 0b110,
        AtomicRMWOp::Umin => 0b111,
        AtomicRMWOp::Swp => 0b000,
    };
    0b00_111_000_111_00000_0_000_00_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs) << 16)
        | bit15 << 15
        | (op << 12)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}
581
/// Encode a load-acquire (LDAR-family) instruction; access size at bit 30.
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    // 2-bit access size from the access type.
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}
595
/// Encode a store-release (STLR-family) instruction; access size at bit 30.
fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    // 2-bit access size from the access type.
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}
609
/// Encode a load-acquire-exclusive (LDAXR-family) instruction; access size
/// at bit 30.
fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    // 2-bit access size from the access type.
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}
623
/// Encode a store-release-exclusive (STLXR-family) instruction; the status
/// register `rs` sits at bit 16, access size at bit 30.
fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    // 2-bit access size from the access type.
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}
638
/// Encode a compare-and-swap instruction: 2-bit access `size` at bit 30,
/// compare register `rs` at bit 16.
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}
648
/// Encode an ASIMD modified-immediate instruction: the 8-bit immediate is
/// split into `abc` (top 3 bits, at bit 16) and `defgh` (low 5 bits, at
/// bit 5); `cmode` selects the expansion pattern.
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}
663
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compiletime. See [cranelift_control].
    ctrl_plane: ControlPlane,

    /// Frame layout of the function being emitted; cloned from the `Callee`
    /// when this state is created (see `MachInstEmitState::new`).
    frame_layout: FrameLayout,
}
677
impl MachInstEmitState<Inst> for EmitState {
    /// Create a fresh emission state, capturing the callee's frame layout.
    fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    /// Record the stack map to attach to the next safepoint instruction.
    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}
703
impl EmitState {
    /// Take (and clear) the pending user stack map, if any.
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    /// Clear per-instruction state after an instruction has been emitted.
    fn clear_post_insn(&mut self) {
        self.user_stack_map = None;
    }
}
713
/// Constant state used during function compilation.
pub struct EmitInfo {
    /// Shared compilation settings (`settings::Flags`).
    flags: settings::Flags,
    /// AArch64-specific ISA settings (`crate::isa::aarch64::settings`).
    isa_flags: aarch64::settings::Flags,
}
719
impl EmitInfo {
    /// Create a constant state for emission of instructions.
    ///
    /// Takes both the shared settings and the AArch64-specific ISA flags.
    pub fn new(flags: settings::Flags, isa_flags: aarch64::settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}
726
727impl MachInstEmit for Inst {
728    type State = EmitState;
729    type Info = EmitInfo;
730
731    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
732        // N.B.: we *must* not exceed the "worst-case size" used to compute
733        // where to insert islands, except when islands are explicitly triggered
734        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
735        // to allow disabling the check for `JTSequence`, which is always
736        // emitted following an `EmitIsland`.
737        let mut start_off = sink.cur_offset();
738
739        match self {
740            &Inst::AluRRR {
741                alu_op,
742                size,
743                rd,
744                rn,
745                rm,
746            } => {
747                debug_assert!(match alu_op {
748                    ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
749                    _ => true,
750                });
751                let top11 = match alu_op {
752                    ALUOp::Add => 0b00001011_000,
753                    ALUOp::Adc => 0b00011010_000,
754                    ALUOp::AdcS => 0b00111010_000,
755                    ALUOp::Sub => 0b01001011_000,
756                    ALUOp::Sbc => 0b01011010_000,
757                    ALUOp::SbcS => 0b01111010_000,
758                    ALUOp::Orr => 0b00101010_000,
759                    ALUOp::And => 0b00001010_000,
760                    ALUOp::AndS => 0b01101010_000,
761                    ALUOp::Eor => 0b01001010_000,
762                    ALUOp::OrrNot => 0b00101010_001,
763                    ALUOp::AndNot => 0b00001010_001,
764                    ALUOp::EorNot => 0b01001010_001,
765                    ALUOp::AddS => 0b00101011_000,
766                    ALUOp::SubS => 0b01101011_000,
767                    ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
768                    ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
769                    ALUOp::SMulH => 0b10011011_010,
770                    ALUOp::UMulH => 0b10011011_110,
771                };
772
773                let top11 = top11 | size.sf_bit() << 10;
774                let bit15_10 = match alu_op {
775                    ALUOp::SDiv => 0b000011,
776                    ALUOp::UDiv => 0b000010,
777                    ALUOp::Extr => 0b001011,
778                    ALUOp::Lsr => 0b001001,
779                    ALUOp::Asr => 0b001010,
780                    ALUOp::Lsl => 0b001000,
781                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
782                    _ => 0b000000,
783                };
784                debug_assert_ne!(writable_stack_reg(), rd);
785                // The stack pointer is the zero register in this context, so this might be an
786                // indication that something is wrong.
787                debug_assert_ne!(stack_reg(), rn);
788                debug_assert_ne!(stack_reg(), rm);
789                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
790            }
791            &Inst::AluRRRR {
792                alu_op,
793                size,
794                rd,
795                rm,
796                rn,
797                ra,
798            } => {
799                let (top11, bit15) = match alu_op {
800                    ALUOp3::MAdd => (0b0_00_11011_000, 0),
801                    ALUOp3::MSub => (0b0_00_11011_000, 1),
802                    ALUOp3::UMAddL => {
803                        debug_assert!(size == OperandSize::Size32);
804                        (0b1_00_11011_1_01, 0)
805                    }
806                    ALUOp3::SMAddL => {
807                        debug_assert!(size == OperandSize::Size32);
808                        (0b1_00_11011_0_01, 0)
809                    }
810                };
811                let top11 = top11 | size.sf_bit() << 10;
812                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
813            }
814            &Inst::AluRRImm12 {
815                alu_op,
816                size,
817                rd,
818                rn,
819                ref imm12,
820            } => {
821                let top8 = match alu_op {
822                    ALUOp::Add => 0b000_10001,
823                    ALUOp::Sub => 0b010_10001,
824                    ALUOp::AddS => 0b001_10001,
825                    ALUOp::SubS => 0b011_10001,
826                    _ => unimplemented!("{:?}", alu_op),
827                };
828                let top8 = top8 | size.sf_bit() << 7;
829                sink.put4(enc_arith_rr_imm12(
830                    top8,
831                    imm12.shift_bits(),
832                    imm12.imm_bits(),
833                    rn,
834                    rd,
835                ));
836            }
837            &Inst::AluRRImmLogic {
838                alu_op,
839                size,
840                rd,
841                rn,
842                ref imml,
843            } => {
844                let (top9, inv) = match alu_op {
845                    ALUOp::Orr => (0b001_100100, false),
846                    ALUOp::And => (0b000_100100, false),
847                    ALUOp::AndS => (0b011_100100, false),
848                    ALUOp::Eor => (0b010_100100, false),
849                    ALUOp::OrrNot => (0b001_100100, true),
850                    ALUOp::AndNot => (0b000_100100, true),
851                    ALUOp::EorNot => (0b010_100100, true),
852                    _ => unimplemented!("{:?}", alu_op),
853                };
854                let top9 = top9 | size.sf_bit() << 8;
855                let imml = if inv { imml.invert() } else { *imml };
856                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
857            }
858
859            &Inst::AluRRImmShift {
860                alu_op,
861                size,
862                rd,
863                rn,
864                ref immshift,
865            } => {
866                let amt = immshift.value();
867                let (top10, immr, imms) = match alu_op {
868                    ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
869                    ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
870                    ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
871                    ALUOp::Lsl => {
872                        let bits = if size.is64() { 64 } else { 32 };
873                        (
874                            0b0101001100,
875                            u32::from((bits - amt) % bits),
876                            u32::from(bits - 1 - amt),
877                        )
878                    }
879                    _ => unimplemented!("{:?}", alu_op),
880                };
881                let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
882                let imms = match alu_op {
883                    ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
884                    _ => imms,
885                };
886                sink.put4(
887                    (top10 << 22)
888                        | (immr << 16)
889                        | (imms << 10)
890                        | (machreg_to_gpr(rn) << 5)
891                        | machreg_to_gpr(rd.to_reg()),
892                );
893            }
894
            &Inst::AluRRRShift {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                // ALU operation with a shifted register as the second source
                // operand. `top11` is the base opcode (top 11 bits) for each
                // supported ALU op in this encoding class.
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b000_01011000,
                    ALUOp::AddS => 0b001_01011000,
                    ALUOp::Sub => 0b010_01011000,
                    ALUOp::SubS => 0b011_01011000,
                    ALUOp::Orr => 0b001_01010000,
                    ALUOp::And => 0b000_01010000,
                    ALUOp::AndS => 0b011_01010000,
                    ALUOp::Eor => 0b010_01010000,
                    ALUOp::OrrNot => 0b001_01010001,
                    ALUOp::EorNot => 0b010_01010001,
                    ALUOp::AndNot => 0b000_01010001,
                    ALUOp::Extr => 0b000_10011100,
                    _ => unimplemented!("{:?}", alu_op),
                };
                // OR the operand-size (sf) bit into the top of the opcode.
                let top11 = top11 | size.sf_bit() << 10;
                // Shift-type selector bits.
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                // Shift amount occupies instruction bits 15..10.
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }
923
            &Inst::AluRRRExtend {
                alu_op,
                size,
                rd,
                rn,
                rm,
                extendop,
            } => {
                // ALU operation with an extended register as the second
                // source operand (add/sub family only).
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b00001011001,
                    ALUOp::Sub => 0b01001011001,
                    ALUOp::AddS => 0b00101011001,
                    ALUOp::SubS => 0b01101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                // OR the operand-size (sf) bit into the top of the opcode.
                let top11 = top11 | size.sf_bit() << 10;
                // Extend-op selector goes in the upper three of instruction
                // bits 15..10; the low (shift-amount) bits are left zero.
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }
943
            &Inst::BitRR {
                op, size, rd, rn, ..
            } => {
                // One-source bit-manipulation instructions (rbit/clz/cls/rev*).
                // The (op1, op2) pair selects the concrete operation within
                // the encoding class handled by `enc_bit_rr`.
                let (op1, op2) = match op {
                    BitOp::RBit => (0b00000, 0b000000),
                    BitOp::Clz => (0b00000, 0b000100),
                    BitOp::Cls => (0b00000, 0b000101),
                    BitOp::Rev16 => (0b00000, 0b000001),
                    BitOp::Rev32 => (0b00000, 0b000010),
                    BitOp::Rev64 => (0b00000, 0b000011),
                };
                sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
            }
957
            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad16 { rd, ref mem, flags }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                // Convert any "special" addressing mode (e.g. a frame-slot
                // offset) into a real one; this may produce helper
                // instructions that must come immediately before the load.
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let op = match self {
                    Inst::ULoad8 { .. } => 0b0011100001,
                    Inst::SLoad8 { .. } => 0b0011100010,
                    Inst::ULoad16 { .. } => 0b0111100001,
                    Inst::SLoad16 { .. } => 0b0111100010,
                    Inst::ULoad32 { .. } => 0b1011100001,
                    Inst::SLoad32 { .. } => 0b1011100010,
                    Inst::ULoad64 { .. } => 0b1111100001,
                    Inst::FpuLoad16 { .. } => 0b0111110001,
                    Inst::FpuLoad32 { .. } => 0b1011110001,
                    Inst::FpuLoad64 { .. } => 0b1111110001,
                    Inst::FpuLoad128 { .. } => 0b0011110011,
                    _ => unreachable!(),
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    // Base register + unscaled signed 9-bit immediate.
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    // Base register + scaled unsigned 12-bit immediate.
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    // Base register + index register, no scale or extend.
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    // Base register + index register scaled by the access
                    // size, optionally with an extend of the index register.
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    // Base register + extended index register, unscaled.
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    // PC-relative ("literal") load.
                    &AMode::Label { ref label } => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            MemLabel::PCRel(off) => *off as u32,
                            // Emit a relocation into the `MachBuffer`
                            // for the label that's being loaded from and
                            // encode an address of 0 in its place which will
                            // get filled in by relocation resolution later on.
                            MemLabel::Mach(label) => {
                                sink.use_label_at_offset(
                                    sink.cur_offset(),
                                    *label,
                                    LabelUse::Ldr19,
                                );
                                0
                            }
                        } / 4;
                        // The encoded value is a 19-bit word offset.
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }
1104
            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore16 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                // Convert any "special" addressing mode (e.g. a frame-slot
                // offset) into a real one; this may produce helper
                // instructions that must come immediately before the store.
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // Base opcode (top 10 bits) for the unscaled-immediate form;
                // other addressing modes OR further bits in below (as for the
                // load arm above).
                let op = match self {
                    Inst::Store8 { .. } => 0b0011100000,
                    Inst::Store16 { .. } => 0b0111100000,
                    Inst::Store32 { .. } => 0b1011100000,
                    Inst::Store64 { .. } => 0b1111100000,
                    Inst::FpuStore16 { .. } => 0b0111110000,
                    Inst::FpuStore32 { .. } => 0b1011110000,
                    Inst::FpuStore64 { .. } => 0b1111110000,
                    Inst::FpuStore128 { .. } => 0b0011110010,
                    _ => unreachable!(),
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    // Base register + unscaled signed 9-bit immediate.
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    // Base register + scaled unsigned 12-bit immediate.
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    // Base register + index register, no scale or extend.
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    // Base register + index register scaled by the access
                    // size, optionally with an extend of the index register.
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    // Base register + extended index register, unscaled.
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    // Stores have no PC-relative (literal) form.
                    &AMode::Label { .. } => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }
1200
            &Inst::StoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                // Store a pair of 64-bit integer registers (STP).
                let mem = mem.clone();
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                match &mem {
                    // Base register + signed, scaled 7-bit immediate.
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::LoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                // Load a pair of 64-bit integer registers (LDP).
                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();
                let mem = mem.clone();
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    // Base register + signed, scaled 7-bit immediate.
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::FpuLoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuLoadP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                // Load a pair of FP/vector registers.
                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();
                let mem = mem.clone();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                // `opc` selects the register size: 0b01 for a 64-bit pair,
                // 0b10 for a 128-bit pair.
                let opc = match self {
                    &Inst::FpuLoadP64 { .. } => 0b01,
                    &Inst::FpuLoadP128 { .. } => 0b10,
                    _ => unreachable!(),
                };

                match &mem {
                    // Base register + signed, scaled 7-bit immediate.
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::FpuStoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuStoreP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                // Store a pair of FP/vector registers. Mirrors the load-pair
                // arm above except the third `enc_ldst_vec_pair` argument
                // (the load flag) is false.
                let mem = mem.clone();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                // `opc` selects the register size: 0b01 for a 64-bit pair,
                // 0b10 for a 128-bit pair.
                let opc = match self {
                    &Inst::FpuStoreP64 { .. } => 0b01,
                    &Inst::FpuStoreP128 { .. } => 0b10,
                    _ => unreachable!(),
                };

                match &mem {
                    // Base register + signed, scaled 7-bit immediate.
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
                    }
                    // Pre-indexed: writeback to SP before the access.
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
                    }
                    // Post-indexed: writeback to SP after the access.
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::Mov { size, rd, rm } => {
                // Integer register-to-register move.
                assert!(rd.to_reg().class() == rm.class());
                assert!(rm.class() == RegClass::Int);

                match size {
                    OperandSize::Size64 => {
                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                        // should never MOV to XZR.
                        assert!(rd.to_reg() != stack_reg());

                        if rm == stack_reg() {
                            // We can't use ORR here, so use an `add rd, sp, #0` instead.
                            let imm12 = Imm12::maybe_from_u64(0).unwrap();
                            sink.put4(enc_arith_rr_imm12(
                                0b100_10001,
                                imm12.shift_bits(),
                                imm12.imm_bits(),
                                rm,
                                rd,
                            ));
                        } else {
                            // Encoded as ORR rd, rm, zero.
                            sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
                        }
                    }
                    OperandSize::Size32 => {
                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                        // should never MOV to XZR.
                        assert!(machreg_to_gpr(rd.to_reg()) != 31);
                        // Encoded as ORR rd, rm, zero.
                        sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
                    }
                }
            }
            &Inst::MovFromPReg { rd, rm } => {
                // Move out of a special "physical" register (fp/sp/lr/pinned);
                // lowered to an ordinary 64-bit Mov.
                let rm: Reg = rm.into();
                debug_assert!(
                    [
                        regs::fp_reg(),
                        regs::stack_reg(),
                        regs::link_reg(),
                        regs::pinned_reg()
                    ]
                    .contains(&rm)
                );
                assert!(rm.class() == RegClass::Int);
                assert!(rd.to_reg().class() == rm.class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
            &Inst::MovToPReg { rd, rm } => {
                // Move into a special "physical" register (fp/sp/lr/pinned);
                // lowered to an ordinary 64-bit Mov.
                let rd: Writable<Reg> = Writable::from_reg(rd.into());
                debug_assert!(
                    [
                        regs::fp_reg(),
                        regs::stack_reg(),
                        regs::link_reg(),
                        regs::pinned_reg()
                    ]
                    .contains(&rd.to_reg())
                );
                assert!(rd.to_reg().class() == RegClass::Int);
                assert!(rm.class() == rd.to_reg().class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
            &Inst::MovWide { op, rd, imm, size } => {
                // Wide-immediate move; `op` selects the variant.
                sink.put4(enc_move_wide(op, rd, imm, size));
            }
            &Inst::MovK { rd, rn, imm, size } => {
                // MOVK inserts a 16-bit immediate into `rd` while keeping the
                // other bits, so it reads and writes the same register.
                debug_assert_eq!(rn, rd.to_reg());
                sink.put4(enc_movk(rd, imm, size));
            }
            &Inst::CSel { rd, rn, rm, cond } => {
                // Conditional select: rd = cond ? rn : rm.
                sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
            }
            &Inst::CSNeg { rd, rn, rm, cond } => {
                // Conditional select-negate (csel-family encoding with both
                // modifier bits set).
                sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
            }
            &Inst::CSet { rd, cond } => {
                // CSET: materialize 0/1 from a condition; csel-family op on
                // the zero register with the inverted condition.
                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
            }
            &Inst::CSetm { rd, cond } => {
                // CSETM: materialize 0/-1 from a condition; csel-family op on
                // the zero register with the inverted condition.
                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
            }
            &Inst::CCmp {
                size,
                rn,
                rm,
                nzcv,
                cond,
            } => {
                // Conditional compare, register form.
                sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                // Conditional compare, immediate form.
                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
            }
            &Inst::AtomicRMW {
                ty,
                op,
                rs,
                rt,
                rn,
                flags,
            } => {
                // Single-instruction atomic read-modify-write (see
                // `enc_acq_rel` for the acquire/release encoding).
                if let Some(trap_code) = flags.trap_code() {
                    // Register the trap at the offset of the memory access.
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
            }
            &Inst::AtomicRMWLoop { ty, op, flags, .. } => {
                /* Emit this:
                     again:
                      ldaxr{,b,h}  x/w27, [x25]
                      // maybe sign extend
                      op          x28, x27, x26 // op is add,sub,and,orr,eor
                      stlxr{,b,h}  w24, x/w28, [x25]
                      cbnz        x24, again

                   Operand conventions:
                      IN:  x25 (addr), x26 (2nd arg for op)
                      OUT: x27 (old value), x24 (trashed), x28 (trashed)

                   It is unfortunate that, per the ARM documentation, x28 cannot be used for
                   both the store-data and success-flag operands of stlxr.  This causes the
                   instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
                   instead for the success-flag.
                */
                // TODO: We should not hardcode registers here, a better idea would be to
                // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let x28wr = writable_xreg(28);
                let again_label = sink.get_label();

                // again:
                sink.bind_label(again_label, &mut state.ctrl_plane);

                // The exclusive load may fault; register its trap site.
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
                let size = OperandSize::from_ty(ty);
                // Signed min/max on sub-word types must compare the
                // sign-extended loaded value.
                let sign_ext = match op {
                    AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
                        I16 => Some((ExtendOp::SXTH, 16)),
                        I8 => Some((ExtendOp::SXTB, 8)),
                        _ => None,
                    },
                    _ => None,
                };

                // sxt{b|h} the loaded result if necessary.
                if sign_ext.is_some() {
                    let (_, from_bits) = sign_ext.unwrap();
                    Inst::Extend {
                        rd: x27wr,
                        rn: x27,
                        signed: true,
                        from_bits,
                        to_bits: size.bits(),
                    }
                    .emit(sink, emit_info, state);
                }

                match op {
                    AtomicRMWLoopOp::Xchg => {} // do nothing
                    AtomicRMWLoopOp::Nand => {
                        // and x28, x27, x26
                        // mvn x28, x28

                        Inst::AluRRR {
                            alu_op: ALUOp::And,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);

                        // OrrNot with the zero register as first source is
                        // the bitwise-not (MVN) of the second source.
                        Inst::AluRRR {
                            alu_op: ALUOp::OrrNot,
                            size,
                            rd: x28wr,
                            rn: xzr,
                            rm: x28,
                        }
                        .emit(sink, emit_info, state);
                    }
                    AtomicRMWLoopOp::Umin
                    | AtomicRMWLoopOp::Umax
                    | AtomicRMWLoopOp::Smin
                    | AtomicRMWLoopOp::Smax => {
                        // cmp x27, x26 {?sxt}
                        // csel.op x28, x27, x26

                        let cond = match op {
                            AtomicRMWLoopOp::Umin => Cond::Lo,
                            AtomicRMWLoopOp::Umax => Cond::Hi,
                            AtomicRMWLoopOp::Smin => Cond::Lt,
                            AtomicRMWLoopOp::Smax => Cond::Gt,
                            _ => unreachable!(),
                        };

                        // Compare, sign-extending the second operand when the
                        // loaded value was sign-extended above.
                        if sign_ext.is_some() {
                            let (extendop, _) = sign_ext.unwrap();
                            Inst::AluRRRExtend {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                                extendop,
                            }
                            .emit(sink, emit_info, state);
                        } else {
                            Inst::AluRRR {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                            }
                            .emit(sink, emit_info, state);
                        }

                        Inst::CSel {
                            cond,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                    _ => {
                        // add/sub/and/orr/eor x28, x27, x26
                        let alu_op = match op {
                            AtomicRMWLoopOp::Add => ALUOp::Add,
                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
                            AtomicRMWLoopOp::And => ALUOp::And,
                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
                            AtomicRMWLoopOp::Nand
                            | AtomicRMWLoopOp::Umin
                            | AtomicRMWLoopOp::Umax
                            | AtomicRMWLoopOp::Smin
                            | AtomicRMWLoopOp::Smax
                            | AtomicRMWLoopOp::Xchg => unreachable!(),
                        };

                        Inst::AluRRR {
                            alu_op,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                }

                // The exclusive store may also fault; register its trap site.
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }
                if op == AtomicRMWLoopOp::Xchg {
                    // Exchange stores the new value directly from x26.
                    sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
                } else {
                    sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
                }

                // cbnz w24, again
                // Note, we're actually testing x24, and relying on the default zero-high-half
                // rule in the assignment that `stlxr` does.
                let br_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24, OperandSize::Size64),
                ));
                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
            }
1640            &Inst::AtomicCAS {
1641                rd,
1642                rs,
1643                rt,
1644                rn,
1645                ty,
1646                flags,
1647            } => {
                    // Single-instruction compare-and-swap (LSE CAS family).
                    // Regalloc must have tied the comparison value `rs` to the
                    // destination `rd`: the CAS instruction reads and writes
                    // the same register.
1648                debug_assert_eq!(rd.to_reg(), rs);
                    // Two-bit size field of the encoding:
                    // 00 = byte, 01 = halfword, 10 = word, 11 = doubleword.
1649                let size = match ty {
1650                    I8 => 0b00,
1651                    I16 => 0b01,
1652                    I32 => 0b10,
1653                    I64 => 0b11,
1654                    _ => panic!("Unsupported type: {ty}"),
1655                };
1656
                    // The memory access may trap; if the flags carry a trap
                    // code, record it at the offset of the instruction emitted
                    // just below.
1657                if let Some(trap_code) = flags.trap_code() {
1658                    sink.add_trap(trap_code);
1659                }
1660
1661                sink.put4(enc_cas(size, rd, rt, rn));
1662            }
1663            &Inst::AtomicCASLoop { ty, flags, .. } => {
1664                /* Emit this:
1665                    again:
1666                     ldaxr{,b,h} x/w27, [x25]
1667                     cmp         x27, x/w26 uxt{b,h}
1668                     b.ne        out
1669                     stlxr{,b,h} w24, x/w28, [x25]
1670                     cbnz        x24, again
1671                    out:
1672
1673                  Operand conventions:
1674                     IN:  x25 (addr), x26 (expected value), x28 (replacement value)
1675                     OUT: x27 (old value), x24 (trashed)
1676                */
1677                let x24 = xreg(24);
1678                let x25 = xreg(25);
1679                let x26 = xreg(26);
1680                let x27 = xreg(27);
1681                let x28 = xreg(28);
1682                let xzrwr = writable_zero_reg();
1683                let x24wr = writable_xreg(24);
1684                let x27wr = writable_xreg(27);
1685                let again_label = sink.get_label();
1686                let out_label = sink.get_label();
1687
1688                // again:
1689                sink.bind_label(again_label, &mut state.ctrl_plane);
1690
                    // Both memory accesses in this loop (the load-exclusive
                    // here and the store-exclusive further down) may trap, so
                    // a trap code, if any, is recorded before each one.
1691                if let Some(trap_code) = flags.trap_code() {
1692                    sink.add_trap(trap_code);
1693                }
1694
1695                // ldaxr x27, [x25]
1696                sink.put4(enc_ldaxr(ty, x27wr, x25));
1697
1698                // The top 32-bits are zero-extended by the ldaxr so we don't
1699                // have to use UXTW, just the x-form of the register.
1700                let (bit21, extend_op) = match ty {
1701                    I8 => (0b1, 0b000000),
1702                    I16 => (0b1, 0b001000),
1703                    _ => (0b0, 0b000000),
1704                };
1705                let bits_31_21 = 0b111_01011_000 | bit21;
1706                // cmp x27, x26 (== subs xzr, x27, x26)
1707                sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1708
1709                // b.ne out
1710                let br_out_offset = sink.cur_offset();
1711                sink.put4(enc_conditional_br(
1712                    BranchTarget::Label(out_label),
1713                    CondBrKind::Cond(Cond::Ne),
1714                ));
1715                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1716
                    // Second trap record: the store-exclusive below is also a
                    // memory access.
1717                if let Some(trap_code) = flags.trap_code() {
1718                    sink.add_trap(trap_code);
1719                }
1720
1721                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1722
1723                // cbnz w24, again.
1724                // Note, we're actually testing x24, and relying on the default zero-high-half
1725                // rule in the assignment that `stlxr` does.
1726                let br_again_offset = sink.cur_offset();
1727                sink.put4(enc_conditional_br(
1728                    BranchTarget::Label(again_label),
1729                    CondBrKind::NotZero(x24, OperandSize::Size64),
1730                ));
1731                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1732
1733                // out:
1734                sink.bind_label(out_label, &mut state.ctrl_plane);
1735            }
1736            &Inst::LoadAcquire {
1737                access_ty,
1738                rt,
1739                rn,
1740                flags,
1741            } => {
                    // ldar{,b,h} rt, [rn]: load with acquire ordering. The
                    // access may trap; record any trap code at this offset
                    // before emitting.
1742                if let Some(trap_code) = flags.trap_code() {
1743                    sink.add_trap(trap_code);
1744                }
1745
1746                sink.put4(enc_ldar(access_ty, rt, rn));
1747            }
1748            &Inst::StoreRelease {
1749                access_ty,
1750                rt,
1751                rn,
1752                flags,
1753            } => {
                    // stlr{,b,h} rt, [rn]: store with release ordering; same
                    // trap-recording protocol as the load above.
1754                if let Some(trap_code) = flags.trap_code() {
1755                    sink.add_trap(trap_code);
1756                }
1757
1758                sink.put4(enc_stlr(access_ty, rt, rn));
1759            }
1760            &Inst::Fence {} => {
1761                sink.put4(enc_dmb_ish()); // dmb ish
1762            }
1763            &Inst::Csdb {} => {
                    // Fixed encoding of the CSDB (speculation data barrier)
                    // instruction.
1764                sink.put4(0xd503229f);
1765            }
1766            &Inst::FpuMove32 { rd, rn } => {
                    // Scalar FP register move, 32-bit. This encoding and the
                    // 64-bit one below differ only in the precision field
                    // (the `00` vs `01` group).
1767                sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
1768            }
1769            &Inst::FpuMove64 { rd, rn } => {
                    // Scalar FP register move, 64-bit.
1770                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1771            }
1772            &Inst::FpuMove128 { rd, rn } => {
                    // Whole 128-bit vector register move.
1773                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1774            }
1775            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                    // Move one vector lane into a scalar FP register. `imm5`
                    // encodes the lane size (position of its low set bit) and
                    // the lane index (bits above it); `mask` is the maximum
                    // representable index for that lane size.
1776                let (imm5, shift, mask) = match size.lane_size() {
1777                    ScalarSize::Size32 => (0b00100, 3, 0b011),
1778                    ScalarSize::Size64 => (0b01000, 4, 0b001),
1779                    _ => unimplemented!(),
1780                };
                    // The lane index must fit in the bits available.
1781                debug_assert_eq!(idx & mask, idx);
1782                let imm5 = imm5 | ((idx as u32) << shift);
1783                sink.put4(
1784                    0b010_11110000_00000_000001_00000_00000
1785                        | (imm5 << 16)
1786                        | (machreg_to_vec(rn) << 5)
1787                        | machreg_to_vec(rd.to_reg()),
1788                );
1789            }
1790            &Inst::FpuExtend { rd, rn, size } => {
                    // Scalar FP move with the precision field supplied by
                    // `size` (OR'd into bits 13:12 of the top-22 pattern).
1791                sink.put4(enc_fpurr(
1792                    0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
1793                    rd,
1794                    rn,
1795                ));
1796            }
1797            &Inst::FpuRR {
1798                fpu_op,
1799                size,
1800                rd,
1801                rn,
1802            } => {
                    // One-operand scalar FP ops. `top22` selects the opcode;
                    // the precision field derived from `size` is OR'd in
                    // below. The cross-precision converts assert the expected
                    // source size.
1803                let top22 = match fpu_op {
1804                    FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
1805                    FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
1806                    FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
1807                    FPUOp1::Cvt32To64 => {
1808                        debug_assert_eq!(size, ScalarSize::Size32);
1809                        0b000_11110_00_1_000101_10000
1810                    }
1811                    FPUOp1::Cvt64To32 => {
1812                        debug_assert_eq!(size, ScalarSize::Size64);
1813                        0b000_11110_01_1_000100_10000
1814                    }
1815                };
1816                let top22 = top22 | size.ftype() << 12;
1817                sink.put4(enc_fpurr(top22, rd, rn));
1818            }
1819            &Inst::FpuRRR {
1820                fpu_op,
1821                size,
1822                rd,
1823                rn,
1824                rm,
1825            } => {
                    // Two-operand scalar FP ops; precision OR'd in from
                    // `size` as above.
1826                let top22 = match fpu_op {
1827                    FPUOp2::Add => 0b000_11110_00_1_00000_001010,
1828                    FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
1829                    FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
1830                    FPUOp2::Div => 0b000_11110_00_1_00000_000110,
1831                    FPUOp2::Max => 0b000_11110_00_1_00000_010010,
1832                    FPUOp2::Min => 0b000_11110_00_1_00000_010110,
1833                };
1834                let top22 = top22 | size.ftype() << 12;
1835                sink.put4(enc_fpurrr(top22, rd, rn, rm));
1836            }
1837            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
                    // Scalar shift-right-by-immediate; the shift amount
                    // arrives pre-encoded in the op's payload (`imm.enc()`)
                    // and is placed in the immh:immb field at bit 16.
1838                FPUOpRI::UShr32(imm) => {
1839                    debug_assert_eq!(32, imm.lane_size_in_bits);
1840                    sink.put4(
1841                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1842                            | imm.enc() << 16
1843                            | machreg_to_vec(rn) << 5
1844                            | machreg_to_vec(rd.to_reg()),
1845                    )
1846                }
1847                FPUOpRI::UShr64(imm) => {
1848                    debug_assert_eq!(64, imm.lane_size_in_bits);
1849                    sink.put4(
1850                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1851                            | imm.enc() << 16
1852                            | machreg_to_vec(rn) << 5
1853                            | machreg_to_vec(rd.to_reg()),
1854                    )
1855                }
1856            },
1857            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                    // Shift-and-insert (sli): the destination is also read,
                    // so regalloc must have tied `ri` to `rd`.
1858                debug_assert_eq!(rd.to_reg(), ri);
1859                match fpu_op {
1860                    FPUOpRIMod::Sli64(imm) => {
1861                        debug_assert_eq!(64, imm.lane_size_in_bits);
1862                        sink.put4(
1863                            0b01_1_111110_0000000_010101_00000_00000
1864                                | imm.enc() << 16
1865                                | machreg_to_vec(rn) << 5
1866                                | machreg_to_vec(rd.to_reg()),
1867                        )
1868                    }
1869                    FPUOpRIMod::Sli32(imm) => {
1870                        debug_assert_eq!(32, imm.lane_size_in_bits);
1871                        sink.put4(
1872                            0b0_0_1_011110_0000000_010101_00000_00000
1873                                | imm.enc() << 16
1874                                | machreg_to_vec(rn) << 5
1875                                | machreg_to_vec(rd.to_reg()),
1876                        )
1877                    }
1878                }
1879            }
1880            &Inst::FpuRRRR {
1881                fpu_op,
1882                size,
1883                rd,
1884                rn,
1885                rm,
1886                ra,
1887            } => {
                    // Fused multiply-add family (madd/msub/nmadd/nmsub); the
                    // two low opcode bits of `top17` select the variant, and
                    // the precision field from `size` is OR'd in at bit 7.
1888                let top17 = match fpu_op {
1889                    FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
1890                    FPUOp3::MSub => 0b000_11111_00_0_00000_1,
1891                    FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
1892                    FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
1893                };
1894                let top17 = top17 | size.ftype() << 7;
1895                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1896            }
1897            &Inst::VecMisc { op, rd, rn, size } => {
1898                let (q, enc_size) = size.enc_size();
                    // Per-op U bit, opcode (instruction bits 16:12), and size
                    // field. NOTE: this rebinds `size` to the two-bit
                    // *encoded* size for the rest of the arm. The
                    // debug_asserts restrict each FP op to the vector shapes
                    // this lowering supports.
1899                let (u, bits_12_16, size) = match op {
1900                    VecMisc2::Not => (0b1, 0b00101, 0b00),
1901                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
1902                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
1903                    VecMisc2::Fabs => {
1904                        debug_assert!(
1905                            size == VectorSize::Size32x2
1906                                || size == VectorSize::Size32x4
1907                                || size == VectorSize::Size64x2
1908                        );
1909                        (0b0, 0b01111, enc_size)
1910                    }
1911                    VecMisc2::Fneg => {
1912                        debug_assert!(
1913                            size == VectorSize::Size32x2
1914                                || size == VectorSize::Size32x4
1915                                || size == VectorSize::Size64x2
1916                        );
1917                        (0b1, 0b01111, enc_size)
1918                    }
1919                    VecMisc2::Fsqrt => {
1920                        debug_assert!(
1921                            size == VectorSize::Size32x2
1922                                || size == VectorSize::Size32x4
1923                                || size == VectorSize::Size64x2
1924                        );
1925                        (0b1, 0b11111, enc_size)
1926                    }
1927                    VecMisc2::Rev16 => {
1928                        debug_assert_eq!(size, VectorSize::Size8x16);
1929                        (0b0, 0b00001, enc_size)
1930                    }
1931                    VecMisc2::Rev32 => {
1932                        debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
1933                        (0b1, 0b00000, enc_size)
1934                    }
1935                    VecMisc2::Rev64 => {
1936                        debug_assert!(
1937                            size == VectorSize::Size8x16
1938                                || size == VectorSize::Size16x8
1939                                || size == VectorSize::Size32x4
1940                        );
1941                        (0b0, 0b00000, enc_size)
1942                    }
1943                    VecMisc2::Fcvtzs => {
1944                        debug_assert!(
1945                            size == VectorSize::Size32x2
1946                                || size == VectorSize::Size32x4
1947                                || size == VectorSize::Size64x2
1948                        );
1949                        (0b0, 0b11011, enc_size)
1950                    }
1951                    VecMisc2::Fcvtzu => {
1952                        debug_assert!(
1953                            size == VectorSize::Size32x2
1954                                || size == VectorSize::Size32x4
1955                                || size == VectorSize::Size64x2
1956                        );
1957                        (0b1, 0b11011, enc_size)
1958                    }
                        // The int->FP converts mask the size field down to
                        // its low bit; only 32x4 and 64x2 shapes are allowed
                        // here.
1959                    VecMisc2::Scvtf => {
1960                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1961                        (0b0, 0b11101, enc_size & 0b1)
1962                    }
1963                    VecMisc2::Ucvtf => {
1964                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1965                        (0b1, 0b11101, enc_size & 0b1)
1966                    }
1967                    VecMisc2::Frintn => {
1968                        debug_assert!(
1969                            size == VectorSize::Size32x2
1970                                || size == VectorSize::Size32x4
1971                                || size == VectorSize::Size64x2
1972                        );
1973                        (0b0, 0b11000, enc_size & 0b01)
1974                    }
1975                    VecMisc2::Frintz => {
1976                        debug_assert!(
1977                            size == VectorSize::Size32x2
1978                                || size == VectorSize::Size32x4
1979                                || size == VectorSize::Size64x2
1980                        );
1981                        (0b0, 0b11001, enc_size)
1982                    }
1983                    VecMisc2::Frintm => {
1984                        debug_assert!(
1985                            size == VectorSize::Size32x2
1986                                || size == VectorSize::Size32x4
1987                                || size == VectorSize::Size64x2
1988                        );
1989                        (0b0, 0b11001, enc_size & 0b01)
1990                    }
1991                    VecMisc2::Frintp => {
1992                        debug_assert!(
1993                            size == VectorSize::Size32x2
1994                                || size == VectorSize::Size32x4
1995                                || size == VectorSize::Size64x2
1996                        );
1997                        (0b0, 0b11000, enc_size)
1998                    }
1999                    VecMisc2::Cnt => {
2000                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
2001                        (0b0, 0b00101, enc_size)
2002                    }
                        // Integer compare-against-zero family.
2003                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
2004                    VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
2005                    VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
2006                    VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
2007                    VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
                        // FP compare-against-zero family.
2008                    VecMisc2::Fcmeq0 => {
2009                        debug_assert!(
2010                            size == VectorSize::Size32x2
2011                                || size == VectorSize::Size32x4
2012                                || size == VectorSize::Size64x2
2013                        );
2014                        (0b0, 0b01101, enc_size)
2015                    }
2016                    VecMisc2::Fcmge0 => {
2017                        debug_assert!(
2018                            size == VectorSize::Size32x2
2019                                || size == VectorSize::Size32x4
2020                                || size == VectorSize::Size64x2
2021                        );
2022                        (0b1, 0b01100, enc_size)
2023                    }
2024                    VecMisc2::Fcmgt0 => {
2025                        debug_assert!(
2026                            size == VectorSize::Size32x2
2027                                || size == VectorSize::Size32x4
2028                                || size == VectorSize::Size64x2
2029                        );
2030                        (0b0, 0b01100, enc_size)
2031                    }
2032                    VecMisc2::Fcmle0 => {
2033                        debug_assert!(
2034                            size == VectorSize::Size32x2
2035                                || size == VectorSize::Size32x4
2036                                || size == VectorSize::Size64x2
2037                        );
2038                        (0b1, 0b01101, enc_size)
2039                    }
2040                    VecMisc2::Fcmlt0 => {
2041                        debug_assert!(
2042                            size == VectorSize::Size32x2
2043                                || size == VectorSize::Size32x4
2044                                || size == VectorSize::Size64x2
2045                        );
2046                        (0b0, 0b01110, enc_size)
2047                    }
2048                };
2049                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
2050            }
2051            &Inst::VecLanes { op, rd, rn, size } => {
                    // Across-lanes reductions. `q` selects the 64- vs 128-bit
                    // vector form; `size` is rebound to the two-bit lane-size
                    // field. Shapes not listed are not supported here.
2052                let (q, size) = match size {
2053                    VectorSize::Size8x8 => (0b0, 0b00),
2054                    VectorSize::Size8x16 => (0b1, 0b00),
2055                    VectorSize::Size16x4 => (0b0, 0b01),
2056                    VectorSize::Size16x8 => (0b1, 0b01),
2057                    VectorSize::Size32x4 => (0b1, 0b10),
2058                    _ => unreachable!(),
2059                };
2060                let (u, opcode) = match op {
2061                    VecLanesOp::Uminv => (0b1, 0b11010),
2062                    VecLanesOp::Addv => (0b0, 0b11011),
2063                };
2064                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
2065            }
2066            &Inst::VecShiftImm {
2067                op,
2068                rd,
2069                rn,
2070                size,
2071                imm,
2072            } => {
2073                let (is_shr, mut template) = match op {
2074                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
2075                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
2076                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
2077                };
                    // Q bit: select the 128-bit vector form.
2078                if size.is_128bits() {
2079                    template |= 0b1 << 30;
2080                }
2081                let imm = imm as u32;
2082                // Deal with the somewhat strange encoding scheme for, and limits on,
2083                // the shift amount.
                    // immh:immb encodes both lane size and amount: for right
                    // shifts it is (2 * lane_bits) - imm with imm in
                    // 1..=lane_bits; for left shifts it is lane_bits + imm
                    // with imm in 0..lane_bits. The OR acts as an add since
                    // the amount bits never overlap the lane-size marker bit.
2084                let immh_immb = match (size.lane_size(), is_shr) {
2085                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2086                        0b_1000_000_u32 | (64 - imm)
2087                    }
2088                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2089                        0b_0100_000_u32 | (32 - imm)
2090                    }
2091                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2092                        0b_0010_000_u32 | (16 - imm)
2093                    }
2094                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2095                        0b_0001_000_u32 | (8 - imm)
2096                    }
2097                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2098                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2099                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2100                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2101                    _ => panic!(
2102                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2103                    ),
2104                };
2105                let rn_enc = machreg_to_vec(rn);
2106                let rd_enc = machreg_to_vec(rd.to_reg());
2107                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2108            }
2109            &Inst::VecShiftImmMod {
2110                op,
2111                rd,
2112                ri,
2113                rn,
2114                size,
2115                imm,
2116            } => {
                    // Shift-with-insert: the destination is also read, so
                    // regalloc must have tied `ri` to `rd`.
2117                debug_assert_eq!(rd.to_reg(), ri);
                    // `Sli` is currently the only op, so `is_shr` is always
                    // false here; the right-shift arms below are kept to
                    // mirror the VecShiftImm encoding logic but are
                    // unreachable today.
2118                let (is_shr, mut template) = match op {
2119                    VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
2120                };
2121                if size.is_128bits() {
2122                    template |= 0b1 << 30;
2123                }
2124                let imm = imm as u32;
2125                // Deal with the somewhat strange encoding scheme for, and limits on,
2126                // the shift amount.
2127                let immh_immb = match (size.lane_size(), is_shr) {
2128                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2129                        0b_1000_000_u32 | (64 - imm)
2130                    }
2131                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2132                        0b_0100_000_u32 | (32 - imm)
2133                    }
2134                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2135                        0b_0010_000_u32 | (16 - imm)
2136                    }
2137                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2138                        0b_0001_000_u32 | (8 - imm)
2139                    }
2140                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2141                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2142                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2143                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2144                    _ => panic!(
2145                        "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
2146                    ),
2147                };
2148                let rn_enc = machreg_to_vec(rn);
2149                let rd_enc = machreg_to_vec(rd.to_reg());
2150                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2151            }
2152            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                    // ext: byte-wise extract from the concatenation of two
                    // vectors; `imm4` is a byte index and must be 0..=15.
2153                if imm4 < 16 {
2154                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
2155                    let rm_enc = machreg_to_vec(rm);
2156                    let rn_enc = machreg_to_vec(rn);
2157                    let rd_enc = machreg_to_vec(rd.to_reg());
2158                    sink.put4(
2159                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
2160                    );
2161                } else {
2162                    panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
2163                }
2164            }
2165            &Inst::VecTbl { rd, rn, rm } => {
                    // Table lookup with a single table register (len = 0b00).
2166                sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
2167            }
2168            &Inst::VecTblExt { rd, ri, rn, rm } => {
                    // Extension form (tbx): the destination is also read
                    // (out-of-range indices preserve the existing lane), so
                    // regalloc must have tied `ri` to `rd`.
2169                debug_assert_eq!(rd.to_reg(), ri);
2170                sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
2171            }
2172            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                    // Two-register table lookup (len = 0b01); the hardware
                    // requires the table registers to be consecutive
                    // (mod 32) in the vector register file.
2173                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2174                sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
2175            }
2176            &Inst::VecTbl2Ext {
2177                rd,
2178                ri,
2179                rn,
2180                rn2,
2181                rm,
2182            } => {
                    // Two-register extension form: tied destination plus the
                    // consecutive-register constraint.
2183                debug_assert_eq!(rd.to_reg(), ri);
2184                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2185                sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
2186            }
2187            &Inst::FpuCmp { size, rn, rm } => {
                    // Scalar FP compare at the given precision.
2188                sink.put4(enc_fcmp(size, rn, rm));
2189            }
2190            &Inst::FpuToInt { op, rd, rn } => {
                    // FP -> integer converts (round toward zero). `top16`
                    // bakes in source precision, destination width, and
                    // signedness per variant.
2191                let top16 = match op {
2192                    // FCVTZS (32/32-bit)
2193                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
2194                    // FCVTZU (32/32-bit)
2195                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
2196                    // FCVTZS (32/64-bit)
2197                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
2198                    // FCVTZU (32/64-bit)
2199                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
2200                    // FCVTZS (64/32-bit)
2201                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
2202                    // FCVTZU (64/32-bit)
2203                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
2204                    // FCVTZS (64/64-bit)
2205                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
2206                    // FCVTZU (64/64-bit)
2207                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
2208                };
2209                sink.put4(enc_fputoint(top16, rd, rn));
2210            }
2211            &Inst::IntToFpu { op, rd, rn } => {
                    // Integer -> FP converts (scvtf/ucvtf). `top16` bakes in
                    // source width, destination precision, and signedness.
2212                let top16 = match op {
2213                    // SCVTF (32/32-bit)
2214                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
2215                    // UCVTF (32/32-bit)
2216                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
2217                    // SCVTF (64/32-bit)
2218                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
2219                    // UCVTF (64/32-bit)
2220                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
2221                    // SCVTF (32/64-bit)
2222                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
2223                    // UCVTF (32/64-bit)
2224                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
2225                    // SCVTF (64/64-bit)
2226                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
2227                    // UCVTF (64/64-bit)
2228                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
2229                };
2230                sink.put4(enc_inttofpu(top16, rd, rn));
2231            }
2232            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                    // FP conditional select, half precision.
2233                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
2234            }
2235            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                    // FP conditional select, single precision.
2236                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
2237            }
2238            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                    // FP conditional select, double precision.
2239                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
2240            }
2241            &Inst::FpuRound { op, rd, rn } => {
                    // Scalar round-to-integral-in-FP; both the rounding mode
                    // (Minus/Plus/Zero/Nearest) and the precision (32/64) are
                    // baked into `top22` per variant.
2242                let top22 = match op {
2243                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
2244                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
2245                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
2246                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
2247                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
2248                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
2249                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
2250                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
2251                };
2252                sink.put4(enc_fround(top22, rd, rn));
2253            }
            // FMOV from a general-purpose register into an FP/vector register.
            // `size` selects the template; only 16/32/64-bit moves exist, and
            // the 64-bit form needs sf=1 (the leading `1` in the template).
            &Inst::MovToFpu { rd, rn, size } => {
                let template = match size {
                    ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            // FMOV (scalar, immediate): materialize an 8-bit-encodable FP
            // constant. `imm.enc_bits()` lands at bits 20:13 and the size's
            // ftype field at bits 23:22.
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                sink.put4(
                    0b000_11110_00_1_00_000_000100_00000_00000
                        | size.ftype() << 22
                        | ((imm.enc_bits() as u32) << 13)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // INS (general): insert a GPR value into one lane of a vector.
            // Only the selected lane changes, so the prior register value
            // `ri` is tied to the destination (asserted below).
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                // imm5's lowest set bit encodes the lane size; the lane index
                // goes in the bits above it (hence the per-size `shift`).
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                // The index must fit the lane count for this lane size.
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // UMOV: move one vector lane to a GPR. The Q bit (`q`) is set for
            // 64-bit lanes; `mask` bounds the index to the lane count.
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
                    ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
                    ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
                    ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
                    _ => panic!("Unexpected scalar FP operand size: {size:?}"),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            // SMOV: sign-extending lane extract into a GPR. A 32-bit lane may
            // only be extracted into a 64-bit destination (asserted below);
            // `scalar_size.is64()` drives the Q bit.
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
                // `half` marks the 64-bit vector forms, which have half as
                // many addressable lanes.
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            // DUP (general): replicate a GPR value into every lane of a
            // vector. Q selects the 128-bit form; imm5 encodes the lane size.
            &Inst::VecDup { rd, rn, size } => {
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => 0b00001,
                    ScalarSize::Size16 => 0b00010,
                    ScalarSize::Size32 => 0b00100,
                    ScalarSize::Size64 => 0b01000,
                    _ => unreachable!(),
                };
                sink.put4(
                    0b0_0_0_01110000_00000_000011_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // DUP (element): replicate lane `lane` of `rn` into every lane of
            // `rd`. imm5 packs both the lane size and the source lane index;
            // the asserts bound the index to the lane count for that size.
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(lane < 16);
                        0b00001 | (u32::from(lane) << 1)
                    }
                    ScalarSize::Size16 => {
                        assert!(lane < 8);
                        0b00010 | (u32::from(lane) << 2)
                    }
                    ScalarSize::Size32 => {
                        assert!(lane < 4);
                        0b00100 | (u32::from(lane) << 3)
                    }
                    ScalarSize::Size64 => {
                        assert!(lane < 2);
                        0b01000 | (u32::from(lane) << 4)
                    }
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b000_01110000_00000_000001_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // FMOV (vector, immediate): broadcast an 8-bit-encodable FP
            // constant to all lanes (cmode = 0b1111; op distinguishes the
            // 32- vs 64-bit lane form).
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            // MOVI/MVNI (vector, modified immediate): broadcast an integer
            // immediate. `invert` selects the inverted (MVNI) form via the op
            // bit; cmode encodes the lane size plus the shift applied to the
            // 8-bit immediate. The asserts reject shifts the encoding cannot
            // represent for the given lane size.
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let (imm, shift, shift_ones) = imm.value();
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        // Only LSL #0 or LSL #8 exist for 16-bit lanes.
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            // MSL ("shift ones in") form: shift 8 or 16 only.
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            // LSL form: shift must be 0, 8, 16 or 24.
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            // SXTL/UXTL{,2}: lengthening integer extend (encoded as
            // SSHLL/USHLL with a zero shift). `high_half` selects the "2"
            // variant (upper-half source) via bit 30; `u` picks signed vs
            // unsigned; immh encodes the *destination* lane size.
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
                let immh = match lane_size {
                    ScalarSize::Size16 => 0b001,
                    ScalarSize::Size32 => 0b010,
                    ScalarSize::Size64 => 0b100,
                    _ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
                };
                let u = match t {
                    VecExtendOp::Sxtl => 0b0,
                    VecExtendOp::Uxtl => 0b1,
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // Lengthening two-register-misc ops: FCVTL{,2} (FP widen) and
            // SHLL{,2}. The `high_half` flag becomes the Q bit inside the
            // first argument of enc_vec_rr_misc.
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (u, size, bits_12_16) = match op {
                    VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
                    VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
                    VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
                    VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
                    VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            // Narrowing two-register-misc ops (XTN/SQXTN/SQXTUN/UQXTN/FCVTN).
            // The Low and High `Inst` variants share this encoding path; which
            // one we have is recovered from `self` to set the "2" (high-half
            // destination) bit.
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };

                let size = match lane_size {
                    ScalarSize::Size8 => 0b00,
                    ScalarSize::Size16 => 0b01,
                    ScalarSize::Size32 => 0b10,
                    _ => panic!("unsupported size: {lane_size:?}"),
                };

                // Floats use a single bit, to encode either half or single.
                let size = match op {
                    VecRRNarrowOp::Fcvtn => size >> 1,
                    _ => size,
                };

                let (u, bits_12_16) = match op {
                    VecRRNarrowOp::Xtn => (0b0, 0b10010),
                    VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
                    VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
                    VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
                    VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            // INS (element): copy lane `src_idx` of `rn` into lane `dest_idx`
            // of `rd`, preserving the other lanes — hence the tied prior
            // value `ri` (asserted below). imm5 carries the lane size plus
            // the destination index; imm4 carries the source index.
            &Inst::VecMovElement {
                rd,
                ri,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                // Both indices must fit the lane count for this lane size.
                let mask = 0b11111 >> shift;
                debug_assert_eq!(dest_idx & mask, dest_idx);
                debug_assert_eq!(src_idx & mask, src_idx);
                let imm4 = (src_idx as u32) << (shift - 1);
                let imm5 = imm5 | ((dest_idx as u32) << shift);
                sink.put4(
                    0b011_01110000_00000_0_0000_1_00000_00000
                        | (imm5 << 16)
                        | (imm4 << 11)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            // Scalar pairwise op (currently only ADDP).
            &Inst::VecRRPair { op, rd, rn } => {
                let bits_12_16 = match op {
                    VecPairOp::Addp => 0b11011,
                };

                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
            }
            // Widening three-register multiplies: SMULL{,2} / UMULL{,2}.
            // `u` selects unsigned; `size` is the source lane size.
            &Inst::VecRRRLong {
                rd,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                let (u, size, bit14) = match alu_op {
                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
                };
                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            // Widening multiply-accumulate (UMLAL{,2}): rd is read as well as
            // written, so the prior value `ri` is tied to it (asserted).
            &Inst::VecRRRLongMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (u, size, bit14) = match alu_op {
                    VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
                    VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
                    VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
                };
                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            // Pairwise lengthening adds: SADDLP / UADDLP.
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (u, size) = match op {
                    VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
                    VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
                    VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
                    VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
                };

                sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
            }
            // Vector "three same" ALU ops. For integer ops the lane size is
            // folded into bits 23:22 of `top11` via `enc_size << 1`; float
            // ops instead OR in `enc_float_size()` after the table. The Q
            // (128-bit) bit is bit 30 of the instruction, i.e. bit 9 of
            // `top11`.
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Orn => (0b000_01110_11_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    // The ops below assert lane-size restrictions that the
                    // A64 encodings do not provide (e.g. no 64x2 forms).
                    VecALUOp::Umaxp => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b101001)
                    }
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Smin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Umax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Smax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Urhadd => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b000101)
                    }
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );

                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                    VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
                    VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
                    VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
                    VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
                };
                // Float ops carry the lane size in a single "sz"-style field
                // rather than the two-bit integer size.
                let top11 = if is_float {
                    top11 | size.enc_float_size() << 1
                } else {
                    top11
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            // Three-register vector ops that also read the destination (BSL
            // and the fused multiply-adds); `ri` is the tied prior value of
            // `rd` (asserted below).
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (q, _enc_size) = size.enc_size();

                let (top11, bit15_10) = match alu_op {
                    VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUModOp::Fmla => {
                        (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                    VecALUModOp::Fmls => {
                        (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            // FMLA/FMLS (vector, by element): multiply by a single lane of
            // `rm` selected by `idx`, accumulating into `rd` (so `ri` is tied,
            // asserted below). The lane index is split into the H and L bits
            // according to the lane size.
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let idx = u32::from(idx);

                let (q, _size) = size.enc_size();
                // o2 distinguishes FMLS (1) from FMLA (0).
                let o2 = match alu_op {
                    VecALUModOp::Fmla => 0b0,
                    VecALUModOp::Fmls => 0b1,
                    _ => unreachable!(),
                };

                let (h, l) = match size {
                    VectorSize::Size32x4 => {
                        assert!(idx < 4);
                        (idx >> 1, idx & 1)
                    }
                    VectorSize::Size64x2 => {
                        assert!(idx < 2);
                        (idx, 0)
                    }
                    _ => unreachable!(),
                };

                let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
                let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            // LD1R: load a single element from [rn] and replicate it to all
            // lanes of rd. Any trap metadata is registered at the offset of
            // the load instruction itself.
            &Inst::VecLoadReplicate {
                rd,
                rn,
                size,
                flags,
            } => {
                let (q, size) = size.enc_size();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            // Pseudo-instruction: a full-vector conditional select, expanded
            // into a conditional branch plus two 16-byte register moves.
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                      b.cond  else
                      mov     rd, rm
                      b       out
                     else:
                      mov     rd, rn
                     out:

                   Note, we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label, &mut state.ctrl_plane);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            // MSR NZCV, Xn: move a GPR into the flags register.
            &Inst::MovToNZCV { rn } => {
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            // MRS Xn, NZCV: move the flags register into a GPR.
            &Inst::MovFromNZCV { rd } => {
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            // Zero-extend from a 1-bit value: specialized to a 32-bit
            // `AND rd, rn, #1` rather than a bitfield extract.
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And,
                    size: OperandSize::Size32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            // Zero-extend 32 -> 64: a 32-bit register move suffices, since
            // 32-bit writes clear the upper 32 bits of the destination.
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                let mov = Inst::Mov {
                    size: OperandSize::Size32,
                    rd,
                    rm: rn,
                };
                mov.emit(sink, emit_info, state);
            }
            // General case: a bitfield-move extract of the low `from_bits`
            // bits — SBFM (opc=00) at the full destination size when signed,
            // UBFM (opc=10) at 32 bits when unsigned.
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            // Unconditional branch (B). Label targets are recorded for later
            // fixup (Branch26 = 26-bit PC-relative offset).
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // Indicate that the jump uses a label, if so, so that a fixup can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            // RET (0xd65f03c0).
            &Inst::Ret {} => {
                sink.put4(0xd65f03c0);
            }
            // Pointer-authenticated return: either the combined RETAA/RETAB
            // instruction, or (for the zero-modifier keys, and whenever hint
            // compatibility is requested) an AUTI* hint followed by a plain
            // RET.
            &Inst::AuthenticatedRet { key, is_hint } => {
                // The AZ/BZ keys have no combined "reta" form, so they always
                // take the hint path.
                let (op2, is_hint) = match key {
                    APIKey::AZ => (0b100, true),
                    APIKey::ASP => (0b101, is_hint),
                    APIKey::BZ => (0b110, true),
                    APIKey::BSP => (0b111, is_hint),
                };

                if is_hint {
                    sink.put4(key.enc_auti_hint());
                    Inst::Ret {}.emit(sink, emit_info, state);
                } else {
                    sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
                }
            }
            // Direct call: BL with an Arm64Call relocation against the callee
            // symbol, followed by all the per-call-site bookkeeping (stack
            // maps, exception/call-site metadata, callee-popped-argument SP
            // adjustment, return-value loads, and the try-call continuation
            // jump).
            &Inst::Call { ref info } => {
                let start = sink.cur_offset();
                let user_stack_map = state.take_stack_map();
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                // Stack maps are keyed to the return address, i.e. the offset
                // just past the BL.
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                // Record either a try-call site (with its exception handlers)
                // or a plain call site.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_try_call_site(
                        Some(state.frame_layout.sp_to_fp()),
                        try_call.exception_handlers(&state.frame_layout),
                    );
                } else {
                    sink.add_call_site();
                }

                // If the callee pops argument space, re-adjust SP back down
                // by that amount after the call returns.
                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                if info.patchable {
                    // Record the byte length of the patchable call sequence.
                    sink.add_patchable_call_site(sink.cur_offset() - start);
                } else {
                    // Load any stack-carried return values.
                    info.emit_retval_loads::<AArch64MachineDeps, _, _>(
                        state.frame_layout().stackslots_size,
                        |inst| inst.emit(sink, emit_info, state),
                        |needed_space| Some(Inst::EmitIsland { needed_space }),
                    );
                }

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(try_call.continuation),
                    };
                    jmp.emit(sink, emit_info, state);
                }

                // We produce an island above if needed, so disable
                // the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
2999            &Inst::CallInd { ref info } => {
3000                let user_stack_map = state.take_stack_map();
3001                sink.put4(
3002                    0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
3003                );
3004                if let Some(s) = user_stack_map {
3005                    let offset = sink.cur_offset();
3006                    sink.push_user_stack_map(state, offset, s);
3007                }
3008
3009                if let Some(try_call) = info.try_call_info.as_ref() {
3010                    sink.add_try_call_site(
3011                        Some(state.frame_layout.sp_to_fp()),
3012                        try_call.exception_handlers(&state.frame_layout),
3013                    );
3014                } else {
3015                    sink.add_call_site();
3016                }
3017
3018                if info.callee_pop_size > 0 {
3019                    let callee_pop_size =
3020                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
3021                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
3022                        inst.emit(sink, emit_info, state);
3023                    }
3024                }
3025
3026                // Load any stack-carried return values.
3027                info.emit_retval_loads::<AArch64MachineDeps, _, _>(
3028                    state.frame_layout().stackslots_size,
3029                    |inst| inst.emit(sink, emit_info, state),
3030                    |needed_space| Some(Inst::EmitIsland { needed_space }),
3031                );
3032
3033                // If this is a try-call, jump to the continuation
3034                // (normal-return) block.
3035                if let Some(try_call) = info.try_call_info.as_ref() {
3036                    let jmp = Inst::Jump {
3037                        dest: BranchTarget::Label(try_call.continuation),
3038                    };
3039                    jmp.emit(sink, emit_info, state);
3040                }
3041
3042                // We produce an island above if needed, so disable
3043                // the worst-case-size check in this case.
3044                start_off = sink.cur_offset();
3045            }
3046            &Inst::ReturnCall { ref info } => {
3047                emit_return_call_common_sequence(sink, emit_info, state, info);
3048
3049                // Note: this is not `Inst::Jump { .. }.emit(..)` because we
3050                // have different metadata in this case: we don't have a label
3051                // for the target, but rather a function relocation.
3052                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
3053                sink.put4(enc_jump26(0b000101, 0));
3054                sink.add_call_site();
3055
3056                // `emit_return_call_common_sequence` emits an island if
3057                // necessary, so we can safely disable the worst-case-size check
3058                // in this case.
3059                start_off = sink.cur_offset();
3060            }
3061            &Inst::ReturnCallInd { ref info } => {
3062                emit_return_call_common_sequence(sink, emit_info, state, info);
3063
3064                Inst::IndirectBr {
3065                    rn: info.dest,
3066                    targets: vec![],
3067                }
3068                .emit(sink, emit_info, state);
3069                sink.add_call_site();
3070
3071                // `emit_return_call_common_sequence` emits an island if
3072                // necessary, so we can safely disable the worst-case-size check
3073                // in this case.
3074                start_off = sink.cur_offset();
3075            }
3076            &Inst::CondBr {
3077                taken,
3078                not_taken,
3079                kind,
3080            } => {
3081                // Conditional part first.
3082                let cond_off = sink.cur_offset();
3083                if let Some(l) = taken.as_label() {
3084                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
3085                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
3086                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3087                }
3088                sink.put4(enc_conditional_br(taken, kind));
3089
3090                // Unconditional part next.
3091                let uncond_off = sink.cur_offset();
3092                if let Some(l) = not_taken.as_label() {
3093                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3094                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3095                }
3096                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3097            }
3098            &Inst::TestBitAndBranch {
3099                taken,
3100                not_taken,
3101                kind,
3102                rn,
3103                bit,
3104            } => {
3105                // Emit the conditional branch first
3106                let cond_off = sink.cur_offset();
3107                if let Some(l) = taken.as_label() {
3108                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
3109                    let inverted =
3110                        enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
3111                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
3112                }
3113                sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));
3114
3115                // Unconditional part next.
3116                let uncond_off = sink.cur_offset();
3117                if let Some(l) = not_taken.as_label() {
3118                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
3119                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
3120                }
3121                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
3122            }
3123            &Inst::TrapIf { kind, trap_code } => {
3124                let label = sink.defer_trap(trap_code);
3125                // condbr KIND, LABEL
3126                let off = sink.cur_offset();
3127                sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
3128                sink.use_label_at_offset(off, label, LabelUse::Branch19);
3129            }
3130            &Inst::IndirectBr { rn, .. } => {
3131                sink.put4(enc_br(rn));
3132            }
3133            &Inst::Nop0 => {}
3134            &Inst::Nop4 => {
3135                sink.put4(0xd503201f);
3136            }
3137            &Inst::Brk => {
3138                sink.put4(0xd43e0000);
3139            }
3140            &Inst::Udf { trap_code } => {
3141                sink.add_trap(trap_code);
3142                sink.put_data(Inst::TRAP_OPCODE);
3143            }
3144            &Inst::Adr { rd, off } => {
3145                assert!(off > -(1 << 20));
3146                assert!(off < (1 << 20));
3147                sink.put4(enc_adr(off, rd));
3148            }
3149            &Inst::Adrp { rd, off } => {
3150                assert!(off > -(1 << 20));
3151                assert!(off < (1 << 20));
3152                sink.put4(enc_adrp(off, rd));
3153            }
3154            &Inst::Word4 { data } => {
3155                sink.put4(data);
3156            }
3157            &Inst::Word8 { data } => {
3158                sink.put8(data);
3159            }
3160            &Inst::JTSequence {
3161                ridx,
3162                rtmp1,
3163                rtmp2,
3164                default,
3165                ref targets,
3166                ..
3167            } => {
3168                // This sequence is *one* instruction in the vcode, and is expanded only here at
3169                // emission time, because we cannot allow the regalloc to insert spills/reloads in
3170                // the middle; we depend on hardcoded PC-rel addressing below.
3171
3172                // Branch to default when condition code from prior comparison indicates.
3173                let br =
3174                    enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));
3175
3176                // No need to inform the sink's branch folding logic about this branch, because it
3177                // will not be merged with any other branch, flipped, or elided (it is not preceded
3178                // or succeeded by any other branch). Just emit it with the label use.
3179                let default_br_offset = sink.cur_offset();
3180                sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
3181                sink.put4(br);
3182
3183                // Overwrite the index with a zero when the above
3184                // branch misspeculates (Spectre mitigation). Save the
3185                // resulting index in rtmp2.
3186                let inst = Inst::CSel {
3187                    rd: rtmp2,
3188                    cond: Cond::Hs,
3189                    rn: zero_reg(),
3190                    rm: ridx,
3191                };
3192                inst.emit(sink, emit_info, state);
3193                // Prevent any data value speculation if spectre mitigations are
3194                // enabled.
3195                if emit_info.flags.enable_table_access_spectre_mitigation()
3196                    && emit_info.isa_flags.use_csdb()
3197                {
3198                    Inst::Csdb.emit(sink, emit_info, state);
3199                }
3200
3201                // Load address of jump table
3202                let inst = Inst::Adr { rd: rtmp1, off: 16 };
3203                inst.emit(sink, emit_info, state);
3204                // Load value out of jump table
3205                let inst = Inst::SLoad32 {
3206                    rd: rtmp2,
3207                    mem: AMode::reg_plus_reg_scaled_extended(
3208                        rtmp1.to_reg(),
3209                        rtmp2.to_reg(),
3210                        ExtendOp::UXTW,
3211                    ),
3212                    flags: MemFlags::trusted(),
3213                };
3214                inst.emit(sink, emit_info, state);
3215                // Add base of jump table to jump-table-sourced block offset
3216                let inst = Inst::AluRRR {
3217                    alu_op: ALUOp::Add,
3218                    size: OperandSize::Size64,
3219                    rd: rtmp1,
3220                    rn: rtmp1.to_reg(),
3221                    rm: rtmp2.to_reg(),
3222                };
3223                inst.emit(sink, emit_info, state);
3224                // Branch to computed address. (`targets` here is only used for successor queries
3225                // and is not needed for emission.)
3226                let inst = Inst::IndirectBr {
3227                    rn: rtmp1.to_reg(),
3228                    targets: vec![],
3229                };
3230                inst.emit(sink, emit_info, state);
3231                // Emit jump table (table of 32-bit offsets).
3232                let jt_off = sink.cur_offset();
3233                for &target in targets.iter() {
3234                    let word_off = sink.cur_offset();
3235                    // off_into_table is an addend here embedded in the label to be later patched
3236                    // at the end of codegen. The offset is initially relative to this jump table
3237                    // entry; with the extra addend, it'll be relative to the jump table's start,
3238                    // after patching.
3239                    let off_into_table = word_off - jt_off;
3240                    sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
3241                    sink.put4(off_into_table);
3242                }
3243
3244                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
3245                // disable the worst-case-size check in this case.
3246                start_off = sink.cur_offset();
3247            }
3248            &Inst::LoadExtNameGot { rd, ref name } => {
3249                // See this CE Example for the variations of this with and without BTI & PAUTH
3250                // https://godbolt.org/z/ncqjbbvvn
3251                //
3252                // Emit the following code:
3253                //   adrp    rd, :got:X
3254                //   ldr     rd, [rd, :got_lo12:X]
3255
3256                // adrp rd, symbol
3257                sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
3258                let inst = Inst::Adrp { rd, off: 0 };
3259                inst.emit(sink, emit_info, state);
3260
3261                // ldr rd, [rd, :got_lo12:X]
3262                sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
3263                let inst = Inst::ULoad64 {
3264                    rd,
3265                    mem: AMode::reg(rd.to_reg()),
3266                    flags: MemFlags::trusted(),
3267                };
3268                inst.emit(sink, emit_info, state);
3269            }
3270            &Inst::LoadExtNameNear {
3271                rd,
3272                ref name,
3273                offset,
3274            } => {
3275                // Emit the following code:
3276                //   adrp    rd, X
3277                //   add     rd, rd, :lo12:X
3278                //
3279                // See https://godbolt.org/z/855KEvM5r for an example.
3280
3281                // adrp rd, symbol
3282                sink.add_reloc(Reloc::Aarch64AdrPrelPgHi21, &**name, offset);
3283                let inst = Inst::Adrp { rd, off: 0 };
3284                inst.emit(sink, emit_info, state);
3285
3286                // add rd, rd, :lo12:X
3287                sink.add_reloc(Reloc::Aarch64AddAbsLo12Nc, &**name, offset);
3288                let inst = Inst::AluRRImm12 {
3289                    alu_op: ALUOp::Add,
3290                    size: OperandSize::Size64,
3291                    rd,
3292                    rn: rd.to_reg(),
3293                    imm12: Imm12::ZERO,
3294                };
3295                inst.emit(sink, emit_info, state);
3296            }
3297            &Inst::LoadExtNameFar {
3298                rd,
3299                ref name,
3300                offset,
3301            } => {
3302                // With absolute offsets we set up a load from a preallocated space, and then jump
3303                // over it.
3304                //
3305                // Emit the following code:
3306                //   ldr     rd, #8
3307                //   b       #0x10
3308                //   <8 byte space>
3309
3310                let inst = Inst::ULoad64 {
3311                    rd,
3312                    mem: AMode::Label {
3313                        label: MemLabel::PCRel(8),
3314                    },
3315                    flags: MemFlags::trusted(),
3316                };
3317                inst.emit(sink, emit_info, state);
3318                let inst = Inst::Jump {
3319                    dest: BranchTarget::ResolvedOffset(12),
3320                };
3321                inst.emit(sink, emit_info, state);
3322                sink.add_reloc(Reloc::Abs8, &**name, offset);
3323                sink.put8(0);
3324            }
3325            &Inst::LoadAddr { rd, ref mem } => {
3326                let mem = mem.clone();
3327                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
3328                for inst in mem_insts.into_iter() {
3329                    inst.emit(sink, emit_info, state);
3330                }
3331
3332                let (reg, index_reg, offset) = match mem {
3333                    AMode::RegExtended { rn, rm, extendop } => {
3334                        let r = rn;
3335                        (r, Some((rm, extendop)), 0)
3336                    }
3337                    AMode::Unscaled { rn, simm9 } => {
3338                        let r = rn;
3339                        (r, None, simm9.value())
3340                    }
3341                    AMode::UnsignedOffset { rn, uimm12 } => {
3342                        let r = rn;
3343                        (r, None, uimm12.value() as i32)
3344                    }
3345                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
3346                };
3347                let abs_offset = if offset < 0 {
3348                    -offset as u64
3349                } else {
3350                    offset as u64
3351                };
3352                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };
3353
3354                if let Some((idx, extendop)) = index_reg {
3355                    let add = Inst::AluRRRExtend {
3356                        alu_op: ALUOp::Add,
3357                        size: OperandSize::Size64,
3358                        rd,
3359                        rn: reg,
3360                        rm: idx,
3361                        extendop,
3362                    };
3363
3364                    add.emit(sink, emit_info, state);
3365                } else if offset == 0 {
3366                    if reg != rd.to_reg() {
3367                        let mov = Inst::Mov {
3368                            size: OperandSize::Size64,
3369                            rd,
3370                            rm: reg,
3371                        };
3372
3373                        mov.emit(sink, emit_info, state);
3374                    }
3375                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
3376                    let add = Inst::AluRRImm12 {
3377                        alu_op,
3378                        size: OperandSize::Size64,
3379                        rd,
3380                        rn: reg,
3381                        imm12,
3382                    };
3383                    add.emit(sink, emit_info, state);
3384                } else {
3385                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
3386                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
3387                    // that no other instructions will be inserted here (we're emitting directly),
3388                    // and a live range of `tmp2` should not span this instruction, so this use
3389                    // should otherwise be correct.
3390                    debug_assert!(rd.to_reg() != tmp2_reg());
3391                    debug_assert!(reg != tmp2_reg());
3392                    let tmp = writable_tmp2_reg();
3393                    for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
3394                        insn.emit(sink, emit_info, state);
3395                    }
3396                    let add = Inst::AluRRR {
3397                        alu_op,
3398                        size: OperandSize::Size64,
3399                        rd,
3400                        rn: reg,
3401                        rm: tmp.to_reg(),
3402                    };
3403                    add.emit(sink, emit_info, state);
3404                }
3405            }
3406            &Inst::Paci { key } => {
3407                let (crm, op2) = match key {
3408                    APIKey::AZ => (0b0011, 0b000),
3409                    APIKey::ASP => (0b0011, 0b001),
3410                    APIKey::BZ => (0b0011, 0b010),
3411                    APIKey::BSP => (0b0011, 0b011),
3412                };
3413
3414                sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
3415            }
3416            &Inst::Xpaclri => sink.put4(0xd50320ff),
3417            &Inst::Bti { targets } => {
3418                let targets = match targets {
3419                    BranchTargetType::None => 0b00,
3420                    BranchTargetType::C => 0b01,
3421                    BranchTargetType::J => 0b10,
3422                    BranchTargetType::JC => 0b11,
3423                };
3424
3425                sink.put4(0xd503241f | targets << 6);
3426            }
3427            &Inst::EmitIsland { needed_space } => {
3428                if sink.island_needed(needed_space + 4) {
3429                    let jump_around_label = sink.get_label();
3430                    let jmp = Inst::Jump {
3431                        dest: BranchTarget::Label(jump_around_label),
3432                    };
3433                    jmp.emit(sink, emit_info, state);
3434                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
3435                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
3436                }
3437            }
3438
3439            &Inst::ElfTlsGetAddr {
3440                ref symbol,
3441                rd,
3442                tmp,
3443            } => {
3444                assert_eq!(xreg(0), rd.to_reg());
3445
3446                // See the original proposal for TLSDESC.
3447                // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
3448                //
3449                // Implement the TLSDESC instruction sequence:
3450                //   adrp x0, :tlsdesc:tlsvar
3451                //   ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
3452                //   add  x0, x0, :tlsdesc_lo12:tlsvar
3453                //   blr  tmp
3454                //   mrs  tmp, tpidr_el0
3455                //   add  x0, x0, tmp
3456                //
3457                // This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch64
3458                // See: https://gcc.godbolt.org/z/e4j7MdErh
3459
3460                // adrp x0, :tlsdesc:tlsvar
3461                sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
3462                Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);
3463
3464                // ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
3465                sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
3466                Inst::ULoad64 {
3467                    rd: tmp,
3468                    mem: AMode::reg(rd.to_reg()),
3469                    flags: MemFlags::trusted(),
3470                }
3471                .emit(sink, emit_info, state);
3472
3473                // add x0, x0, :tlsdesc_lo12:tlsvar
3474                sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
3475                Inst::AluRRImm12 {
3476                    alu_op: ALUOp::Add,
3477                    size: OperandSize::Size64,
3478                    rd,
3479                    rn: rd.to_reg(),
3480                    imm12: Imm12::maybe_from_u64(0).unwrap(),
3481                }
3482                .emit(sink, emit_info, state);
3483
3484                // blr tmp
3485                sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
3486                Inst::CallInd {
3487                    info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
3488                }
3489                .emit(sink, emit_info, state);
3490
3491                // mrs tmp, tpidr_el0
3492                sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));
3493
3494                // add x0, x0, tmp
3495                Inst::AluRRR {
3496                    alu_op: ALUOp::Add,
3497                    size: OperandSize::Size64,
3498                    rd,
3499                    rn: rd.to_reg(),
3500                    rm: tmp.to_reg(),
3501                }
3502                .emit(sink, emit_info, state);
3503            }
3504
3505            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
3506                // Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer
3507                // to a function that takes the descriptor address in x0, and after the function returns x0
3508                // contains the address for the thread local variable
3509                //
3510                // what we want to emit is basically:
3511                //
3512                // adrp x0, <label>@TLVPPAGE  ; Load the address of the page of the thread local variable pointer (TLVP)
3513                // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
3514                // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
3515                // blr x1 ; Call the function pointer with the descriptor address in x0
3516                // ; x0 now contains the TLV address
3517
3518                assert_eq!(xreg(0), rd.to_reg());
3519                let rtmp = writable_xreg(1);
3520
3521                // adrp x0, <label>@TLVPPAGE
3522                sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
3523                sink.put4(0x90000000);
3524
3525                // ldr x0, [x0, <label>@TLVPPAGEOFF]
3526                sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
3527                sink.put4(0xf9400000);
3528
3529                // load [x0] into temp register
3530                Inst::ULoad64 {
3531                    rd: rtmp,
3532                    mem: AMode::reg(rd.to_reg()),
3533                    flags: MemFlags::trusted(),
3534                }
3535                .emit(sink, emit_info, state);
3536
3537                // call function pointer in temp register
3538                Inst::CallInd {
3539                    info: crate::isa::Box::new(CallInfo::empty(
3540                        rtmp.to_reg(),
3541                        CallConv::AppleAarch64,
3542                    )),
3543                }
3544                .emit(sink, emit_info, state);
3545            }
3546
3547            &Inst::Unwind { ref inst } => {
3548                sink.add_unwind(inst.clone());
3549            }
3550
3551            &Inst::DummyUse { .. } => {}
3552
3553            &Inst::LabelAddress { dst, label } => {
3554                // We emit an ADR only, which is +/- 2MiB range. This
3555                // should be sufficient for the typical use-case of
                // this instruction, which is in small trampolines to
3557                // get exception-handler addresses.
3558                let inst = Inst::Adr { rd: dst, off: 0 };
3559                let offset = sink.cur_offset();
3560                inst.emit(sink, emit_info, state);
3561                sink.use_label_at_offset(offset, label, LabelUse::Adr21);
3562            }
3563
3564            &Inst::SequencePoint { .. } => {
3565                // Nothing.
3566            }
3567
3568            &Inst::StackProbeLoop { start, end, step } => {
3569                assert!(emit_info.flags.enable_probestack());
3570
3571                // The loop generated here uses `start` as a counter register to
3572                // count backwards until negating it exceeds `end`. In other
3573                // words `start` is an offset from `sp` we're testing where
3574                // `end` is the max size we need to test. The loop looks like:
3575                //
3576                //      loop_start:
3577                //          sub start, start, #step
3578                //          stur xzr, [sp, start]
3579                //          cmn start, end
3580                //          br.gt loop_start
3581                //      loop_end:
3582                //
3583                // Note that this loop cannot use the spilltmp and tmp2
3584                // registers as those are currently used as the input to this
3585                // loop when generating the instruction. This means that some
3586                // more flavorful address modes and lowerings need to be
3587                // avoided.
3588                //
3589                // Perhaps someone more clever than I can figure out how to use
3590                // `subs` or the like and skip the `cmn`, but I can't figure it
3591                // out at this time.
3592
3593                let loop_start = sink.get_label();
3594                sink.bind_label(loop_start, &mut state.ctrl_plane);
3595
3596                Inst::AluRRImm12 {
3597                    alu_op: ALUOp::Sub,
3598                    size: OperandSize::Size64,
3599                    rd: start,
3600                    rn: start.to_reg(),
3601                    imm12: step,
3602                }
3603                .emit(sink, emit_info, state);
3604                Inst::Store32 {
3605                    rd: regs::zero_reg(),
3606                    mem: AMode::RegReg {
3607                        rn: regs::stack_reg(),
3608                        rm: start.to_reg(),
3609                    },
3610                    flags: MemFlags::trusted(),
3611                }
3612                .emit(sink, emit_info, state);
3613                Inst::AluRRR {
3614                    alu_op: ALUOp::AddS,
3615                    size: OperandSize::Size64,
3616                    rd: regs::writable_zero_reg(),
3617                    rn: start.to_reg(),
3618                    rm: end,
3619                }
3620                .emit(sink, emit_info, state);
3621
3622                let loop_end = sink.get_label();
3623                Inst::CondBr {
3624                    taken: BranchTarget::Label(loop_start),
3625                    not_taken: BranchTarget::Label(loop_end),
3626                    kind: CondBrKind::Cond(Cond::Gt),
3627                }
3628                .emit(sink, emit_info, state);
3629                sink.bind_label(loop_end, &mut state.ctrl_plane);
3630            }
3631        }
3632
3633        let end_off = sink.cur_offset();
3634        debug_assert!(
3635            (end_off - start_off) <= Inst::worst_case_size()
3636                || matches!(self, Inst::EmitIsland { .. }),
3637            "Worst case size exceed for {:?}: {}",
3638            self,
3639            end_off - start_off
3640        );
3641
3642        state.clear_post_insn();
3643    }
3644
    /// Render this instruction as human-readable assembly-like text for
    /// disassembly/debug output, threading the emission `state` through so
    /// printing can reflect the same context as emission.
    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        // Delegate to the shared `print_with_state` pretty-printer.
        self.print_with_state(state)
    }
3648}
3649
/// Emit the epilogue-like sequence shared by `Inst::ReturnCall` and
/// `Inst::ReturnCallInd` (tail calls), run immediately before the actual
/// control transfer to the callee:
///
/// 1. restore clobbered (callee-saved) registers,
/// 2. pop the frame-setup area (`fp`/`lr` pair) if one was pushed,
/// 3. shrink SP by any tail-args over-allocation from the prologue, and
/// 4. authenticate the return address if pointer authentication is in use.
///
/// `T` is the callee-destination payload of `ReturnCallInfo` (direct name vs.
/// register); this sequence never inspects it.
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // Restore clobbered registers exactly as a normal epilogue would; tail
    // calls always use the `Tail` calling convention here.
    for inst in AArch64MachineDeps::gen_clobber_restore(
        CallConv::Tail,
        &emit_info.flags,
        state.frame_layout(),
    ) {
        inst.emit(sink, emit_info, state);
    }

    // Pop the frame-setup area (saved fp/lr) if the prologue created one.
    let setup_area_size = state.frame_layout().setup_area_size;
    if setup_area_size > 0 {
        // N.B.: sp is already adjusted to the appropriate place by the
        // clobber-restore code (which also frees the fixed frame). Hence, there
        // is no need for the usual `mov sp, fp` here.

        // `ldp fp, lr, [sp], #16`
        Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::SPPostIndexed {
                // TODO: we could fold the increment for incoming_args_diff here, as long as that
                // value is less than 502*8, by adding it to `setup_area_size`.
                // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
                simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
            },
            flags: MemFlags::trusted(),
        }
        .emit(sink, emit_info, state);
    }

    // Adjust SP to account for the possible over-allocation in the prologue.
    // NOTE(review): this subtraction assumes `tail_args_size >=
    // new_stack_arg_size` (i.e., the prologue never under-allocates for tail
    // args); it would panic on underflow in debug builds — confirm invariant
    // at the frame-layout computation site.
    let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
    if incoming_args_diff > 0 {
        for inst in
            AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
        {
            inst.emit(sink, emit_info, state);
        }
    }

    // If return-address signing is enabled, emit the matching `auti*` hint so
    // the (signed) return address in `lr` is authenticated before the tail
    // call jumps away. Presumably this mirrors the prologue's `paci*`; the
    // `sign_return_address_all` case covers frames with no setup area.
    if (setup_area_size > 0 || info.sign_return_address_all)
        && let Some(key) = info.key
    {
        sink.put4(key.enc_auti_hint());
    }
}
3700}