cranelift_codegen/isa/aarch64/inst/emit.rs

//! AArch64 ISA: binary code emission.

use cranelift_control::ControlPlane;

use crate::ir::{self, types::*};
use crate::isa::aarch64::inst::*;
use crate::trace;

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
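///
/// As an illustrative sketch (not emitted verbatim): an `FPOffset` whose
/// offset fits neither a `SImm9` nor a scaled `UImm12`, say `off = 0x10_0000`,
/// is rewritten as a constant load into the spill temporary followed by a
/// register-extended amode, roughly (taking x16 as the spill temporary):
///
/// ```text
/// movz x16, #0x10, LSL #16    ; Inst::load_constant(tmp, off)
/// ldr  x0, [fp, x16, sxtx]    ; AMode::RegExtended { .. }
/// ```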
pub fn mem_finalize(
    sink: Option<&mut MachBuffer<Inst>>,
    mem: &AMode,
    access_ty: Type,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset { off, .. }
        | &AMode::SPOffset { off }
        | &AMode::FPOffset { off }
        | &AMode::IncomingArg { off }
        | &AMode::SlotOffset { off } => {
            let basereg = match mem {
                &AMode::RegOffset { rn, .. } => rn,
                &AMode::SPOffset { .. }
                | &AMode::SlotOffset { .. }
                | &AMode::IncomingArg { .. } => stack_reg(),
                &AMode::FPOffset { .. } => fp_reg(),
                _ => unreachable!(),
            };
            let off = match mem {
                &AMode::IncomingArg { .. } => {
                    let frame_layout = state.frame_layout();
                    i64::from(
                        frame_layout.setup_area_size
                            + frame_layout.tail_args_size
                            + frame_layout.clobber_size
                            + frame_layout.fixed_frame_storage_size
                            + frame_layout.outgoing_args_size,
                    ) - off
                }
                &AMode::SlotOffset { .. } => {
                    let adj = i64::from(state.frame_layout().outgoing_args_size);
                    trace!(
                        "mem_finalize: slot offset {} + adj {} -> {}",
                        off,
                        adj,
                        off + adj
                    );
                    off + adj
                }
                _ => off,
            };

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled { rn: basereg, simm9 };
                (smallvec![], mem)
            } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
                let mem = AMode::UnsignedOffset {
                    rn: basereg,
                    uimm12,
                };
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                (
                    Inst::load_constant(tmp, off as u64, &mut |_| tmp),
                    AMode::RegExtended {
                        rn: basereg,
                        rm: tmp.to_reg(),
                        extendop: ExtendOp::SXTX,
                    },
                )
            }
        }

        AMode::Const { addr } => {
            let sink = match sink {
                Some(sink) => sink,
                None => return (smallvec![], mem.clone()),
            };
            let label = sink.get_label_for_constant(*addr);
            let label = MemLabel::Mach(label);
            (smallvec![], AMode::Label { label })
        }

        _ => (smallvec![], mem.clone()),
    }
}

//=============================================================================
// Instructions and subcomponents: emission

pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Int);
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Float);
    u32::from(m.to_real_reg().unwrap().hw_enc())
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
}

/// Encode a 3-register arithmetic instruction.
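///
/// Field layout: `bits_31_21 | rm[20:16] | bits_15_10 | rn[9:5] | rd[4:0]`.
/// As a worked example (opcode bits taken from the `Inst::AluRRR` arm below),
/// `add x0, x1, x2` is `enc_arith_rrr(0b10001011_000, 0b000000, x0, x1, x2)`,
/// i.e. `0x8B02_0020`.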
pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

/// Set the size bit of an instruction.
fn enc_op_size(op: u32, size: OperandSize) -> u32 {
    (op & !(1 << 31)) | (size.sf_bit() << 31)
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),
            size,
        ),
        CondBrKind::NotZero(reg, size) => enc_op_size(
            enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),
            size,
        ),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

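/// Encode a TBZ/TBNZ instruction: the bit number to test is split into its
/// high bit (placed at bit 31, which doubles as the 64-bit selector) and its
/// low five bits (placed at bits 23:19).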
fn enc_test_bit_and_branch(
    kind: TestBitAndBranchKind,
    taken: BranchTarget,
    reg: Reg,
    bit: u8,
) -> u32 {
    assert!(bit < 64);
    let op_31 = u32::from(bit >> 5);
    let op_23_19 = u32::from(bit & 0b11111);
    let op_30_24 = 0b0110110
        | match kind {
            TestBitAndBranchKind::Z => 0,
            TestBitAndBranchKind::NZ => 1,
        };
    (op_31 << 31)
        | (op_30_24 << 24)
        | (op_23_19 << 19)
        | (taken.as_offset14_or_zero() << 5)
        | machreg_to_gpr(reg)
}

/// Encode a move-wide instruction.
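///
/// For example (a hand-checked sketch), `movz x0, #1` is
/// `enc_move_wide(MoveWideOp::MovZ, x0, imm, OperandSize::Size64)` with
/// `imm.bits == 1` and `imm.shift == 0`, yielding `0xD280_0020`.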
pub fn enc_move_wide(
    op: MoveWideOp,
    rd: Writable<Reg>,
    imm: MoveWideConst,
    size: OperandSize,
) -> u32 {
    assert!(imm.shift <= 0b11);
    let op = match op {
        MoveWideOp::MovN => 0b00,
        MoveWideOp::MovZ => 0b10,
    };
    0x12800000
        | size.sf_bit() << 31
        | op << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

/// Encode a move-keep immediate instruction.
pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    assert!(imm.shift <= 0b11);
    0x72800000
        | size.sf_bit() << 31
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

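/// Unsigned-offset load/store form: note the `0b1 << 24` below, which turns
/// the "unscaled" base opcode passed in `op_31_22` into the scaled,
/// unsigned-offset variant (see the opcode comment in the load emission
/// further down).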
fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

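/// ADR-family encoding: the 21-bit immediate is split into `immlo` (its low
/// two bits, at bits 30:29) and `immhi` (the remaining 19 bits, at bits
/// 23:5). For example, `enc_adr(8, x0)` encodes `adr x0, #8` as
/// `0x1000_0040`.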
pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b00010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b10010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
    debug_assert_eq!(op & 0b1, op);
    debug_assert_eq!(o2 & 0b1, o2);
    0b100_11010100_00000_0000_00_00000_00000
        | (op << 30)
        | (machreg_to_gpr(rm) << 16)
        | (cond.bits() << 12)
        | (o2 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_00_00000_0_0000
        | size.sf_bit() << 31
        | machreg_to_gpr(rm) << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

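/// Encode a bitfield-move instruction. `opc` selects the variant: `0b00` is
/// SBFM, `0b01` is BFM, and `0b10` is UBFM.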
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

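/// `DMB ISH`: a full data memory barrier over the inner-shareable domain,
/// with the fixed system-instruction encoding `0xD503_3BBF`.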
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
    assert!(machreg_to_gpr(rt.to_reg()) != 31);
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    let bit15 = match op {
        AtomicRMWOp::Swp => 0b1,
        _ => 0b0,
    };
    let op = match op {
        AtomicRMWOp::Add => 0b000,
        AtomicRMWOp::Clr => 0b001,
        AtomicRMWOp::Eor => 0b010,
        AtomicRMWOp::Set => 0b011,
        AtomicRMWOp::Smax => 0b100,
        AtomicRMWOp::Smin => 0b101,
        AtomicRMWOp::Umax => 0b110,
        AtomicRMWOp::Umin => 0b111,
        AtomicRMWOp::Swp => 0b000,
    };
    0b00_111_000_111_00000_0_000_00_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs) << 16)
        | bit15 << 15
        | (op << 12)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

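/// Compare-and-swap. The fixed one-bits at positions 22 (`L`) and 15 (`o0`)
/// in the base encoding below select acquire and release semantics, i.e. the
/// CASAL variant.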
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

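/// Vector modified-immediate encoding: the 8-bit immediate is split into
/// `abc` (its top three bits, at bits 18:16) and `defgh` (its low five bits,
/// at bits 9:5).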
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
    ctrl_plane: ControlPlane,

    frame_layout: FrameLayout,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
        EmitState {
            user_stack_map: None,
            ctrl_plane,
            frame_layout: abi.frame_layout().clone(),
        }
    }

    fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
        self.user_stack_map = user_stack_map;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn frame_layout(&self) -> &FrameLayout {
        &self.frame_layout
    }
}

impl EmitState {
    fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
        self.user_stack_map.take()
    }

    fn clear_post_insn(&mut self) {
        self.user_stack_map = None;
    }
}

/// Constant state used during function compilation.
pub struct EmitInfo(settings::Flags);

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags) -> Self {
        Self(flags)
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        match self {
            &Inst::AluRRR {
                alu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                debug_assert!(match alu_op {
                    ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,
                    _ => true,
                });
                let top11 = match alu_op {
                    ALUOp::Add => 0b00001011_000,
                    ALUOp::Adc => 0b00011010_000,
                    ALUOp::AdcS => 0b00111010_000,
                    ALUOp::Sub => 0b01001011_000,
                    ALUOp::Sbc => 0b01011010_000,
                    ALUOp::SbcS => 0b01111010_000,
                    ALUOp::Orr => 0b00101010_000,
                    ALUOp::And => 0b00001010_000,
                    ALUOp::AndS => 0b01101010_000,
                    ALUOp::Eor => 0b01001010_000,
                    ALUOp::OrrNot => 0b00101010_001,
                    ALUOp::AndNot => 0b00001010_001,
                    ALUOp::EorNot => 0b01001010_001,
                    ALUOp::AddS => 0b00101011_000,
                    ALUOp::SubS => 0b01101011_000,
                    ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
                    ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
                    ALUOp::SMulH => 0b10011011_010,
                    ALUOp::UMulH => 0b10011011_110,
                };

                let top11 = top11 | size.sf_bit() << 10;
                let bit15_10 = match alu_op {
                    ALUOp::SDiv => 0b000011,
                    ALUOp::UDiv => 0b000010,
                    ALUOp::Extr => 0b001011,
                    ALUOp::Lsr => 0b001001,
                    ALUOp::Asr => 0b001010,
                    ALUOp::Lsl => 0b001000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
                // The stack pointer is the zero register in this context, so this might be an
                // indication that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
            &Inst::AluRRRR {
                alu_op,
                size,
                rd,
                rm,
                rn,
                ra,
            } => {
                let (top11, bit15) = match alu_op {
                    ALUOp3::MAdd => (0b0_00_11011_000, 0),
                    ALUOp3::MSub => (0b0_00_11011_000, 1),
                    ALUOp3::UMAddL => {
                        debug_assert!(size == OperandSize::Size32);
                        (0b1_00_11011_1_01, 0)
                    }
                    ALUOp3::SMAddL => {
                        debug_assert!(size == OperandSize::Size32);
                        (0b1_00_11011_0_01, 0)
                    }
                };
                let top11 = top11 | size.sf_bit() << 10;
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
                alu_op,
                size,
                rd,
                rn,
                ref imm12,
            } => {
                let top8 = match alu_op {
                    ALUOp::Add => 0b000_10001,
                    ALUOp::Sub => 0b010_10001,
                    ALUOp::AddS => 0b001_10001,
                    ALUOp::SubS => 0b011_10001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top8 = top8 | size.sf_bit() << 7;
                sink.put4(enc_arith_rr_imm12(
                    top8,
                    imm12.shift_bits(),
                    imm12.imm_bits(),
                    rn,
                    rd,
                ));
            }
            &Inst::AluRRImmLogic {
                alu_op,
                size,
                rd,
                rn,
                ref imml,
            } => {
                let (top9, inv) = match alu_op {
                    ALUOp::Orr => (0b001_100100, false),
                    ALUOp::And => (0b000_100100, false),
                    ALUOp::AndS => (0b011_100100, false),
                    ALUOp::Eor => (0b010_100100, false),
                    ALUOp::OrrNot => (0b001_100100, true),
                    ALUOp::AndNot => (0b000_100100, true),
                    ALUOp::EorNot => (0b010_100100, true),
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top9 = top9 | size.sf_bit() << 8;
                let imml = if inv { imml.invert() } else { *imml };
                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
            }

            &Inst::AluRRImmShift {
                alu_op,
                size,
                rd,
                rn,
                ref immshift,
            } => {
                let amt = immshift.value();
                let (top10, immr, imms) = match alu_op {
                    ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
                    ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
                    ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
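                    // LSL is an alias of UBFM: `lsl rd, rn, #amt` is
                    // `ubfm rd, rn, #((bits - amt) % bits), #(bits - 1 - amt)`.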
                    ALUOp::Lsl => {
                        let bits = if size.is64() { 64 } else { 32 };
                        (
                            0b0101001100,
                            u32::from((bits - amt) % bits),
                            u32::from(bits - 1 - amt),
                        )
                    }
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
                let imms = match alu_op {
                    ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
                    _ => imms,
                };
                sink.put4(
                    (top10 << 22)
                        | (immr << 16)
                        | (imms << 10)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }

            &Inst::AluRRRShift {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b000_01011000,
                    ALUOp::AddS => 0b001_01011000,
                    ALUOp::Sub => 0b010_01011000,
                    ALUOp::SubS => 0b011_01011000,
                    ALUOp::Orr => 0b001_01010000,
                    ALUOp::And => 0b000_01010000,
                    ALUOp::AndS => 0b011_01010000,
                    ALUOp::Eor => 0b010_01010000,
                    ALUOp::OrrNot => 0b001_01010001,
                    ALUOp::EorNot => 0b010_01010001,
                    ALUOp::AndNot => 0b000_01010001,
                    ALUOp::Extr => 0b000_10011100,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | size.sf_bit() << 10;
                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
                let bits_15_10 = u32::from(shiftop.amt().value());
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::AluRRRExtend {
                alu_op,
                size,
                rd,
                rn,
                rm,
                extendop,
            } => {
                let top11: u32 = match alu_op {
                    ALUOp::Add => 0b00001011001,
                    ALUOp::Sub => 0b01001011001,
                    ALUOp::AddS => 0b00101011001,
                    ALUOp::SubS => 0b01101011001,
                    _ => unimplemented!("{:?}", alu_op),
                };
                let top11 = top11 | size.sf_bit() << 10;
                let bits_15_10 = u32::from(extendop.bits()) << 3;
                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
            }

            &Inst::BitRR {
                op, size, rd, rn, ..
            } => {
                let (op1, op2) = match op {
                    BitOp::RBit => (0b00000, 0b000000),
                    BitOp::Clz => (0b00000, 0b000100),
                    BitOp::Cls => (0b00000, 0b000101),
                    BitOp::Rev16 => (0b00000, 0b000001),
                    BitOp::Rev32 => (0b00000, 0b000010),
                    BitOp::Rev64 => (0b00000, 0b000011),
                };
                sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
            }

            &Inst::ULoad8 { rd, ref mem, flags }
            | &Inst::SLoad8 { rd, ref mem, flags }
            | &Inst::ULoad16 { rd, ref mem, flags }
            | &Inst::SLoad16 { rd, ref mem, flags }
            | &Inst::ULoad32 { rd, ref mem, flags }
            | &Inst::SLoad32 { rd, ref mem, flags }
            | &Inst::ULoad64 {
                rd, ref mem, flags, ..
            }
            | &Inst::FpuLoad16 { rd, ref mem, flags }
            | &Inst::FpuLoad32 { rd, ref mem, flags }
            | &Inst::FpuLoad64 { rd, ref mem, flags }
            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                // ldst encoding helpers take Reg, not Writable<Reg>.
                let rd = rd.to_reg();

                // This is the base opcode (top 10 bits) for the "unscaled
                // immediate" form (Unscaled). Other addressing modes will OR in
                // other values for bits 24/25 (bits 1/2 of this constant).
                let op = match self {
                    Inst::ULoad8 { .. } => 0b0011100001,
                    Inst::SLoad8 { .. } => 0b0011100010,
                    Inst::ULoad16 { .. } => 0b0111100001,
                    Inst::SLoad16 { .. } => 0b0111100010,
                    Inst::ULoad32 { .. } => 0b1011100001,
                    Inst::SLoad32 { .. } => 0b1011100010,
                    Inst::ULoad64 { .. } => 0b1111100001,
                    Inst::FpuLoad16 { .. } => 0b0111110001,
                    Inst::FpuLoad32 { .. } => 0b1011110001,
                    Inst::FpuLoad64 { .. } => 0b1111110001,
                    Inst::FpuLoad128 { .. } => 0b0011110011,
                    _ => unreachable!(),
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label { ref label } => {
                        let offset = match label {
                            // cast i32 to u32 (two's-complement)
                            MemLabel::PCRel(off) => *off as u32,
                            // Emit a relocation into the `MachBuffer`
                            // for the label that's being loaded from and
                            // encode an address of 0 in its place which will
                            // get filled in by relocation resolution later on.
                            MemLabel::Mach(label) => {
                                sink.use_label_at_offset(
                                    sink.cur_offset(),
                                    *label,
                                    LabelUse::Ldr19,
                                );
                                0
                            }
                        } / 4;
                        assert!(offset < (1 << 19));
                        match self {
                            &Inst::ULoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
                            }
                            &Inst::SLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
                            }
                            &Inst::FpuLoad32 { .. } => {
                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
                            }
                            &Inst::ULoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
                            }
                            &Inst::FpuLoad64 { .. } => {
                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
                            }
                            &Inst::FpuLoad128 { .. } => {
                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
                            }
                            _ => panic!("Unsupported size for LDR from constant pool!"),
                        }
                    }
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }

            &Inst::Store8 { rd, ref mem, flags }
            | &Inst::Store16 { rd, ref mem, flags }
            | &Inst::Store32 { rd, ref mem, flags }
            | &Inst::Store64 { rd, ref mem, flags }
            | &Inst::FpuStore16 { rd, ref mem, flags }
            | &Inst::FpuStore32 { rd, ref mem, flags }
            | &Inst::FpuStore64 { rd, ref mem, flags }
            | &Inst::FpuStore128 { rd, ref mem, flags } => {
                let mem = mem.clone();
                let access_ty = self.mem_type().unwrap();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);

                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let op = match self {
                    Inst::Store8 { .. } => 0b0011100000,
                    Inst::Store16 { .. } => 0b0111100000,
                    Inst::Store32 { .. } => 0b1011100000,
                    Inst::Store64 { .. } => 0b1111100000,
                    Inst::FpuStore16 { .. } => 0b0111110000,
                    Inst::FpuStore32 { .. } => 0b1011110000,
                    Inst::FpuStore64 { .. } => 0b1111110000,
                    Inst::FpuStore128 { .. } => 0b0011110010,
                    _ => unreachable!(),
                };

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    &AMode::Unscaled { rn, simm9 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                    }
                    &AMode::UnsignedOffset { rn, uimm12 } => {
                        let reg = rn;
                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
                    }
                    &AMode::RegReg { rn, rm } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
                        ));
                    }
                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
                        let r1 = rn;
                        let r2 = rm;
                        let extendop = match &mem {
                            &AMode::RegScaled { .. } => None,
                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
                            _ => unreachable!(),
                        };
                        sink.put4(enc_ldst_reg(
                            op, r1, r2, /* scaled = */ true, extendop, rd,
                        ));
                    }
                    &AMode::RegExtended { rn, rm, extendop } => {
                        let r1 = rn;
                        let r2 = rm;
                        sink.put4(enc_ldst_reg(
                            op,
                            r1,
                            r2,
                            /* scaled = */ false,
                            Some(extendop),
                            rd,
                        ));
                    }
                    &AMode::Label { .. } => {
                        panic!("Store to a MemLabel not implemented!");
                    }
                    &AMode::SPPreIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
                    }
                    &AMode::SPPostIndexed { simm9 } => {
                        let reg = stack_reg();
                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
                    }
                    // Eliminated by `mem_finalize()` above.
                    &AMode::SPOffset { .. }
                    | &AMode::FPOffset { .. }
                    | &AMode::IncomingArg { .. }
                    | &AMode::SlotOffset { .. }
                    | &AMode::Const { .. }
                    | &AMode::RegOffset { .. } => {
                        panic!("Should not see {mem:?} here!")
                    }
                }
            }

            &Inst::StoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let mem = mem.clone();
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }
                match &mem {
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::LoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();
                let mem = mem.clone();
                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                match &mem {
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert_eq!(simm7.scale_ty, I64);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::FpuLoadP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuLoadP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let rt = rt.to_reg();
                let rt2 = rt2.to_reg();
                let mem = mem.clone();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                let opc = match self {
                    &Inst::FpuLoadP64 { .. } => 0b01,
                    &Inst::FpuLoadP128 { .. } => 0b10,
                    _ => unreachable!(),
                };

                match &mem {
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::FpuStoreP64 {
                rt,
                rt2,
                ref mem,
                flags,
            }
            | &Inst::FpuStoreP128 {
                rt,
                rt2,
                ref mem,
                flags,
            } => {
                let mem = mem.clone();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(trap_code);
                }

                let opc = match self {
                    &Inst::FpuStoreP64 { .. } => 0b01,
                    &Inst::FpuStoreP128 { .. } => 0b10,
                    _ => unreachable!(),
                };

                match &mem {
                    &PairAMode::SignedOffset { reg, simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPreIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
                    }
                    &PairAMode::SPPostIndexed { simm7 } => {
                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
                        let reg = stack_reg();
                        sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
                    }
                }
            }
            &Inst::Mov { size, rd, rm } => {
                assert!(rd.to_reg().class() == rm.class());
                assert!(rm.class() == RegClass::Int);

                match size {
                    OperandSize::Size64 => {
                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                        // should never MOV to XZR.
                        assert!(rd.to_reg() != stack_reg());

                        if rm == stack_reg() {
                            // We can't use ORR here, so use an `add rd, sp, #0` instead.
                            let imm12 = Imm12::maybe_from_u64(0).unwrap();
                            sink.put4(enc_arith_rr_imm12(
                                0b100_10001,
                                imm12.shift_bits(),
                                imm12.imm_bits(),
                                rm,
                                rd,
                            ));
                        } else {
                            // Encoded as ORR rd, rm, zero.
                            sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
                        }
                    }
                    OperandSize::Size32 => {
                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                        // should never MOV to XZR.
                        assert!(machreg_to_gpr(rd.to_reg()) != 31);
                        // Encoded as ORR rd, rm, zero.
                        sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
                    }
                }
            }
            &Inst::MovFromPReg { rd, rm } => {
                let rm: Reg = rm.into();
                debug_assert!(
                    [
                        regs::fp_reg(),
                        regs::stack_reg(),
                        regs::link_reg(),
                        regs::pinned_reg()
                    ]
                    .contains(&rm)
                );
                assert!(rm.class() == RegClass::Int);
                assert!(rd.to_reg().class() == rm.class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
            &Inst::MovToPReg { rd, rm } => {
                let rd: Writable<Reg> = Writable::from_reg(rd.into());
                debug_assert!(
                    [
                        regs::fp_reg(),
                        regs::stack_reg(),
                        regs::link_reg(),
                        regs::pinned_reg()
                    ]
                    .contains(&rd.to_reg())
                );
                assert!(rd.to_reg().class() == RegClass::Int);
                assert!(rm.class() == rd.to_reg().class());
                let size = OperandSize::Size64;
                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
            }
1408            &Inst::MovWide { op, rd, imm, size } => {
1409                sink.put4(enc_move_wide(op, rd, imm, size));
1410            }
1411            &Inst::MovK { rd, rn, imm, size } => {
1412                debug_assert_eq!(rn, rd.to_reg());
1413                sink.put4(enc_movk(rd, imm, size));
1414            }
1415            &Inst::CSel { rd, rn, rm, cond } => {
1416                sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
1417            }
1418            &Inst::CSNeg { rd, rn, rm, cond } => {
1419                sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
1420            }
1421            &Inst::CSet { rd, cond } => {
1422                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
1423            }
1424            &Inst::CSetm { rd, cond } => {
                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
            }
            &Inst::CCmp {
                size,
                rn,
                rm,
                nzcv,
                cond,
            } => {
                sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
            }
            &Inst::CCmpImm {
                size,
                rn,
                imm,
                nzcv,
                cond,
            } => {
                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
            }
            &Inst::AtomicRMW {
                ty,
                op,
                rs,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
            }
            &Inst::AtomicRMWLoop { ty, op, flags, .. } => {
                /* Emit this:
                     again:
                      ldaxr{,b,h}  x/w27, [x25]
                      // maybe sign extend
                      op          x28, x27, x26 // op is add, sub, and, orr, or eor;
                                                // nand/min/max/xchg use a slightly
                                                // different sequence (see below)
                      stlxr{,b,h}  w24, x/w28, [x25]
                      cbnz        x24, again

                   Operand conventions:
                      IN:  x25 (addr), x26 (2nd arg for op)
                      OUT: x27 (old value), x24 (trashed), x28 (trashed)

                   It is unfortunate that, per the ARM documentation, x28 cannot be used for
                   both the store-data and success-flag operands of stlxr.  This causes the
                   instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
                   instead for the success-flag.
                */
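                // As a concrete sketch (not emitted verbatim), op == Smin with
                // ty == I16 expands to roughly:
                //   again:
                //     ldaxrh w27, [x25]
                //     sxth   w27, w27
                //     cmp    w27, w26, sxth
                //     csel   x28, x27, x26, lt
                //     stlxrh w24, w28, [x25]
                //     cbnz   x24, again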
                // TODO: We should not hardcode registers here; a better approach
                // would be to pass some scratch registers in the AtomicRMWLoop
                // pseudo-instruction and use those instead.
                let xzr = zero_reg();
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let x28wr = writable_xreg(28);
                let again_label = sink.get_label();

                // again:
                sink.bind_label(again_label, &mut state.ctrl_plane);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
                let size = OperandSize::from_ty(ty);
                let sign_ext = match op {
                    AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
                        I16 => Some((ExtendOp::SXTH, 16)),
                        I8 => Some((ExtendOp::SXTB, 8)),
                        _ => None,
                    },
                    _ => None,
                };

                // sxt{b|h} the loaded result if necessary.
                if let Some((_, from_bits)) = sign_ext {
                    Inst::Extend {
                        rd: x27wr,
                        rn: x27,
                        signed: true,
                        from_bits,
                        to_bits: size.bits(),
                    }
                    .emit(sink, emit_info, state);
                }

                match op {
                    AtomicRMWLoopOp::Xchg => {} // do nothing
                    AtomicRMWLoopOp::Nand => {
                        // and x28, x27, x26
                        // mvn x28, x28

                        Inst::AluRRR {
                            alu_op: ALUOp::And,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);

                        Inst::AluRRR {
                            alu_op: ALUOp::OrrNot,
                            size,
                            rd: x28wr,
                            rn: xzr,
                            rm: x28,
                        }
                        .emit(sink, emit_info, state);
                    }
                    AtomicRMWLoopOp::Umin
                    | AtomicRMWLoopOp::Umax
                    | AtomicRMWLoopOp::Smin
                    | AtomicRMWLoopOp::Smax => {
                        // cmp x27, x26 {?sxt}
                        // csel.op x28, x27, x26

                        let cond = match op {
                            AtomicRMWLoopOp::Umin => Cond::Lo,
                            AtomicRMWLoopOp::Umax => Cond::Hi,
                            AtomicRMWLoopOp::Smin => Cond::Lt,
                            AtomicRMWLoopOp::Smax => Cond::Gt,
                            _ => unreachable!(),
                        };

                        if let Some((extendop, _)) = sign_ext {
                            Inst::AluRRRExtend {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                                extendop,
                            }
                            .emit(sink, emit_info, state);
                        } else {
                            Inst::AluRRR {
                                alu_op: ALUOp::SubS,
                                size,
                                rd: writable_zero_reg(),
                                rn: x27,
                                rm: x26,
                            }
                            .emit(sink, emit_info, state);
                        }

                        Inst::CSel {
                            cond,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                    _ => {
                        // add/sub/and/orr/eor x28, x27, x26
                        let alu_op = match op {
                            AtomicRMWLoopOp::Add => ALUOp::Add,
                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
                            AtomicRMWLoopOp::And => ALUOp::And,
                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
                            AtomicRMWLoopOp::Nand
                            | AtomicRMWLoopOp::Umin
                            | AtomicRMWLoopOp::Umax
                            | AtomicRMWLoopOp::Smin
                            | AtomicRMWLoopOp::Smax
                            | AtomicRMWLoopOp::Xchg => unreachable!(),
                        };

                        Inst::AluRRR {
                            alu_op,
                            size,
                            rd: x28wr,
                            rn: x27,
                            rm: x26,
                        }
                        .emit(sink, emit_info, state);
                    }
                }

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }
                if op == AtomicRMWLoopOp::Xchg {
                    sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
                } else {
                    sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
                }

                // cbnz x24, again
                // Note: `stlxr` writes only w24, but a write to a w-register
                // zeroes the upper 32 bits of the x-register, so testing the
                // full x24 here is safe.
                let br_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24, OperandSize::Size64),
                ));
                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
            }
            &Inst::AtomicCAS {
                rd,
                rs,
                rt,
                rn,
                ty,
                flags,
            } => {
                debug_assert_eq!(rd.to_reg(), rs);
                let size = match ty {
                    I8 => 0b00,
                    I16 => 0b01,
                    I32 => 0b10,
                    I64 => 0b11,
                    _ => panic!("Unsupported type: {ty}"),
                };

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_cas(size, rd, rt, rn));
            }
            &Inst::AtomicCASLoop { ty, flags, .. } => {
                /* Emit this:
                    again:
                     ldaxr{,b,h} x/w27, [x25]
                     cmp         x27, x/w26 uxt{b,h}
                     b.ne        out
                     stlxr{,b,h} w24, x/w28, [x25]
                     cbnz        x24, again
                    out:

                  Operand conventions:
                     IN:  x25 (addr), x26 (expected value), x28 (replacement value)
                     OUT: x27 (old value), x24 (trashed)
                */
                let x24 = xreg(24);
                let x25 = xreg(25);
                let x26 = xreg(26);
                let x27 = xreg(27);
                let x28 = xreg(28);
                let xzrwr = writable_zero_reg();
                let x24wr = writable_xreg(24);
                let x27wr = writable_xreg(27);
                let again_label = sink.get_label();
                let out_label = sink.get_label();

                // again:
                sink.bind_label(again_label, &mut state.ctrl_plane);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                // ldaxr x27, [x25]
                sink.put4(enc_ldaxr(ty, x27wr, x25));

                // The top 32 bits are zeroed by the ldaxr, so we don't have to
                // use UXTW; the x-form of the register suffices.
                let (bit21, extend_op) = match ty {
                    I8 => (0b1, 0b000000),
                    I16 => (0b1, 0b001000),
                    _ => (0b0, 0b000000),
                };
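                // (Bit 21 selects the extended-register form of SUBS; the
                // option field in `extend_op` is UXTB for I8 and UXTH for I16,
                // so only the loaded narrow value participates in the compare.)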
                let bits_31_21 = 0b111_01011_000 | bit21;
                // cmp x27, x26 (== subs xzr, x27, x26)
                sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));

                // b.ne out
                let br_out_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(out_label),
                    CondBrKind::Cond(Cond::Ne),
                ));
                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);

                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]

                // cbnz x24, again.
                // Note: `stlxr` writes only w24, but a write to a w-register
                // zeroes the upper 32 bits of the x-register, so testing the
                // full x24 here is safe.
                let br_again_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(again_label),
                    CondBrKind::NotZero(x24, OperandSize::Size64),
                ));
                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            &Inst::LoadAcquire {
                access_ty,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldar(access_ty, rt, rn));
            }
            &Inst::StoreRelease {
                access_ty,
                rt,
                rn,
                flags,
            } => {
                if let Some(trap_code) = flags.trap_code() {
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_stlr(access_ty, rt, rn));
            }
            &Inst::Fence {} => {
                sink.put4(enc_dmb_ish()); // dmb ish
            }
            &Inst::Csdb {} => {
                sink.put4(0xd503229f);
            }
            &Inst::FpuMove32 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove64 { rd, rn } => {
                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
            }
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_11110000_00000_000001_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::FpuExtend { rd, rn, size } => {
                sink.put4(enc_fpurr(
                    0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
                    rd,
                    rn,
                ));
            }
            &Inst::FpuRR {
                fpu_op,
                size,
                rd,
                rn,
            } => {
                let top22 = match fpu_op {
                    FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
                    FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
                    FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
                    FPUOp1::Cvt32To64 => {
                        debug_assert_eq!(size, ScalarSize::Size32);
                        0b000_11110_00_1_000101_10000
                    }
                    FPUOp1::Cvt64To32 => {
                        debug_assert_eq!(size, ScalarSize::Size64);
                        0b000_11110_01_1_000100_10000
                    }
                };
                let top22 = top22 | size.ftype() << 12;
                sink.put4(enc_fpurr(top22, rd, rn));
            }
            &Inst::FpuRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let top22 = match fpu_op {
                    FPUOp2::Add => 0b000_11110_00_1_00000_001010,
                    FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
                    FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
                    FPUOp2::Div => 0b000_11110_00_1_00000_000110,
                    FPUOp2::Max => 0b000_11110_00_1_00000_010010,
                    FPUOp2::Min => 0b000_11110_00_1_00000_010110,
                };
                let top22 = top22 | size.ftype() << 12;
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
                FPUOpRI::UShr32(imm) => {
                    debug_assert_eq!(32, imm.lane_size_in_bits);
                    sink.put4(
                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
                FPUOpRI::UShr64(imm) => {
                    debug_assert_eq!(64, imm.lane_size_in_bits);
                    sink.put4(
                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
                            | imm.enc() << 16
                            | machreg_to_vec(rn) << 5
                            | machreg_to_vec(rd.to_reg()),
                    )
                }
            },
            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
                debug_assert_eq!(rd.to_reg(), ri);
                match fpu_op {
                    FPUOpRIMod::Sli64(imm) => {
                        debug_assert_eq!(64, imm.lane_size_in_bits);
                        sink.put4(
                            0b01_1_111110_0000000_010101_00000_00000
                                | imm.enc() << 16
                                | machreg_to_vec(rn) << 5
                                | machreg_to_vec(rd.to_reg()),
                        )
                    }
                    FPUOpRIMod::Sli32(imm) => {
                        debug_assert_eq!(32, imm.lane_size_in_bits);
                        sink.put4(
                            0b0_0_1_011110_0000000_010101_00000_00000
                                | imm.enc() << 16
                                | machreg_to_vec(rn) << 5
                                | machreg_to_vec(rd.to_reg()),
                        )
                    }
                }
            }
            &Inst::FpuRRRR {
                fpu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let top17 = match fpu_op {
                    FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
                    FPUOp3::MSub => 0b000_11111_00_0_00000_1,
                    FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,
                    FPUOp3::NMSub => 0b000_11111_00_1_00000_1,
                };
                let top17 = top17 | size.ftype() << 7;
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, size } => {
                let (q, enc_size) = size.enc_size();
                let (u, bits_12_16, size) = match op {
                    VecMisc2::Not => (0b1, 0b00101, 0b00),
                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
                    VecMisc2::Fabs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01111, enc_size)
                    }
                    VecMisc2::Fneg => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01111, enc_size)
                    }
                    VecMisc2::Fsqrt => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11111, enc_size)
                    }
                    VecMisc2::Rev16 => {
                        debug_assert_eq!(size, VectorSize::Size8x16);
                        (0b0, 0b00001, enc_size)
                    }
                    VecMisc2::Rev32 => {
                        debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
                        (0b1, 0b00000, enc_size)
                    }
                    VecMisc2::Rev64 => {
                        debug_assert!(
                            size == VectorSize::Size8x16
                                || size == VectorSize::Size16x8
                                || size == VectorSize::Size32x4
                        );
                        (0b0, 0b00000, enc_size)
                    }
                    VecMisc2::Fcvtzs => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11011, enc_size)
                    }
                    VecMisc2::Fcvtzu => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b11011, enc_size)
                    }
                    VecMisc2::Scvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Ucvtf => {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b1, 0b11101, enc_size & 0b1)
                    }
                    VecMisc2::Frintn => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size & 0b01)
                    }
                    VecMisc2::Frintz => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size)
                    }
                    VecMisc2::Frintm => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11001, enc_size & 0b01)
                    }
                    VecMisc2::Frintp => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b11000, enc_size)
                    }
                    VecMisc2::Cnt => {
                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                        (0b0, 0b00101, enc_size)
                    }
                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                    VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
                    VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
                    VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
                    VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
                    VecMisc2::Fcmeq0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmge0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmgt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01100, enc_size)
                    }
                    VecMisc2::Fcmle0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b1, 0b01101, enc_size)
                    }
                    VecMisc2::Fcmlt0 => {
                        debug_assert!(
                            size == VectorSize::Size32x2
                                || size == VectorSize::Size32x4
                                || size == VectorSize::Size64x2
                        );
                        (0b0, 0b01110, enc_size)
                    }
                };
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x8 => (0b0, 0b00),
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x4 => (0b0, 0b01),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, mut template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
                };
                if size.is_128bits() {
                    template |= 0b1 << 30;
                }
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
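                // In short: the 7-bit immh:immb field encodes `2 * lane_bits - imm`
                // for right shifts and `lane_bits + imm` for left shifts; the
                // position of the leading one bit in immh selects the lane size.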
                let immh_immb = match (size.lane_size(), is_shr) {
                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecShiftImmMod {
                op,
                rd,
                ri,
                rn,
                size,
                imm,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (is_shr, mut template) = match op {
                    VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
                };
                if size.is_128bits() {
                    template |= 0b1 << 30;
                }
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
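                // (Same immh:immb scheme as in `Inst::VecShiftImm` above.)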
                let immh_immb = match (size.lane_size(), is_shr) {
                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");
                }
            }
            &Inst::VecTbl { rd, rn, rm } => {
                sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
            }
            &Inst::VecTblExt { rd, ri, rn, rm } => {
                debug_assert_eq!(rd.to_reg(), ri);
                sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
            }
            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
            }
            &Inst::VecTbl2Ext {
                rd,
                ri,
                rn,
                rn2,
                rm,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
            }
            &Inst::FpuCmp { size, rn, rm } => {
                sink.put4(enc_fcmp(size, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
                    // FCVTZS (32/32-bit)
                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
                    // FCVTZU (32/32-bit)
                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
                    // FCVTZS (32/64-bit)
                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
                    // FCVTZU (32/64-bit)
                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
                    // FCVTZS (64/32-bit)
                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
                    // FCVTZU (64/32-bit)
                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
                    // FCVTZS (64/64-bit)
                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
                    // FCVTZU (64/64-bit)
                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
                };
                sink.put4(enc_fputoint(top16, rd, rn));
            }
            &Inst::IntToFpu { op, rd, rn } => {
                let top16 = match op {
                    // SCVTF (32/32-bit)
                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
                    // UCVTF (32/32-bit)
                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
                    // SCVTF (64/32-bit)
                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
                    // UCVTF (64/32-bit)
                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
                    // SCVTF (32/64-bit)
                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
                    // UCVTF (32/64-bit)
                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
                    // SCVTF (64/64-bit)
                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
                    // UCVTF (64/64-bit)
                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
                };
                sink.put4(enc_inttofpu(top16, rd, rn));
            }
            &Inst::FpuCSel16 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
                };
                sink.put4(enc_fround(top22, rd, rn));
            }
            &Inst::MovToFpu { rd, rn, size } => {
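                // FMOV from a general-purpose register to a scalar FP register.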
                let template = match size {
                    ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
                    _ => unreachable!(),
                };
                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
            }
            &Inst::FpuMoveFPImm { rd, imm, size } => {
                sink.put4(
                    0b000_11110_00_1_00_000_000100_00000_00000
                        | size.ftype() << 22
                        | ((imm.enc_bits() as u32) << 13)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovToVec {
                rd,
                ri,
                rn,
                idx,
                size,
            } => {
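                // INS (general): insert a GPR value into the selected vector lane.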
                debug_assert_eq!(rd.to_reg(), ri);
                let (imm5, shift) = match size.lane_size() {
                    ScalarSize::Size8 => (0b00001, 1),
                    ScalarSize::Size16 => (0b00010, 2),
                    ScalarSize::Size32 => (0b00100, 3),
                    ScalarSize::Size64 => (0b01000, 4),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & (0b11111 >> shift), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b010_01110000_00000_0_0011_1_00000_00000
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, size } => {
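                // UMOV: a zero-extending move of a vector lane to a GPR.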
                let (q, imm5, shift, mask) = match size {
                    ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
                    ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
                    ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
                    ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
                    _ => panic!("Unexpected scalar FP operand size: {size:?}"),
                };
                debug_assert_eq!(idx & mask, idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0111_1_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::MovFromVecSigned {
                rd,
                rn,
                idx,
                size,
                scalar_size,
            } => {
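                // SMOV: a sign-extending move of a vector lane to a GPR.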
                let (imm5, shift, half) = match size {
                    VectorSize::Size8x8 => (0b00001, 1, true),
                    VectorSize::Size8x16 => (0b00001, 1, false),
                    VectorSize::Size16x4 => (0b00010, 2, true),
                    VectorSize::Size16x8 => (0b00010, 2, false),
                    VectorSize::Size32x2 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, true)
                    }
                    VectorSize::Size32x4 => {
                        debug_assert_ne!(scalar_size, OperandSize::Size32);
                        (0b00100, 3, false)
                    }
                    _ => panic!("Unexpected vector operand size"),
                };
                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
                let imm5 = imm5 | ((idx as u32) << shift);
                sink.put4(
                    0b000_01110000_00000_0_0101_1_00000_00000
                        | (scalar_size.is64() as u32) << 30
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, size } => {
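                // DUP (general): replicate a GPR value to all vector lanes.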
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => 0b00001,
                    ScalarSize::Size16 => 0b00010,
                    ScalarSize::Size32 => 0b00100,
                    ScalarSize::Size64 => 0b01000,
                    _ => unreachable!(),
                };
                sink.put4(
                    0b0_0_0_01110000_00000_000011_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_gpr(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
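                // DUP (element): replicate one vector lane across the destination.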
                let q = size.is_128bits() as u32;
                let imm5 = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(lane < 16);
                        0b00001 | (u32::from(lane) << 1)
                    }
                    ScalarSize::Size16 => {
                        assert!(lane < 8);
                        0b00010 | (u32::from(lane) << 2)
                    }
                    ScalarSize::Size32 => {
                        assert!(lane < 4);
                        0b00100 | (u32::from(lane) << 3)
                    }
                    ScalarSize::Size64 => {
                        assert!(lane < 2);
                        0b01000 | (u32::from(lane) << 4)
                    }
                    _ => unimplemented!(),
                };
                sink.put4(
                    0b000_01110000_00000_000001_00000_00000
                        | (q << 30)
                        | (imm5 << 16)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFPImm { rd, imm, size } => {
                let imm = imm.enc_bits();
                let op = match size.lane_size() {
                    ScalarSize::Size32 => 0,
                    ScalarSize::Size64 => 1,
                    _ => unimplemented!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
            }
            &Inst::VecDupImm {
                rd,
                imm,
                invert,
                size,
            } => {
                let (imm, shift, shift_ones) = imm.value();
                let (op, cmode) = match size.lane_size() {
                    ScalarSize::Size8 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (0, 0b1110)
                    }
                    ScalarSize::Size16 => {
                        let s = shift & 8;

                        assert!(!shift_ones);
                        assert_eq!(s, shift);

                        (invert as u32, 0b1000 | (s >> 2))
                    }
                    ScalarSize::Size32 => {
                        if shift_ones {
                            assert!(shift == 8 || shift == 16);

                            (invert as u32, 0b1100 | (shift >> 4))
                        } else {
                            let s = shift & 24;

                            assert_eq!(s, shift);

                            (invert as u32, 0b0000 | (s >> 2))
                        }
                    }
                    ScalarSize::Size64 => {
                        assert!(!invert);
                        assert_eq!(shift, 0);

                        (1, 0b1110)
                    }
                    _ => unreachable!(),
                };
                let q_op = op | ((size.is_128bits() as u32) << 1);

                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
            }
            &Inst::VecExtend {
                t,
                rd,
                rn,
                high_half,
                lane_size,
            } => {
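                // SXTL/UXTL are aliases of SSHLL/USHLL with a zero shift; the
                // immh field selects the destination lane size.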
                let immh = match lane_size {
                    ScalarSize::Size16 => 0b001,
                    ScalarSize::Size32 => 0b010,
                    ScalarSize::Size64 => 0b100,
                    _ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),
                };
                let u = match t {
                    VecExtendOp::Sxtl => 0b0,
                    VecExtendOp::Uxtl => 0b1,
                };
                sink.put4(
                    0b000_011110_0000_000_101001_00000_00000
                        | ((high_half as u32) << 30)
                        | (u << 29)
                        | (immh << 19)
                        | (machreg_to_vec(rn) << 5)
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecRRLong {
                op,
                rd,
                rn,
                high_half,
            } => {
                let (u, size, bits_12_16) = match op {
                    VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
                    VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
                    VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
                    VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
                    VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecRRNarrowLow {
                op,
                rd,
                rn,
                lane_size,
            }
            | &Inst::VecRRNarrowHigh {
                op,
                rd,
                rn,
                lane_size,
                ..
            } => {
                let high_half = match self {
                    &Inst::VecRRNarrowLow { .. } => false,
                    &Inst::VecRRNarrowHigh { .. } => true,
                    _ => unreachable!(),
                };

                let size = match lane_size {
                    ScalarSize::Size8 => 0b00,
                    ScalarSize::Size16 => 0b01,
                    ScalarSize::Size32 => 0b10,
                    _ => panic!("unsupported size: {lane_size:?}"),
                };

                // Floats use a single bit to encode the destination size,
                // either half or single precision.
                let size = match op {
                    VecRRNarrowOp::Fcvtn => size >> 1,
                    _ => size,
                };

                let (u, bits_12_16) = match op {
                    VecRRNarrowOp::Xtn => (0b0, 0b10010),
                    VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
                    VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
                    VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
                    VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
                };

                sink.put4(enc_vec_rr_misc(
                    ((high_half as u32) << 1) | u,
                    size,
                    bits_12_16,
                    rd,
                    rn,
                ));
            }
            &Inst::VecMovElement {
                rd,
                ri,
                rn,
                dest_idx,
                src_idx,
                size,
            } => {
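                // INS (element): copy one vector lane into another lane of the
                // destination.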
2549                debug_assert_eq!(rd.to_reg(), ri);
2550                let (imm5, shift) = match size.lane_size() {
2551                    ScalarSize::Size8 => (0b00001, 1),
2552                    ScalarSize::Size16 => (0b00010, 2),
2553                    ScalarSize::Size32 => (0b00100, 3),
2554                    ScalarSize::Size64 => (0b01000, 4),
2555                    _ => unreachable!(),
2556                };
2557                let mask = 0b11111 >> shift;
2558                debug_assert_eq!(dest_idx & mask, dest_idx);
2559                debug_assert_eq!(src_idx & mask, src_idx);
2560                let imm4 = (src_idx as u32) << (shift - 1);
2561                let imm5 = imm5 | ((dest_idx as u32) << shift);
2562                sink.put4(
2563                    0b011_01110000_00000_0_0000_1_00000_00000
2564                        | (imm5 << 16)
2565                        | (imm4 << 11)
2566                        | (machreg_to_vec(rn) << 5)
2567                        | machreg_to_vec(rd.to_reg()),
2568                );
2569            }
2570            &Inst::VecRRPair { op, rd, rn } => {
2571                let bits_12_16 = match op {
2572                    VecPairOp::Addp => 0b11011,
2573                };
2574
2575                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2576            }
2577            &Inst::VecRRRLong {
2578                rd,
2579                rn,
2580                rm,
2581                alu_op,
2582                high_half,
2583            } => {
2584                let (u, size, bit14) = match alu_op {
2585                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2586                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2587                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2588                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2589                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2590                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2591                };
                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            &Inst::VecRRRLongMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                high_half,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (u, size, bit14) = match alu_op {
                    VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
                    VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
                    VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
                };
                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            &Inst::VecRRPairLong { op, rd, rn } => {
                let (u, size) = match op {
                    VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
                    VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
                    VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
                    VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
                };

                sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Umaxp => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b101001)
                    }
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Smin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Umax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Smax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Urhadd => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b000101)
                    }
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );

                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                    VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
                    VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
                    VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
                    VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
                };
                let top11 = if is_float {
                    top11 | size.enc_float_size() << 1
                } else {
                    top11
                };
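                // As an illustrative spot-check of this composition (not
                // code emitted here): `add v0.4s, v1.4s, v2.4s` has q = 1,
                // enc_size = 0b10, top11 = 0b000_01110_10_1 and
                // bit15_10 = 0b100001, and should assemble to 0x4ea28420.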
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (q, _enc_size) = size.enc_size();

                let (top11, bit15_10) = match alu_op {
                    VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUModOp::Fmla => {
                        (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                    VecALUModOp::Fmls => {
                        (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let idx = u32::from(idx);

                let (q, _size) = size.enc_size();
                let o2 = match alu_op {
                    VecALUModOp::Fmla => 0b0,
                    VecALUModOp::Fmls => 0b1,
                    _ => unreachable!(),
                };

                let (h, l) = match size {
                    VectorSize::Size32x4 => {
                        assert!(idx < 4);
                        (idx >> 1, idx & 1)
                    }
                    VectorSize::Size64x2 => {
                        assert!(idx < 2);
                        (idx, 0)
                    }
                    _ => unreachable!(),
                };
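                // As an illustrative spot-check (not code emitted here):
                // `fmla v0.4s, v1.4s, v2.s[3]` has q = 1, o2 = 0 and
                // (h, l) = (1, 1), and should assemble to 0x4fa21820.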

                let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
                let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate {
                rd,
                rn,
                size,
                flags,
            } => {
                let (q, size) = size.enc_size();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                      b.cond  else
                      mov     rd, rm
                      b       out
                     else:
                      mov     rd, rn
                     out:

                   Note, we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label, &mut state.ctrl_plane);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            &Inst::MovToNZCV { rn } => {
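                // `msr nzcv, xn`: 0xd51b4200 is the fixed opcode; the source
                // register goes in the low five bits.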
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
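                // `mrs xd, nzcv`: the destination register goes in the low
                // five bits.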
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And,
                    size: OperandSize::Size32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
                let mov = Inst::Mov {
                    size: OperandSize::Size32,
                    rd,
                    rm: rn,
                };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
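                // As an illustrative spot-check (not code emitted here): a
                // signed extend from 8 to 64 bits becomes
                // `sbfm xd, xn, #0, #7` (i.e. sxtb), so rd = x0, rn = x1
                // should assemble to 0x93401c20.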
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the destination is a label, record the use so that the branch can be fixed up later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                sink.put4(0xd65f03c0);
            }
            &Inst::AuthenticatedRet { key, is_hint } => {
                let (op2, is_hint) = match key {
                    APIKey::AZ => (0b100, true),
                    APIKey::ASP => (0b101, is_hint),
                    APIKey::BZ => (0b110, true),
                    APIKey::BSP => (0b111, is_hint),
                };

                if is_hint {
                    sink.put4(key.enc_auti_hint());
                    Inst::Ret {}.emit(sink, emit_info, state);
                } else {
                    sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
                }
            }
            &Inst::Call { ref info } => {
                let user_stack_map = state.take_stack_map();
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<AArch64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(try_call.continuation),
                    };
                    jmp.emit(sink, emit_info, state);
                }

                // We produce an island above if needed, so disable
                // the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::CallInd { ref info } => {
                let user_stack_map = state.take_stack_map();
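                // `blr <dest>`: the binary literal below is the BLR opcode,
                // with the callee register placed at bits 9..5.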
                sink.put4(
                    0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),
                );
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }

                if let Some(try_call) = info.try_call_info.as_ref() {
                    sink.add_call_site(&try_call.exception_dests);
                } else {
                    sink.add_call_site(&[]);
                }

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }

                // Load any stack-carried return values.
                info.emit_retval_loads::<AArch64MachineDeps, _, _>(
                    state.frame_layout().stackslots_size,
                    |inst| inst.emit(sink, emit_info, state),
                    |needed_space| Some(Inst::EmitIsland { needed_space }),
                );

                // If this is a try-call, jump to the continuation
                // (normal-return) block.
                if let Some(try_call) = info.try_call_info.as_ref() {
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(try_call.continuation),
                    };
                    jmp.emit(sink, emit_info, state);
                }

                // We produce an island above if needed, so disable
                // the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::ReturnCall { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                // Note: this is not `Inst::Jump { .. }.emit(..)` because we
                // have different metadata in this case: we don't have a label
                // for the target, but rather a function relocation.
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b000101, 0));
                sink.add_call_site(&[]);

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::ReturnCallInd { ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                Inst::IndirectBr {
                    rn: info.dest,
                    targets: vec![],
                }
                .emit(sink, emit_info, state);
                sink.add_call_site(&[]);

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
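                    // Record the pre-encoded inverted form so that the
                    // MachBuffer's branch folding can flip this branch in
                    // place (e.g. when the taken target becomes the
                    // fallthrough) and elide the unconditional jump below.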
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TestBitAndBranch {
                taken,
                not_taken,
                kind,
                rn,
                bit,
            } => {
                // Emit the conditional branch first
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
                    let inverted =
                        enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                let label = sink.defer_trap(trap_code);
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd43e0000);
            }
            &Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put_data(Inst::TRAP_OPCODE);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Adrp { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adrp(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                default,
                ref targets,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

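                // Schematically, the sequence emitted below is:
                //
                //   b.hs default
                //   csel rtmp2, xzr, ridx, hs
                //   csdb
                //   adr rtmp1, #16
                //   ldrsw rtmp2, [rtmp1, rtmp2, uxtw #2]
                //   add rtmp1, rtmp1, rtmp2
                //   br rtmp1
                //   <jump table: one 32-bit offset per target, relative to
                //    the table's start>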
                // Branch to default when condition code from prior comparison indicates.
                let br =
                    enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));

                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
                sink.put4(br);

                // Overwrite the index with a zero when the above
                // branch misspeculates (Spectre mitigation). Save the
                // resulting index in rtmp2.
                let inst = Inst::CSel {
                    rd: rtmp2,
                    cond: Cond::Hs,
                    rn: zero_reg(),
                    rm: ridx,
                };
                inst.emit(sink, emit_info, state);
                // Prevent any data value speculation.
                Inst::Csdb.emit(sink, emit_info, state);

                // Load address of jump table
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in targets.iter() {
                    let word_off = sink.cur_offset();
                    // `off_into_table` is an addend embedded in the label, to be patched in at
                    // the end of codegen. The offset is initially relative to this jump-table
                    // entry; with the extra addend it becomes relative to the start of the jump
                    // table once patched.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
                if emit_info.0.is_pic() {
                    // See this Compiler Explorer example for the variations of this with and
                    // without BTI & PAUTH: https://godbolt.org/z/ncqjbbvvn
                    //
                    // Emit the following code:
                    //   adrp    rd, :got:X
                    //   ldr     rd, [rd, :got_lo12:X]

                    // adrp rd, symbol
                    sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
                    let inst = Inst::Adrp { rd, off: 0 };
                    inst.emit(sink, emit_info, state);

                    // ldr rd, [rd, :got_lo12:X]
                    sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::reg(rd.to_reg()),
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                } else {
                    // With absolute offsets we set up a load from a preallocated space, and then jump
                    // over it.
                    //
                    // Emit the following code:
                    //   ldr     rd, #8
                    //   b       #0x10
                    //   <8 byte space>

                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::Label {
                            label: MemLabel::PCRel(8),
                        },
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                    let inst = Inst::Jump {
                        dest: BranchTarget::ResolvedOffset(12),
                    };
                    inst.emit(sink, emit_info, state);
                    sink.add_reloc(Reloc::Abs8, &**name, offset);
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let mem = mem.clone();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended { rn, rm, extendop } => {
                        let r = rn;
                        (r, Some((rm, extendop)), 0)
                    }
                    AMode::Unscaled { rn, simm9 } => {
                        let r = rn;
                        (r, None, simm9.value())
                    }
                    AMode::UnsignedOffset { rn, uimm12 } => {
                        let r = rn;
                        (r, None, uimm12.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {mem:?}"),
                };
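                // After mem_finalize, `mem` is one of the three forms above:
                // `Unscaled` (SImm9 covers [-256, 255]), `UnsignedOffset`
                // (UImm12Scaled with the I8 access type covers [0, 4095]),
                // or `RegExtended` for anything larger.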
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov {
                            size: OperandSize::Size64,
                            rd,
                            rm: reg,
                        };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::Paci { key } => {
                let (crm, op2) = match key {
                    APIKey::AZ => (0b0011, 0b000),
                    APIKey::ASP => (0b0011, 0b001),
                    APIKey::BZ => (0b0011, 0b010),
                    APIKey::BSP => (0b0011, 0b011),
                };

                sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
            }
            &Inst::Xpaclri => sink.put4(0xd50320ff),
            &Inst::Bti { targets } => {
                let targets = match targets {
                    BranchTargetType::None => 0b00,
                    BranchTargetType::C => 0b01,
                    BranchTargetType::J => 0b10,
                    BranchTargetType::JC => 0b11,
                };

                sink.put4(0xd503241f | targets << 6);
            }
            &Inst::EmitIsland { needed_space } => {
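                // The extra 4 bytes account for the jump emitted below to
                // skip over the island itself.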
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }

            &Inst::ElfTlsGetAddr {
                ref symbol,
                rd,
                tmp,
            } => {
                assert_eq!(xreg(0), rd.to_reg());

                // See the original proposal for TLSDESC.
                // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
                //
                // Implement the TLSDESC instruction sequence:
                //   adrp x0, :tlsdesc:tlsvar
                //   ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                //   add  x0, x0, :tlsdesc_lo12:tlsvar
                //   blr  tmp
                //   mrs  tmp, tpidr_el0
                //   add  x0, x0, tmp
                //
                // This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch64
                // See: https://gcc.godbolt.org/z/e4j7MdErh

                // adrp x0, :tlsdesc:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
                Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);

                // ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
                Inst::ULoad64 {
                    rd: tmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // add x0, x0, :tlsdesc_lo12:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
                Inst::AluRRImm12 {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    imm12: Imm12::maybe_from_u64(0).unwrap(),
                }
                .emit(sink, emit_info, state);

                // blr tmp
                sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),
                }
                .emit(sink, emit_info, state);

                // mrs tmp, tpidr_el0
                sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));

                // add x0, x0, tmp
                Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    rm: tmp.to_reg(),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
                // Each thread-local variable gets a descriptor whose first xword is a pointer
                // to a function that takes the descriptor's address in x0; after that function
                // returns, x0 contains the address of the thread-local variable.
                //
                // What we want to emit is basically:
                //
                // adrp x0, <label>@TLVPPAGE  ; Load the address of the page of the thread local variable pointer (TLVP)
                // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
                // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
                // blr x1 ; Call the function pointer with the descriptor address in x0
                // ; x0 now contains the TLV address

                assert_eq!(xreg(0), rd.to_reg());
                let rtmp = writable_xreg(1);

                // adrp x0, <label>@TLVPPAGE
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
                sink.put4(0x90000000);

                // ldr x0, [x0, <label>@TLVPPAGEOFF]
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
                sink.put4(0xf9400000);

                // load [x0] into temp register
                Inst::ULoad64 {
                    rd: rtmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // call function pointer in temp register
                Inst::CallInd {
                    info: crate::isa::Box::new(CallInfo::empty(
                        rtmp.to_reg(),
                        CallConv::AppleAarch64,
                    )),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }

            &Inst::DummyUse { .. } => {}

            &Inst::StackProbeLoop { start, end, step } => {
                assert!(emit_info.0.enable_probestack());

                // The loop generated here uses `start` as a counter register,
                // counting it down until its negation exceeds `end`. In other
                // words, `start` is the offset from `sp` currently being
                // probed, and `end` is the maximum size we need to probe.
                // The loop looks like:
                //
                //      loop_start:
                //          sub start, start, #step
                //          str wzr, [sp, start]
                //          cmn start, end
                //          b.gt loop_start
                //      loop_end:
                //
                // Note that this loop cannot use the spilltmp and tmp2
                // registers, as those are currently used as inputs when this
                // instruction is generated; some of the more flavorful
                // address modes and lowerings must therefore be avoided.
                //
                // Perhaps someone more clever than I can figure out how to use
                // `subs` or the like and skip the `cmn`, but I can't figure it
                // out at this time.

                let loop_start = sink.get_label();
                sink.bind_label(loop_start, &mut state.ctrl_plane);

                Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub,
                    size: OperandSize::Size64,
                    rd: start,
                    rn: start.to_reg(),
                    imm12: step,
                }
                .emit(sink, emit_info, state);
                Inst::Store32 {
                    rd: regs::zero_reg(),
                    mem: AMode::RegReg {
                        rn: regs::stack_reg(),
                        rm: start.to_reg(),
                    },
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: ALUOp::AddS,
                    size: OperandSize::Size64,
                    rd: regs::writable_zero_reg(),
                    rn: start.to_reg(),
                    rm: end,
                }
                .emit(sink, emit_info, state);
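                // The `AddS` into the zero register is effectively
                // `cmn start, end`: the loop repeats while `start + end > 0`,
                // i.e. while the (negative) probe offset `start` has not yet
                // reached `-end`.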

                let loop_end = sink.get_label();
                Inst::CondBr {
                    taken: BranchTarget::Label(loop_start),
                    not_taken: BranchTarget::Label(loop_end),
                    kind: CondBrKind::Cond(Cond::Gt),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(loop_end, &mut state.ctrl_plane);
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!(
            (end_off - start_off) <= Inst::worst_case_size()
                || matches!(self, Inst::EmitIsland { .. }),
            "Worst-case size exceeded for {:?}: {}",
            self,
            end_off - start_off
        );

        state.clear_post_insn();
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

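/// Shared epilogue for tail calls: restore clobbered registers, pop the
/// frame-setup area (fp/lr), adjust SP for any difference between the
/// incoming and outgoing tail-call argument areas, and re-authenticate the
/// return address if pointer authentication is in use.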
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    for inst in
        AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout())
    {
        inst.emit(sink, emit_info, state);
    }

    let setup_area_size = state.frame_layout().setup_area_size;
    if setup_area_size > 0 {
        // N.B.: sp is already adjusted to the appropriate place by the
        // clobber-restore code (which also frees the fixed frame). Hence, there
        // is no need for the usual `mov sp, fp` here.

        // `ldp fp, lr, [sp], #16`
        Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::SPPostIndexed {
                // TODO: we could fold the increment for `incoming_args_diff` in here as well,
                // as long as `setup_area_size` plus that value still fits in the scaled 7-bit
                // immediate (at most 63 * 8 bytes for an I64 pair).
                // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
                simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
            },
            flags: MemFlags::trusted(),
        }
        .emit(sink, emit_info, state);
    }

    // Adjust SP to account for the possible over-allocation in the prologue.
    let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
    if incoming_args_diff > 0 {
        for inst in
            AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
        {
            inst.emit(sink, emit_info, state);
        }
    }

    if let Some(key) = info.key {
        sink.put4(key.enc_auti_hint());
    }
}