cranelift_codegen/isa/x64/lower/isle.rs

//! ISLE integration glue code for x64 lowering.

// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::{ir::types, ir::AtomicRmwOp, isa};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

// Types that the generated ISLE code uses via `use super::*`.
use super::external::{isle_assembler_methods, CraneliftRegisters, PairedGpr, PairedXmm};
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::abi::X64CallSite;
use crate::isa::x64::inst::{args::*, regs, ReturnCallInfo};
use crate::isa::x64::lower::emit_vm_call;
use crate::isa::x64::X64Backend;
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallInfo, InsnInput, InstOutput, IsTailCall, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;
use std::boxed::Box;

/// Type representing out-of-line data for calls. This type is optional because
/// the call instruction is also used by Winch to emit calls, but the
/// `Box<CallInfo>` field is only used by Cranelift; making it optional reduces
/// the number of heap allocations in Winch.
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

/// When interacting with the external assembler (see `external.rs`), we
/// need to fix the types we'll use.
type AssemblerInst = asm::Inst<CraneliftRegisters>;

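/// A load that lowering may "sink" into (merge with) the instruction that uses
/// it, so that it becomes a memory operand of that instruction instead of a
/// separate load into a register.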
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

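/// Lower the branch instruction that terminates a block, with `targets` giving
/// the `MachLabel` of each successor block.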
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_prelude_caller_methods!(X64CallSite);
    isle_assembler_methods!();

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

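    // The `put_in_*` helpers below choose an operand form for a value,
    // preferring (roughly in this order) a small immediate, a constant-pool or
    // sinkable-load memory operand, and finally a plain register.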
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            let ty = self.lower_ctx.dfg().value_type(val);
            if let Some(imm) = to_simm32(c as i64, ty) {
                return imm.to_reg_mem_imm();
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            let ty = self.lower_ctx.dfg().value_type(val);
            if let Some(imm) = to_simm32(c as i64, ty) {
                return XmmMemImm::unwrap_new(imm.to_reg_mem_imm());
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a rematerialization into a register,
            // because it reduces register pressure.
            //
            // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
            // constants to be 16 bytes when a constant will be used in place of an xmm register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a
            // rematerialization into a register, because it reduces
            // register pressure.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn use_avx(&mut self) -> bool {
        self.backend.x64_flags.use_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.use_avx2()
    }

    #[inline]
    fn use_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.use_avx512vl()
    }

    #[inline]
    fn use_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.use_avx512dq()
    }

    #[inline]
    fn use_avx512f(&mut self) -> bool {
        self.backend.x64_flags.use_avx512f()
    }

    #[inline]
    fn use_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.use_avx512bitalg()
    }

    #[inline]
    fn use_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.use_avx512vbmi()
    }

    #[inline]
    fn use_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.use_lzcnt()
    }

    #[inline]
    fn use_bmi1(&mut self) -> bool {
        self.backend.x64_flags.use_bmi1()
    }

    #[inline]
    fn use_bmi2(&mut self) -> bool {
        self.backend.x64_flags.use_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.use_popcnt()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.use_fma()
    }

    #[inline]
    fn use_ssse3(&mut self) -> bool {
        self.backend.x64_flags.use_ssse3()
    }

    #[inline]
    fn use_sse41(&mut self) -> bool {
        self.backend.x64_flags.use_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.use_sse42()
    }

    #[inline]
    fn use_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.use_cmpxchg16b()
    }

    #[inline]
    fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant = self.lower_ctx.get_constant(inst)?;
        let imm = u8::try_from(constant).ok()?;
        Some(Imm8Reg::Imm8 { imm })
    }

    #[inline]
    fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr {
        let mask = self.shift_mask(ty) as u64;
        Imm8Gpr::unwrap_new(Imm8Reg::Imm8 {
            imm: (c & mask) as u8,
        })
    }

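    // CLIF shift semantics mask the shift amount by the lane width (e.g. a
    // 32-bit shift only uses the low 5 bits of its amount), which matches the
    // behavior of the x64 shift instructions, so constant amounts can be
    // masked here at lowering time.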
    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant: u64 = self.lower_ctx.get_constant(inst)?;
        let ty = self.lower_ctx.dfg().value_type(val);
        let constant = constant as i64;
        to_simm32(constant, ty)
    }

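    // A load is "sinkable" when it can be merged into the instruction that
    // uses it, becoming a memory operand rather than a separate load into a
    // register. See `is_mergeable_load` for the exact conditions, including
    // the size requirement (`Min32` here versus `Exact` below).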
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
        if x != 0 && x < u64::from(u32::MAX) {
            Some(x)
        } else {
            None
        }
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // `insertps` immediate layout: bits 7:6 select the source dword (here
        // index 0), bits 5:4 select the destination lane, and bits 3:0 are the
        // zero mask (here empty).
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn imm8_reg_to_imm8_gpr(&mut self, ir: &Imm8Reg) -> Imm8Gpr {
        Imm8Gpr::unwrap_new(ir.clone())
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn gpr_to_imm8_gpr(&mut self, gpr: Gpr) -> Imm8Gpr {
        Imm8Gpr::from(gpr)
    }

    #[inline]
    fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
        Imm8Gpr::unwrap_new(Imm8Reg::Imm8 { imm })
    }

    fn gpr_from_imm8_gpr(&mut self, val: &Imm8Gpr) -> Option<Gpr> {
        match val.as_imm8_reg() {
            &Imm8Reg::Reg { reg } => Some(Gpr::unwrap_new(reg)),
            Imm8Reg::Imm8 { .. } => None,
        }
    }

    fn imm8_from_imm8_gpr(&mut self, val: &Imm8Gpr) -> Option<u8> {
        match val.as_imm8_reg() {
            &Imm8Reg::Imm8 { imm } => Some(imm),
            Imm8Reg::Reg { .. } => None,
        }
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() == 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // If necessary, zero extend `constant_value` up to 64 bits.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        // Sum up the two operands.
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // Check that the sum will fit in 32-bits.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &Amode, offset: i32) -> Amode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

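    // The `libcall_*` helpers emit a call to a runtime library routine with
    // one, two, or three register arguments and return its single result
    // register.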
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[a],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0]
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[a, b],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0]
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[a, b, c],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0]
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

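    // The `shuffle_*_mask` helpers turn a CLIF `shuffle` mask into a
    // `pshufb`-style control constant. Lane indices that cannot be selected
    // from the operand in question get their high bit set (0x80), which makes
    // `pshufb` write a zero for that lane; `shuffle_0_31_mask` additionally
    // wraps indices 16..=31 back into 0..=15.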
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

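    // When the shuffle mask selects any lane index above 31, that lane must
    // produce zero: return both the permutation constant and a byte mask that
    // is 0x00 in those lanes and 0xff elsewhere so the caller can clear them.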
    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting from the right-hand side, subtract 4 from each lane
        // index, bailing out if any index is less than 4. Afterwards the check
        // is the same as in `pshufd_lhs_imm` above.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The `shufps` instruction selects the first two elements from the
        // first vector and the second two elements from the second vector, so
        // offset the third/fourth selectors by 4 and then make sure each
        // selector is in range (less than 4).
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // This is almost the same as `shufps_imm` except the elements that are
        // subtracted are reversed. This handles the case where the `shufps`
        // instruction can be emitted if the order of the operands is swapped.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `shufps` except this operates over 16-bit values so four
        // of them must be fixed and the other four must be in-range to encode
        // in the immediate.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `pshuflw` except that the first four operands must be
        // fixed and the second four are offset by an extra 4 and tested to
        // make sure they're all in the range [4, 8).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Note that everything here is offset by at least 8 and the upper
        // bits are offset by 12 to test they're in the range of [12, 16).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        // First make sure that the shuffle immediate is selecting 16-bit lanes.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        // Next build up an 8-bit mask from each of the bits of the selected
        // lanes above. This instruction can only be used when each lane
        // selector chooses from the corresponding lane in either of the two
        // operands, meaning the Nth lane selection must satisfy `lane % 8 ==
        // N`.
        //
        // This helper closure is used to calculate the value of the
        // corresponding bit.
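        //
        // For example, the 16-bit lane selection [0, 9, 2, 11, 4, 13, 6, 15]
        // (odd lanes taken from the second operand) yields the immediate
        // 0b1010_1010.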
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 {
                    Some(0)
                } else {
                    Some(1 << c)
                }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }

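    // Build a 128-bit constant that is all ones except for a zeroed byte at
    // lane `hole_idx`.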
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

    ////////////////////////////////////////////////////////////////////////////
    ///// External assembler methods.
    ////////////////////////////////////////////////////////////////////////////

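    // The `is_*` extractors below succeed only when the operand is already in
    // the requested form (an immediate of the right width, a register, or a
    // memory operand), allowing the assembler-based rules to select an
    // encoding accordingly.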
    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(u8::try_from(simm32).ok()?),
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(u16::try_from(simm32).ok()?),
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32 as i32),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMem) -> Option<XmmMem> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMem::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
        }
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
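    /// Load a 128-bit value from `addr` into a fresh temporary XMM register
    /// with an unaligned load (`movdqu`); used when a memory operand cannot be
    /// guaranteed to be 16-byte aligned.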
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::XmmUnaryRmRUnaligned {
            op: SseOpcode::Movdqu,
            src: XmmMem::unwrap_new(RegMem::mem(addr)),
            dst: Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }
}

// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
// need to fix up the bits that migrate from one half of the lane to the
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
// right by 0 (no movement), we want to retain all the bits so we mask with
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
// we mask with `0x7f`; etc.
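//
// For example, `ishl.i8x16` by 1 can be implemented as a 16-bit shift left by
// 1 followed by an AND with the `0xfe` row below: the wide shift leaks each
// low byte's bit 7 into bit 0 of the byte above it, and the mask clears that
// bit again.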

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];

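/// Return `constant` as a 32-bit immediate operand if it can be used as one:
/// most x64 instructions accept at most a 32-bit immediate, which is
/// sign-extended to 64 bits when the operation is 64-bit.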
#[inline]
fn to_simm32(constant: i64, ty: Type) -> Option<GprMemImm> {
    if ty.bits() <= 32 || constant == ((constant << 32) >> 32) {
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: constant as u32,
        }))
    } else {
        None
    }
}