cranelift_codegen/isa/x64/lower.rs

//! Lowering rules for X64.

// ISLE integration glue.
pub(super) mod isle;

use crate::ir::pcc::{FactContext, PccResult};
use crate::ir::{types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::pcc;
use crate::isa::{x64::X64Backend, CallConv};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::Flags;
use smallvec::{smallvec, SmallVec};
use target_lexicon::Triple;

//=============================================================================
// Helpers for instruction lowering.

impl Lower<'_, Inst> {
    #[inline]
    pub fn temp_writable_gpr(&mut self) -> WritableGpr {
        WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
    }

    #[inline]
    pub fn temp_writable_xmm(&mut self) -> WritableXmm {
        WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
    }
}

fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

/// If the given `input` is a result produced by an instruction with Opcode `op`, returns that
/// instruction; otherwise returns `None`.
// TODO investigate failures with checking against the result index.
fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<IRInst> {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    inputs.inst.as_inst().and_then(|(src_inst, _)| {
        let data = ctx.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        None
    })
}

/// Put the given input into possibly multiple registers, and mark it as used (side-effect).
fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
    let ty = ctx.input_ty(spec.insn, spec.input);
    let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);

    if let Some(c) = input.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let size = if ty_bits(ty) < 64 {
            OperandSize::Size32
        } else {
            OperandSize::Size64
        };
        assert!(is_int_or_ref_ty(ty)); // Only used for addresses.
        let cst_copy = ctx.alloc_tmp(ty);
        ctx.emit(Inst::imm(size, c, cst_copy.only_reg().unwrap()));
        non_writable_value_regs(cst_copy)
    } else {
        ctx.put_input_in_regs(spec.insn, spec.input)
    }
}
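
// Illustrative example (not part of the upstream source): when lowering `v2 = iadd v0, v1`
// where `v1` is `iconst.i64 42`, calling `put_input_in_regs` on that input emits a fresh
// `Inst::imm` into a newly allocated temporary and returns that temporary, rather than
// extending the live range of some other register that already holds the constant.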

/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: &mut Lower<Inst>, spec: InsnInput) -> Reg {
    put_input_in_regs(ctx, spec)
        .only_reg()
        .expect("Multi-register value not expected")
}

enum MergeableLoadSize {
    /// The load size performed by a sinkable load merging operation is
    /// precisely the size necessary for the type in question.
    Exact,

    /// Narrower-than-32-bit values are handled by ALU insts that are at least
    /// 32 bits wide, which is normally OK as we ignore upper bits; but, if we
    /// generate, e.g., a direct-from-memory 32-bit add for a byte value and
    /// the byte is the last byte in a page, the extra data that we load is
    /// incorrectly accessed. So we only allow loads to merge for
    /// 32-bit-and-above widths.
    Min32,
}
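
// Illustrative example (not part of the upstream source): with `Min32`, an 8-bit load feeding
// an `iadd.i8` is *not* merged into a 32-bit memory-operand `add`, because the wider access
// could read past the end of the page containing the loaded byte; the load is emitted
// separately and only the register form of the `add` is used.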

/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base source (as
/// an `InsnInput`) and an offset from that address from which to perform the
/// load.
fn is_mergeable_load(
    ctx: &mut Lower<Inst>,
    src_insn: IRInst,
    size: MergeableLoadSize,
) -> Option<(InsnInput, i32)> {
    let insn_data = ctx.data(src_insn);
    let inputs = ctx.num_inputs(src_insn);
    if inputs != 1 {
        return None;
    }

    // If this type is too small to get a merged load, don't merge the load.
    let load_ty = ctx.output_ty(src_insn, 0);
    if ty_bits(load_ty) < 32 {
        match size {
            MergeableLoadSize::Exact => {}
            MergeableLoadSize::Min32 => return None,
        }
    }

    // Just testing the opcode is enough, because the width will always match if
    // the type does (and the type should match if the CLIF is properly
    // constructed).
    if let &InstructionData::Load {
        opcode: Opcode::Load,
        offset,
        ..
    } = insn_data
    {
        Some((
            InsnInput {
                insn: src_insn,
                input: 0,
            },
            offset.into(),
        ))
    } else {
        None
    }
}
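
// Sketch of the intended use (hypothetical CLIF, not from the upstream source): given
//
//   v1 = load.i64 v0+16
//   v2 = iadd v3, v1
//
// calling `is_mergeable_load(ctx, load_inst, MergeableLoadSize::Min32)` on the load returns
// `Some((InsnInput { insn: load_inst, input: 0 }, 16))`, letting the caller fold the load into
// a memory-operand form of the `add` instead of loading into a register first.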

fn input_to_imm(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<u64> {
    ctx.get_input_as_source_or_const(spec.insn, spec.input)
        .constant
}

/// Emits a call to the runtime library routine `libcall`, passing `inputs` as the call's
/// arguments, and returns the registers that hold the call's results.
fn emit_vm_call(
    ctx: &mut Lower<Inst>,
    flags: &Flags,
    triple: &Triple,
    libcall: LibCall,
    inputs: &[Reg],
) -> CodegenResult<SmallVec<[Reg; 1]>> {
    let extname = ExternalName::LibCall(libcall);

    let dist = if flags.use_colocated_libcalls() {
        RelocDistance::Near
    } else {
        RelocDistance::Far
    };

    // TODO avoid recreating signatures for every single Libcall function.
    let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
    let sig = libcall.signature(call_conv, types::I64);
    let caller_conv = ctx.abi().call_conv(ctx.sigs());

    if !ctx.sigs().have_abi_sig_for_signature(&sig) {
        ctx.sigs_mut()
            .make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
    }

    let mut abi =
        X64CallSite::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone());

    assert_eq!(inputs.len(), abi.num_args(ctx.sigs()));

    for (i, input) in inputs.iter().enumerate() {
        abi.gen_arg(ctx, i, ValueRegs::one(*input));
    }

    let mut retval_insts: SmallInstVec<_> = smallvec![];
    let mut outputs: SmallVec<[_; 1]> = smallvec![];
    for i in 0..ctx.sigs().num_rets(ctx.sigs().abi_sig_for_signature(&sig)) {
        let (retval_inst, retval_regs) = abi.gen_retval(ctx, i);
        retval_insts.extend(retval_inst.into_iter());
        outputs.push(retval_regs.only_reg().unwrap());
    }

    abi.emit_call(ctx);

    for inst in retval_insts {
        ctx.emit(inst);
    }

    Ok(outputs)
}
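
// Hypothetical call site (illustrative only; the surrounding names are assumptions, not code
// from this crate): a lowering rule that needs, say, `LibCall::CeilF64` on a target without
// the corresponding instruction could do
//
//     let arg = put_input_in_reg(ctx, input);
//     let outputs = emit_vm_call(ctx, &backend.flags, &backend.triple, LibCall::CeilF64, &[arg])?;
//     let result = outputs[0];
//
// and then move `result` into the instruction's output register(s).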

/// If the given input is a left shift by a constant amount less than or equal to 3, returns the
/// shifted value and the shift amount. The goal is to embed it within an address mode.
fn matches_small_constant_shift(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<(InsnInput, u8)> {
    matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
        match input_to_imm(
            ctx,
            InsnInput {
                insn: shift,
                input: 1,
            },
        ) {
            Some(shift_amt) if shift_amt <= 3 => Some((
                InsnInput {
                    insn: shift,
                    input: 0,
                },
                shift_amt as u8,
            )),
            _ => None,
        }
    })
}
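
// Illustrative example (hypothetical CLIF, not from the upstream source): for
//
//   v0 = iconst.i64 3
//   v2 = ishl v1, v0
//
// this returns the input corresponding to `v1` together with the shift amount 3, so an address
// computation using `v2` can be encoded with an x86 scaled-index form (`index * 8`).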

/// Lowers an instruction to one of the x86 addressing modes.
///
/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
fn lower_to_amode(ctx: &mut Lower<Inst>, spec: InsnInput, offset: i32) -> Amode {
    let flags = ctx
        .memflags(spec.insn)
        .expect("Instruction with amode should have memflags");

    // We now either have an add that we must materialize, or some other input; as well as the
    // final offset.
    if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
        let output_ty = ctx.output_ty(add, 0);
        debug_assert_eq!(
            output_ty,
            types::I64,
            "Address width of 64 expected, got {output_ty}"
        );
        let add_inputs = &[
            InsnInput {
                insn: add,
                input: 0,
            },
            InsnInput {
                insn: add,
                input: 1,
            },
        ];

        // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
        // aren't happening in the wasm case. We could do better, given some range analysis.
        let (base, index, shift) = if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[0])
        {
            (
                put_input_in_reg(ctx, add_inputs[1]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[1])
        {
            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else {
            for input in 0..=1 {
                // Try to pierce through uextend.
                let (inst, inst_input) = if let Some(uextend) =
                    matches_input(ctx, InsnInput { insn: add, input }, Opcode::Uextend)
                {
                    (uextend, 0)
                } else {
                    (add, input)
                };

                // If it's a constant, add it directly!
                if let Some(cst) = ctx.get_input_as_source_or_const(inst, inst_input).constant {
                    let final_offset = (offset as i64).wrapping_add(cst as i64);
                    if let Ok(final_offset) = i32::try_from(final_offset) {
                        let base = put_input_in_reg(ctx, add_inputs[1 - input]);
                        return Amode::imm_reg(final_offset, base).with_flags(flags);
                    }
                }
            }

            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, add_inputs[1]),
                0,
            )
        };

        return Amode::imm_reg_reg_shift(
            offset,
            Gpr::unwrap_new(base),
            Gpr::unwrap_new(index),
            shift,
        )
        .with_flags(flags);
    }

    let input = put_input_in_reg(ctx, spec);
    Amode::imm_reg(offset, input).with_flags(flags)
}
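
// Worked example (hypothetical CLIF, not from the upstream source): for an address computed as
//
//   v1 = iconst.i64 2
//   v3 = ishl v2, v1
//   v4 = iadd v0, v3
//
// with a static `offset` of 8, `lower_to_amode` conceptually produces
// `Amode::imm_reg_reg_shift(8, reg(v0), reg(v2), 2)`, i.e. the x86 form `[v0 + v2*4 + 8]`.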

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for X64Backend {
    type MInst = Inst;

    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> Option<InstOutput> {
        isle::lower(ctx, self, ir_inst)
    }

    fn lower_branch(
        &self,
        ctx: &mut Lower<Inst>,
        ir_inst: IRInst,
        targets: &[MachLabel],
    ) -> Option<()> {
        isle::lower_branch(ctx, self, ir_inst, targets)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(regs::pinned_reg())
    }

    fn check_fact(
        &self,
        ctx: &FactContext<'_>,
        vcode: &mut VCode<Self::MInst>,
        inst: InsnIndex,
        state: &mut pcc::FactFlowState,
    ) -> PccResult<()> {
        pcc::check(ctx, vcode, inst, state)
    }

    type FactFlowState = pcc::FactFlowState;
}