cranelift_codegen/isa/x64/lower.rs

//! Lowering rules for X64.

// ISLE integration glue.
pub(super) mod isle;

use crate::ir::pcc::{FactContext, PccResult};
use crate::ir::{types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::pcc;
use crate::isa::{x64::X64Backend, CallConv};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::Flags;
use smallvec::{smallvec, SmallVec};
use target_lexicon::Triple;

//=============================================================================
// Helpers for instruction lowering.

impl Lower<'_, Inst> {
    #[inline]
    pub fn temp_writable_gpr(&mut self) -> WritableGpr {
        WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
    }

    #[inline]
    pub fn temp_writable_xmm(&mut self) -> WritableXmm {
        WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
    }
}

fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

/// If the given `input` is a result produced by an instruction with Opcode `op`, returns that
/// instruction; otherwise returns `None`.
// TODO investigate failures with checking against the result index.
fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<IRInst> {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    inputs.inst.as_inst().and_then(|(src_inst, _)| {
        let data = ctx.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        None
    })
}

/// Put the given input into possibly multiple registers, and mark it as used (side-effect).
fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
    let ty = ctx.input_ty(spec.insn, spec.input);
    let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);

    if let Some(c) = input.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        let size = if ty_bits(ty) < 64 {
            OperandSize::Size32
        } else {
            OperandSize::Size64
        };
        assert!(is_int_or_ref_ty(ty)); // Only used for addresses.
        let cst_copy = ctx.alloc_tmp(ty);
        ctx.emit(Inst::imm(size, c, cst_copy.only_reg().unwrap()));
        non_writable_value_regs(cst_copy)
    } else {
        ctx.put_input_in_regs(spec.insn, spec.input)
    }
}
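
// Illustrative example (not part of the upstream source): when lowering `v2 = iadd v0, v1`
// where `v1` is `iconst.i64 42`, calling `put_input_in_regs` on that input emits a fresh
// `Inst::imm` into a newly allocated temporary and returns that temporary, rather than
// extending the live range of some other register that already holds the constant.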

/// Put the given input into a register, and mark it as used (side-effect).
fn put_input_in_reg(ctx: &mut Lower<Inst>, spec: InsnInput) -> Reg {
    put_input_in_regs(ctx, spec)
        .only_reg()
        .expect("Multi-register value not expected")
}

enum MergeableLoadSize {
    /// The load size performed by a sinkable load merging operation is
    /// precisely the size necessary for the type in question.
    Exact,

    /// Narrower-than-32-bit values are handled by ALU insts that are at least
    /// 32 bits wide, which is normally OK as we ignore upper bits; but, if we
    /// generate, e.g., a direct-from-memory 32-bit add for a byte value and
    /// the byte is the last byte in a page, the extra data that we load is
    /// incorrectly accessed. So we only allow loads to merge for
    /// 32-bit-and-above widths.
    Min32,
}
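
// Illustrative example (not part of the upstream source): with `Min32`, an 8-bit load feeding
// an `iadd.i8` is *not* merged into a 32-bit memory-operand `add`, because the wider access
// could read past the end of the page containing the loaded byte; the load is emitted
// separately and only the register form of the `add` is used.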

/// Determines whether a load operation (indicated by `src_insn`) can be merged
/// into the current lowering point. If so, returns the address-base source (as
/// an `InsnInput`) and an offset from that address from which to perform the
/// load.
fn is_mergeable_load(
    ctx: &mut Lower<Inst>,
    src_insn: IRInst,
    size: MergeableLoadSize,
) -> Option<(InsnInput, i32)> {
    let insn_data = ctx.data(src_insn);
    let inputs = ctx.num_inputs(src_insn);
    if inputs != 1 {
        return None;
    }

    // If this type is too small to get a merged load, don't merge the load.
    let load_ty = ctx.output_ty(src_insn, 0);
    if ty_bits(load_ty) < 32 {
        match size {
            MergeableLoadSize::Exact => {}
            MergeableLoadSize::Min32 => return None,
        }
    }

    // Just testing the opcode is enough, because the width will always match if
    // the type does (and the type should match if the CLIF is properly
    // constructed).
    if let &InstructionData::Load {
        opcode: Opcode::Load,
        offset,
        ..
    } = insn_data
    {
        Some((
            InsnInput {
                insn: src_insn,
                input: 0,
            },
            offset.into(),
        ))
    } else {
        None
    }
}
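
// Sketch of the intended use (hypothetical CLIF, not from the upstream source): given
//
//   v1 = load.i64 v0+16
//   v2 = iadd v3, v1
//
// calling `is_mergeable_load(ctx, load_inst, MergeableLoadSize::Min32)` on the load returns
// `Some((InsnInput { insn: load_inst, input: 0 }, 16))`, letting the caller fold the load into
// a memory-operand form of the `add` instead of loading into a register first.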

fn input_to_imm(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<u64> {
    ctx.get_input_as_source_or_const(spec.insn, spec.input)
        .constant
}

/// Emits a call to the runtime library routine `libcall`, passing `inputs` as the call's
/// arguments, and returns the registers that hold the call's results.
fn emit_vm_call(
    ctx: &mut Lower<Inst>,
    flags: &Flags,
    triple: &Triple,
    libcall: LibCall,
    inputs: &[Reg],
) -> CodegenResult<SmallVec<[Reg; 1]>> {
    let extname = ExternalName::LibCall(libcall);

    let dist = if flags.use_colocated_libcalls() {
        RelocDistance::Near
    } else {
        RelocDistance::Far
    };

    // TODO avoid recreating signatures for every single Libcall function.
    let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
    let sig = libcall.signature(call_conv, types::I64);
    let caller_conv = ctx.abi().call_conv(ctx.sigs());

    if !ctx.sigs().have_abi_sig_for_signature(&sig) {
        ctx.sigs_mut()
            .make_abi_sig_from_ir_signature::<X64ABIMachineSpec>(sig.clone(), flags)?;
    }

    let mut abi =
        X64CallSite::from_libcall(ctx.sigs(), &sig, &extname, dist, caller_conv, flags.clone());

    assert_eq!(inputs.len(), abi.num_args(ctx.sigs()));

    for (i, input) in inputs.iter().enumerate() {
        abi.gen_arg(ctx, i, ValueRegs::one(*input));
    }

    let mut retval_insts: SmallInstVec<_> = smallvec![];
    let mut outputs: SmallVec<[_; 1]> = smallvec![];
    for i in 0..ctx.sigs().num_rets(ctx.sigs().abi_sig_for_signature(&sig)) {
        let (retval_inst, retval_regs) = abi.gen_retval(ctx, i);
        retval_insts.extend(retval_inst.into_iter());
        outputs.push(retval_regs.only_reg().unwrap());
    }

    abi.emit_call(ctx);

    for inst in retval_insts {
        ctx.emit(inst);
    }

    Ok(outputs)
}
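
// Hypothetical call site (illustrative only; the surrounding names are assumptions, not code
// from this crate): a lowering rule that needs, say, `LibCall::CeilF64` on a target without
// the corresponding instruction could do
//
//     let arg = put_input_in_reg(ctx, input);
//     let outputs = emit_vm_call(ctx, &backend.flags, &backend.triple, LibCall::CeilF64, &[arg])?;
//     let result = outputs[0];
//
// and then move `result` into the instruction's output register(s).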

/// If the given input is a left shift by a constant amount less than or equal to 3, returns the
/// shifted value and the shift amount. The goal is to embed it within an address mode.
fn matches_small_constant_shift(ctx: &mut Lower<Inst>, spec: InsnInput) -> Option<(InsnInput, u8)> {
    matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
        match input_to_imm(
            ctx,
            InsnInput {
                insn: shift,
                input: 1,
            },
        ) {
            Some(shift_amt) if shift_amt <= 3 => Some((
                InsnInput {
                    insn: shift,
                    input: 0,
                },
                shift_amt as u8,
            )),
            _ => None,
        }
    })
}
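
// Illustrative example (hypothetical CLIF, not from the upstream source): for
//
//   v0 = iconst.i64 3
//   v2 = ishl v1, v0
//
// this returns the input corresponding to `v1` together with the shift amount 3, so an address
// computation using `v2` can be encoded with an x86 scaled-index form (`index * 8`).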

/// Lowers an instruction to one of the x86 addressing modes.
///
/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
fn lower_to_amode(ctx: &mut Lower<Inst>, spec: InsnInput, offset: i32) -> Amode {
    let flags = ctx
        .memflags(spec.insn)
        .expect("Instruction with amode should have memflags");

    // We now either have an add that we must materialize, or some other input; as well as the
    // final offset.
    if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
        let output_ty = ctx.output_ty(add, 0);
        debug_assert_eq!(
            output_ty,
            types::I64,
            "Address width of 64 expected, got {output_ty}"
        );
        let add_inputs = &[
            InsnInput {
                insn: add,
                input: 0,
            },
            InsnInput {
                insn: add,
                input: 1,
            },
        ];

        // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
        // aren't happening in the wasm case. We could do better, given some range analysis.
        let (base, index, shift) = if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[0])
        {
            (
                put_input_in_reg(ctx, add_inputs[1]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else if let Some((shift_input, shift_amt)) =
            matches_small_constant_shift(ctx, add_inputs[1])
        {
            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, shift_input),
                shift_amt,
            )
        } else {
            for input in 0..=1 {
                // Try to pierce through uextend.
                let (inst, inst_input) = if let Some(uextend) =
                    matches_input(ctx, InsnInput { insn: add, input }, Opcode::Uextend)
                {
                    (uextend, 0)
                } else {
                    (add, input)
                };

                // If it's a constant, add it directly!
                if let Some(cst) = ctx.get_input_as_source_or_const(inst, inst_input).constant {
                    let final_offset = (offset as i64).wrapping_add(cst as i64);
                    if let Ok(final_offset) = i32::try_from(final_offset) {
                        let base = put_input_in_reg(ctx, add_inputs[1 - input]);
                        return Amode::imm_reg(final_offset, base).with_flags(flags);
                    }
                }
            }

            (
                put_input_in_reg(ctx, add_inputs[0]),
                put_input_in_reg(ctx, add_inputs[1]),
                0,
            )
        };

        return Amode::imm_reg_reg_shift(
            offset,
            Gpr::unwrap_new(base),
            Gpr::unwrap_new(index),
            shift,
        )
        .with_flags(flags);
    }

    let input = put_input_in_reg(ctx, spec);
    Amode::imm_reg(offset, input).with_flags(flags)
}
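
// Worked example (hypothetical CLIF, not from the upstream source): for an address computed as
//
//   v1 = iconst.i64 2
//   v3 = ishl v2, v1
//   v4 = iadd v0, v3
//
// with a static `offset` of 8, `lower_to_amode` conceptually produces
// `Amode::imm_reg_reg_shift(8, reg(v0), reg(v2), 2)`, i.e. the x86 form `[v0 + v2*4 + 8]`.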

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for X64Backend {
    type MInst = Inst;

    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> Option<InstOutput> {
        isle::lower(ctx, self, ir_inst)
    }

    fn lower_branch(
        &self,
        ctx: &mut Lower<Inst>,
        ir_inst: IRInst,
        targets: &[MachLabel],
    ) -> Option<()> {
        isle::lower_branch(ctx, self, ir_inst, targets)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(regs::pinned_reg())
    }

    fn check_fact(
        &self,
        ctx: &FactContext<'_>,
        vcode: &mut VCode<Self::MInst>,
        inst: InsnIndex,
        state: &mut pcc::FactFlowState,
    ) -> PccResult<()> {
        pcc::check(ctx, vcode, inst, state)
    }

    type FactFlowState = pcc::FactFlowState;
}