// winch_codegen/masm.rs

use crate::abi::{self, align_to, scratch, LocalSlot};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    reg::{writable, Reg, WritableReg},
    CallingConvention,
};
use anyhow::Result;
use cranelift_codegen::{
    binemit::CodeOffset,
    ir::{Endianness, LibCall, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
    Final, MachBufferFinalized, MachLabel,
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::PtrSize;

pub(crate) use cranelift_codegen::ir::TrapCode;

/// Division kind.
#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Wide multiplication kind.
#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}

/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
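///
/// A minimal sketch of describing a slot (illustrative; the offset and size
/// are arbitrary):
///
/// ```ignore
/// let slot = StackSlot::new(SPOffset::from_u32(16), 8);
/// assert_eq!(slot.offset.as_u32(), 16);
/// assert_eq!(slot.size, 8);
/// ```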
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`MacroAssembler`] implementation for
/// each ISA is responsible for emitting the correct sequence of instructions
/// when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
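///
/// The `Signed`/`Zero` markers keep signedness in the type; a minimal sketch
/// of building a zero-extend kind (illustrative):
///
/// ```ignore
/// let zext: Extend<Zero> = Extend::I32Extend8;
/// let kind: ExtendKind = zext.into();
/// assert!(!kind.signed());
/// assert_eq!((kind.from_bits(), kind.to_bits()), (8, 32));
/// ```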
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`; both are empty enums, which means that this
    /// variant cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}

/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}

/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
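///
/// For instance, an atomic 4-byte load zero-extended to 64 bits could be
/// described as (illustrative):
///
/// ```ignore
/// let kind = LoadKind::Atomic(OperandSize::S32, Some(Extend::<Zero>::I64Extend32.into()));
/// assert!(kind.is_atomic());
/// assert_eq!(kind.derive_operand_size(), OperandSize::S32);
/// ```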
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into the selected lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of the selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}

/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to the nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to the nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}

/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}

impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

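    /// Returns the [`Extend`] needed to widen `self` to `to`, if such an
    /// extend exists. A minimal sketch (illustrative):
    ///
    /// ```ignore
    /// let extend = OperandSize::S8.extend_to::<Zero>(OperandSize::S64).unwrap();
    /// assert_eq!((extend.from_bits(), extend.to_bits()), (8, 64));
    /// ```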
    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Returns true if the [`Imm`] is a float.
    pub fn is_float(&self) -> bool {
        match self {
            Self::F32(_) | Self::F64(_) => true,
            _ => false,
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
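    ///
    /// For instance (illustrative):
    ///
    /// ```ignore
    /// assert_eq!(Imm::i32(1).to_bytes(), vec![1, 0, 0, 0]);
    /// ```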
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}

/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// A [ContextArgs] is meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
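///
/// A sketch of the two-argument case for a call between Wasm functions
/// (illustrative):
///
/// ```ignore
/// let args = ContextArgs::pinned_callee_and_caller_vmctx();
/// assert_eq!(args.len(), MAX_CONTEXT_ARGS);
/// assert_eq!(args.as_slice(), &[VMContextLoc::Pinned, VMContextLoc::Pinned]);
/// ```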
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// No context arguments required. This is used for libcalls that don't
    /// require any special context arguments. For example builtin functions
    /// that perform float calculations.
    None,
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct an empty [ContextArgs].
    pub fn none() -> Self {
        Self::None
    }

    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::None => &[],
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}

#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
    /// Call to a well known LibCall.
    LibCall(LibCall),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }

    /// Creates a known callee kind from a libcall.
    pub fn libcall(call: LibCall) -> Self {
        Self::LibCall(call)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);

/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface that is high-level
/// enough that each ISA can provide its own lowering to machine code. For
/// example, for WebAssembly operators that don't have a direct mapping to a
/// machine instruction, the interface defines a signature matching the
/// WebAssembly operator, allowing each implementation to lower such an
/// operator entirely. This approach attributes more responsibility to the
/// MacroAssembler, but frees the caller from having to assemble the right
/// sequence of instructions at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use a three-argument form. This approach allows for a more general
/// interface that can be restricted where needed, for architectures that use
/// a two-argument form.
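///
/// A hedged sketch of the three-argument form from a caller's perspective,
/// where `masm`, `dst` and `lhs` are hypothetical values (illustrative):
///
/// ```ignore
/// // dst <- lhs + 1; two-argument ISAs like x64 may first move `lhs`
/// // into `dst` internally.
/// masm.add(writable!(dst), lhs, RegImm::i32(1), OperandSize::S32)?;
/// ```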
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset -
    /// offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Get the stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    ///
    /// A WebAssembly store introduces several additional invariants compared
    /// to [Self::store]; more precisely, it can implicitly trap in certain
    /// circumstances, even if explicit bounds checks are elided, so we
    /// consider this type of store untrusted. It can also differ with regards
    /// to endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store] should be explicitly used when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a WebAssembly load.
    ///
    /// A WebAssembly load introduces several additional invariants compared
    /// to [Self::load]; more precisely, it can implicitly trap in certain
    /// circumstances, even if explicit bounds checks are elided, so we
    /// consider this type of load untrusted. It can also differ with regards
    /// to endianness depending on the target ISA. For this reason,
    /// [Self::wasm_load] should be explicitly used when emitting WebAssembly
    /// loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
        -> Result<()>;

    /// Performs a memory move of bytes from src to dst.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
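        // E.g., with 8-byte words, moving 12 bytes copies one full word and
        // then falls through to the single half-word (4-byte) tail below.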
1470        let mut remaining = bytes;
1471        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();
1472        let scratch = scratch!(Self);
1473
1474        let word_bytes = word_bytes as u32;
1475
1476        let mut dst_offs;
1477        let mut src_offs;
1478        match direction {
1479            MemMoveDirection::LowToHigh => {
1480                dst_offs = dst.as_u32() - bytes;
1481                src_offs = src.as_u32() - bytes;
1482                while remaining >= word_bytes {
1483                    remaining -= word_bytes;
1484                    dst_offs += word_bytes;
1485                    src_offs += word_bytes;
1486
1487                    self.load_ptr(
1488                        self.address_from_sp(SPOffset::from_u32(src_offs))?,
1489                        writable!(scratch),
1490                    )?;
1491                    self.store_ptr(
1492                        scratch.into(),
1493                        self.address_from_sp(SPOffset::from_u32(dst_offs))?,
1494                    )?;
1495                }
1496            }
1497            MemMoveDirection::HighToLow => {
1498                // Go from the end to the beginning to handle overlapping addresses.
1499                src_offs = src.as_u32();
1500                dst_offs = dst.as_u32();
1501                while remaining >= word_bytes {
1502                    self.load_ptr(
1503                        self.address_from_sp(SPOffset::from_u32(src_offs))?,
1504                        writable!(scratch),
1505                    )?;
1506                    self.store_ptr(
1507                        scratch.into(),
1508                        self.address_from_sp(SPOffset::from_u32(dst_offs))?,
1509                    )?;
1510
1511                    remaining -= word_bytes;
1512                    src_offs -= word_bytes;
1513                    dst_offs -= word_bytes;
1514                }
1515            }
1516        }
1517
1518        if remaining > 0 {
1519            let half_word = word_bytes / 2;
1520            let ptr_size = OperandSize::from_bytes(half_word as u8);
1521            debug_assert!(remaining == half_word);
1522            // Need to move the offsets ahead in the `LowToHigh` case to
1523            // compensate for the initial subtraction of `bytes`.
1524            if direction == MemMoveDirection::LowToHigh {
1525                dst_offs += half_word;
1526                src_offs += half_word;
1527            }
1528
1529            self.load(
1530                self.address_from_sp(SPOffset::from_u32(src_offs))?,
1531                writable!(scratch),
1532                ptr_size,
1533            )?;
1534            self.store(
1535                scratch.into(),
1536                self.address_from_sp(SPOffset::from_u32(dst_offs))?,
1537                ptr_size,
1538            )?;
1539        }
1540        Ok(())
1541    }
1542
1543    /// Perform add operation.
1544    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1545
1546    /// Perform a checked unsigned integer addition, emitting the provided trap
1547    /// if the addition overflows.
1548    fn checked_uadd(
1549        &mut self,
1550        dst: WritableReg,
1551        lhs: Reg,
1552        rhs: RegImm,
1553        size: OperandSize,
1554        trap: TrapCode,
1555    ) -> Result<()>;
1556
1557    /// Perform subtraction operation.
1558    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1559
1560    /// Perform multiplication operation.
1561    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1562
1563    /// Perform a floating point add operation.
1564    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1565
1566    /// Perform a floating point subtraction operation.
1567    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1568
1569    /// Perform a floating point multiply operation.
1570    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1571
1572    /// Perform a floating point divide operation.
1573    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1574
1575    /// Perform a floating point minimum operation. In x86, this will emit
1576    /// multiple instructions.
1577    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1578
1579    /// Perform a floating point maximum operation. In x86, this will emit
1580    /// multiple instructions.
1581    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1582
1583    /// Perform a floating point copysign operation. In x86, this will emit
1584    /// multiple instructions.
1585    fn float_copysign(
1586        &mut self,
1587        dst: WritableReg,
1588        lhs: Reg,
1589        rhs: Reg,
1590        size: OperandSize,
1591    ) -> Result<()>;
1592
1593    /// Perform a floating point abs operation.
1594    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1595
1596    /// Perform a floating point negation operation.
1597    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1598
1599    /// Perform a floating point floor operation.
1600    fn float_round<
1601        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
1602    >(
1603        &mut self,
1604        mode: RoundingMode,
1605        env: &mut FuncEnv<Self::Ptr>,
1606        context: &mut CodeGenContext<Emission>,
1607        size: OperandSize,
1608        fallback: F,
1609    ) -> Result<()>;
1610
1611    /// Perform a floating point square root operation.
1612    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1613
1614    /// Perform logical and operation.
1615    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1616
1617    /// Perform logical or operation.
1618    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1619
1620    /// Perform logical exclusive or operation.
1621    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1622
1623    /// Perform a shift operation between a register and an immediate.
1624    fn shift_ir(
1625        &mut self,
1626        dst: WritableReg,
1627        imm: u64,
1628        lhs: Reg,
1629        kind: ShiftKind,
1630        size: OperandSize,
1631    ) -> Result<()>;
1632
1633    /// Perform a shift operation between two registers.
1634    /// This case is special in that some architectures have specific expectations
1635    /// regarding the location of the instruction arguments. To free the
1636    /// caller from having to deal with the architecture specific constraints
1637    /// we give this function access to the code generation context, allowing
1638    /// each implementation to decide the lowering path.
1639    fn shift(
1640        &mut self,
1641        context: &mut CodeGenContext<Emission>,
1642        kind: ShiftKind,
1643        size: OperandSize,
1644    ) -> Result<()>;
1645
1646    /// Perform division operation.
1647    /// Division is special in that some architectures have specific
1648    /// expectations regarding the location of the instruction
1649    /// arguments and regarding the location of the quotient /
1650    /// remainder. To free the caller from having to deal with the
1651    /// architecture specific constraints we give this function access
1652    /// to the code generation context, allowing each implementation
1653    /// to decide the lowering path.  For cases in which division is a
1654    /// unconstrained binary operation, the caller can decide to use
1655    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
1656    /// functions.
1657    fn div(
1658        &mut self,
1659        context: &mut CodeGenContext<Emission>,
1660        kind: DivKind,
1661        size: OperandSize,
1662    ) -> Result<()>;
1663
1664    /// Calculate remainder.
1665    fn rem(
1666        &mut self,
1667        context: &mut CodeGenContext<Emission>,
1668        kind: RemKind,
1669        size: OperandSize,
1670    ) -> Result<()>;
1671
1672    /// Compares `src1` against `src2` for the side effect of setting processor
1673    /// flags.
1674    ///
1675    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1676    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1677    /// `src2 == b`.
1678    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1679
1680    /// Compare src and dst and put the result in dst.
1681    /// This function will potentially emit a series of instructions.
1682    ///
1683    /// The initial value in `dst` is the left-hand-side of the comparison and
1684    /// the initial value in `src` is the right-hand-side of the comparison.
1685    /// That means for `a < b` then `dst == a` and `src == b`.
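    ///
    /// A hedged sketch for lowering `a < b` (signed, 32-bit), assuming `dst`
    /// already holds `a` and register `rhs` holds `b`:
    ///
    /// ```ignore
    /// // On return, `dst` holds the boolean result of `a < b`.
    /// masm.cmp_with_set(writable!(dst), RegImm::reg(rhs), IntCmpKind::LtS, OperandSize::S32)?;
    /// ```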
1686    fn cmp_with_set(
1687        &mut self,
1688        dst: WritableReg,
1689        src: RegImm,
1690        kind: IntCmpKind,
1691        size: OperandSize,
1692    ) -> Result<()>;
1693
1694    /// Compare floats in src1 and src2 and put the result in dst.
1695    /// In x86, this will emit multiple instructions.
1696    fn float_cmp_with_set(
1697        &mut self,
1698        dst: WritableReg,
1699        src1: Reg,
1700        src2: Reg,
1701        kind: FloatCmpKind,
1702        size: OperandSize,
1703    ) -> Result<()>;
1704
1705    /// Count the number of leading zeroes in src and put the result in dst.
1706    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1707    /// false.
1708    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1709
1710    /// Count the number of trailing zeroes in src and put the result in dst.
1711    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1712    /// false.
1713    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1714
1715    /// Push the register to the stack, returning the stack slot metadata.
1716    // NB
1717    // The stack alignment should not be assumed after any call to `push`,
1718    // unless explicitly aligned otherwise.  Typically, stack alignment is
1719    // maintained at call sites and during the execution of
1720    // epilogues.
1721    fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1722
1723    /// Finalize the assembly and return the result.
1724    fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1725
1726    /// Zero a particular register.
1727    fn zero(&mut self, reg: WritableReg) -> Result<()>;
1728
1729    /// Count the number of 1 bits in src and put the result in dst. In x64,
1730    /// this will emit multiple instructions if the `has_popcnt` flag is false.
1731    fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1732
1733    /// Converts an i64 to an i32 by discarding the high 32 bits.
1734    fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1735
1736    /// Extends an integer of a given size to a larger size.
1737    fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1738
1739    /// Emits one or more instructions to perform a signed truncation of a
1740    /// float into an integer.
1741    fn signed_truncate(
1742        &mut self,
1743        dst: WritableReg,
1744        src: Reg,
1745        src_size: OperandSize,
1746        dst_size: OperandSize,
1747        kind: TruncKind,
1748    ) -> Result<()>;
1749
1750    /// Emits one or more instructions to perform an unsigned truncation of a
1751    /// float into an integer.
1752    fn unsigned_truncate(
1753        &mut self,
1754        context: &mut CodeGenContext<Emission>,
1755        src_size: OperandSize,
1756        dst_size: OperandSize,
1757        kind: TruncKind,
1758    ) -> Result<()>;
1759
1760    /// Emits one or more instructions to perform a signed convert of an
1761    /// integer into a float.
1762    fn signed_convert(
1763        &mut self,
1764        dst: WritableReg,
1765        src: Reg,
1766        src_size: OperandSize,
1767        dst_size: OperandSize,
1768    ) -> Result<()>;
1769
1770    /// Emits one or more instructions to perform an unsigned convert of an
1771    /// integer into a float.
1772    fn unsigned_convert(
1773        &mut self,
1774        dst: WritableReg,
1775        src: Reg,
1776        tmp_gpr: Reg,
1777        src_size: OperandSize,
1778        dst_size: OperandSize,
1779    ) -> Result<()>;
1780
1781    /// Reinterpret a float as an integer.
1782    fn reinterpret_float_as_int(
1783        &mut self,
1784        dst: WritableReg,
1785        src: Reg,
1786        size: OperandSize,
1787    ) -> Result<()>;
1788
1789    /// Reinterpret an integer as a float.
1790    fn reinterpret_int_as_float(
1791        &mut self,
1792        dst: WritableReg,
1793        src: Reg,
1794        size: OperandSize,
1795    ) -> Result<()>;
1796
1797    /// Demote an f64 to an f32.
1798    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1799
1800    /// Promote an f32 to an f64.
1801    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1802
1803    /// Zero a given memory range.
1804    ///
1805    /// The default implementation divides the given memory range
1806    /// into word-sized slots. Then it unrolls a series of store
1807    /// instructions, effectively assigning zero to each slot.
1808    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
1809        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
1810        if mem.is_empty() {
1811            return Ok(());
1812        }
1813
1814        let start = if mem.start % word_size == 0 {
1815            mem.start
1816        } else {
1817            // Ensure that the start of the range is at least 4-byte aligned.
1818            assert!(mem.start % 4 == 0);
1819            let start = align_to(mem.start, word_size);
1820            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
1821            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
1822            // Ensure that the new start of the range is word-size aligned.
1823            assert!(start % word_size == 0);
1824            start
1825        };
1826
1827        let end = align_to(mem.end, word_size);
1828        let slots = (end - start) / word_size;
1829
1830        if slots == 1 {
1831            let slot = LocalSlot::i64(start + word_size);
1832            let addr: Self::Address = self.local_address(&slot)?;
1833            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
1834        } else {
1835            // TODO
1836            // Add an upper bound to this generation;
1837            // given a considerably large number of slots,
1838            // this will be inefficient.
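            // Zero a scratch register once and reuse it as the store source
            // for every slot, instead of re-materializing the immediate on
            // each iteration.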
1839            let zero = scratch!(Self);
1840            self.zero(writable!(zero))?;
1841            let zero = RegImm::reg(zero);
1842
1843            for step in (start..end).step_by(word_size as usize) {
1844                let slot = LocalSlot::i64(step + word_size);
1845                let addr: Self::Address = self.local_address(&slot)?;
1846                self.store(zero, addr, OperandSize::S64)?;
1847            }
1848        }
1849
1850        Ok(())
1851    }
1852
1853    /// Generate a label.
1854    fn get_label(&mut self) -> Result<MachLabel>;
1855
1856    /// Bind the given label at the current code offset.
1857    fn bind(&mut self, label: MachLabel) -> Result<()>;
1858
1859    /// Conditional branch.
1860    ///
1861    /// Performs a comparison between the two operands,
1862    /// and immediately after emits a jump to the given
1863    /// label destination if the condition is met.
1864    fn branch(
1865        &mut self,
1866        kind: IntCmpKind,
1867        lhs: Reg,
1868        rhs: RegImm,
1869        taken: MachLabel,
1870        size: OperandSize,
1871    ) -> Result<()>;
1872
1873    /// Emits an unconditional jump to the given label.
1874    fn jmp(&mut self, target: MachLabel) -> Result<()>;
1875
1876    /// Emits a jump table sequence. The default label is specified as
1877    /// the last element of the targets slice.
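    ///
    /// A hedged sketch (assuming labels previously created with `get_label`,
    /// with `default_label` as the fallback target):
    ///
    /// ```ignore
    /// // Branches to `case0` or `case1` for indices 0 and 1; any other
    /// // index jumps to `default_label`, the last element of the slice.
    /// masm.jmp_table(&[case0, case1, default_label], index, tmp)?;
    /// ```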
1878    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
1879
1880    /// Emit an unreachable code trap.
1881    fn unreachable(&mut self) -> Result<()>;
1882
1883    /// Emit an unconditional trap.
1884    fn trap(&mut self, code: TrapCode) -> Result<()>;
1885
1886    /// Traps if the condition code is met.
1887    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;
1888
1889    /// Trap if the source register is zero.
1890    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;
1891
1892    /// Ensures that the stack pointer is correctly positioned before an unconditional
1893    /// jump according to the requirements of the destination target.
1894    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
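        // When the current offset is already at or below the target,
        // `checked_sub` yields `None` and no stack memory needs to be freed.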
1895        let bytes = self
1896            .sp_offset()?
1897            .as_u32()
1898            .checked_sub(target.as_u32())
1899            .unwrap_or(0);
1900
1901        if bytes > 0 {
1902            self.free_stack(bytes)?;
1903        }
1904
1905        Ok(())
1906    }
1907
1908    /// Mark the start of a source location, returning the machine code offset
1909    /// and the relative source code location.
1910    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;
1911
1912    /// Mark the end of a source location.
1913    fn end_source_loc(&mut self) -> Result<()>;
1914
1915    /// The current offset, in bytes from the beginning of the function.
1916    fn current_code_offset(&self) -> Result<CodeOffset>;
1917
1918    /// Performs a 128-bit addition.
1919    fn add128(
1920        &mut self,
1921        dst_lo: WritableReg,
1922        dst_hi: WritableReg,
1923        lhs_lo: Reg,
1924        lhs_hi: Reg,
1925        rhs_lo: Reg,
1926        rhs_hi: Reg,
1927    ) -> Result<()>;
1928
1929    /// Performs a 128-bit subtraction.
1930    fn sub128(
1931        &mut self,
1932        dst_lo: WritableReg,
1933        dst_hi: WritableReg,
1934        lhs_lo: Reg,
1935        lhs_hi: Reg,
1936        rhs_lo: Reg,
1937        rhs_hi: Reg,
1938    ) -> Result<()>;
1939
1940    /// Performs a widening multiplication from two 64-bit operands into a
1941    /// 128-bit result.
1942    ///
1943    /// Note that some platforms require special handling of registers in this
1944    /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
1945    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
1946        -> Result<()>;
1947
1948    /// Takes the value in a src operand and replicates it across lanes of
1949    /// `size` in a destination result.
1950    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;
1951
1952    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
1953    /// using lanes as a mask to select which indexes to copy.
1954    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;
1955
1956    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
1957    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;
1958
1959    /// Performs the RMW `op` operation on the passed `addr`.
1960    ///
1961    /// The value *before* the operation was performed is written back to the `operand` register.
1962    fn atomic_rmw(
1963        &mut self,
1964        context: &mut CodeGenContext<Emission>,
1965        addr: Self::Address,
1966        size: OperandSize,
1967        op: RmwOp,
1968        flags: MemFlags,
1969        extend: Option<Extend<Zero>>,
1970    ) -> Result<()>;
1971
1972    /// Extracts the scalar value from `src` in `lane` to `dst`.
1973    fn extract_lane(
1974        &mut self,
1975        src: Reg,
1976        dst: WritableReg,
1977        lane: u8,
1978        kind: ExtractLaneKind,
1979    ) -> Result<()>;
1980
1981    /// Replaces the value in `lane` in `dst` with the value in `src`.
1982    fn replace_lane(
1983        &mut self,
1984        src: RegImm,
1985        dst: WritableReg,
1986        lane: u8,
1987        kind: ReplaceLaneKind,
1988    ) -> Result<()>;
1989
1990    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
1991    /// and `replacement` (at the top of the context's stack).
1992    ///
1993    /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
1994    /// expect parameters in specific registers. The context stack contains the `replacement`
1995    /// and `expected` values, in that order. The implementer is expected to push the value at
1996    /// `addr` prior to the update onto the context's stack before returning.
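    ///
    /// A hedged sketch of the expected value-stack discipline, with the top
    /// of the stack shown last (`addr` and the memory flags are assumptions
    /// for illustration):
    ///
    /// ```ignore
    /// // Before: [.., replacement, expected]
    /// // After:  [.., value at `addr` prior to the update]
    /// masm.atomic_cas(&mut context, addr, OperandSize::S32, MemFlags::trusted(), None)?;
    /// ```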
1997    fn atomic_cas(
1998        &mut self,
1999        context: &mut CodeGenContext<Emission>,
2000        addr: Self::Address,
2001        size: OperandSize,
2002        flags: MemFlags,
2003        extend: Option<Extend<Zero>>,
2004    ) -> Result<()>;
2005
2006    /// Compares vector registers `lhs` and `rhs` for equality and puts the
2007    /// vector of results in `dst`.
2008    fn v128_eq(
2009        &mut self,
2010        dst: WritableReg,
2011        lhs: Reg,
2012        rhs: Reg,
2013        kind: VectorEqualityKind,
2014    ) -> Result<()>;
2015
2016    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
2017    /// vector of results in `dst`.
2018    fn v128_ne(
2019        &mut self,
2020        dst: WritableReg,
2021        lhs: Reg,
2022        rhs: Reg,
2023        kind: VectorEqualityKind,
2024    ) -> Result<()>;
2025
2026    /// Performs a less than comparison with vector registers `lhs` and `rhs`
2027    /// and puts the vector of results in `dst`.
2028    fn v128_lt(
2029        &mut self,
2030        dst: WritableReg,
2031        lhs: Reg,
2032        rhs: Reg,
2033        kind: VectorCompareKind,
2034    ) -> Result<()>;
2035
2036    /// Performs a less than or equal comparison with vector registers `lhs`
2037    /// and `rhs` and puts the vector of results in `dst`.
2038    fn v128_le(
2039        &mut self,
2040        dst: WritableReg,
2041        lhs: Reg,
2042        rhs: Reg,
2043        kind: VectorCompareKind,
2044    ) -> Result<()>;
2045
2046    /// Performs a greater than comparison with vector registers `lhs` and
2047    /// `rhs` and puts the vector of results in `dst`.
2048    fn v128_gt(
2049        &mut self,
2050        dst: WritableReg,
2051        lhs: Reg,
2052        rhs: Reg,
2053        kind: VectorCompareKind,
2054    ) -> Result<()>;
2055
2056    /// Performs a greater than or equal comparison with vector registers `lhs`
2057    /// and `rhs` and puts the vector of results in `dst`.
2058    fn v128_ge(
2059        &mut self,
2060        dst: WritableReg,
2061        lhs: Reg,
2062        rhs: Reg,
2063        kind: VectorCompareKind,
2064    ) -> Result<()>;
2065
2066    /// Emit a memory fence.
2067    fn fence(&mut self) -> Result<()>;
2068
2069    /// Perform a logical `not` operation on the 128-bit vector value in `dst`.
2070    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;
2071
2072    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
2073    /// the result to `dst`.
2074    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2075
2076    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
2077    /// the result to `dst`.
2078    ///
2079    /// `and_not` is not commutative: dst = !src1 & src2.
2080    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2081
2082    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
2083    /// the result to `dst`.
2084    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2085
2086    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
2087    /// the result to `dst`.
2088    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2089
2090    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
2091    /// from `src1` when mask is 1, and from `src2` when mask is 0.
2092    ///
2093    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
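    ///
    /// For example (a hedged, 8-bit illustration of the lane-wise rule): with
    /// `src1 = 0b1100_1100`, `src2 = 0b1010_1010`, and `mask = 0b1111_0000`,
    /// the result takes its high nibble from `src1` and its low nibble from
    /// `src2`, yielding `0b1100_1010`.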
2094    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
2095
2096    /// If any bit in `src` is 1, set `dst` to 1; otherwise set `dst` to 0.
2097    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2098
2099    /// Convert a vector of integers to a vector of floating point values.
2100    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;
2101
2102    /// Convert two input vectors into a smaller lane vector by narrowing each
2103    /// lane.
2104    fn v128_narrow(
2105        &mut self,
2106        src1: Reg,
2107        src2: Reg,
2108        dst: WritableReg,
2109        kind: V128NarrowKind,
2110    ) -> Result<()>;
2111
2112    /// Converts a vector containing two 64-bit floating point lanes to two
2113    /// 32-bit floating point lanes, setting the two higher lanes to 0.
2114    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2115
2116    /// Converts a vector containing four 32-bit floating point lanes to two
2117    /// 64-bit floating point lanes. Only the two lower lanes are converted.
2118    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2119
2120    /// Converts the low or high half of the smaller lane vector to a larger lane
2121    /// vector.
2122    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;
2123
2124    /// Perform a vector add between `lhs` and `rhs`, placing the result in
2125    /// `dst`.
2126    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;
2127
2128    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
2129    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;
2130
2131    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
2132    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
2133        -> Result<()>;
2134
2135    /// Perform an absolute value operation on a vector.
2136    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;
2137
2138    /// Vectorized negate of the content of `op`.
2139    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;
2140
2141    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
2142    /// integer at the top of the stack, on the 128-bit vector specified by the second value
2143    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
2144    ///
2145    /// The shift amount is taken modulo `lane_width`.
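    ///
    /// A hedged sketch (assuming a `context` whose value stack holds the
    /// vector operand with the `i32` shift amount pushed on top):
    ///
    /// ```ignore
    /// // Lowers e.g. `i32x4.shl`: pops the amount and the vector, and pushes
    /// // the shifted vector back onto the context's value stack.
    /// masm.v128_shift(&mut context, OperandSize::S32, ShiftKind::Shl)?;
    /// ```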
2146    fn v128_shift(
2147        &mut self,
2148        context: &mut CodeGenContext<Emission>,
2149        lane_width: OperandSize,
2150        kind: ShiftKind,
2151    ) -> Result<()>;
2152
2153    /// Perform a saturating integer q-format rounding multiplication.
2154    fn v128_q15mulr_sat_s(
2155        &mut self,
2156        lhs: Reg,
2157        rhs: Reg,
2158        dst: WritableReg,
2159        size: OperandSize,
2160    ) -> Result<()>;
2161
2162    /// Sets `dst` to 1 if all lanes in `src` are non-zero, and to 0
2163    /// otherwise.
2164    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2165
2166    /// Extracts the high bit of each lane in `src` and produces a scalar mask
2167    /// with all bits concatenated in `dst`.
2168    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2169
2170    /// Lanewise truncation operation.
2171    ///
2172    /// If using an integer kind of truncation, then this performs a lane-wise
2173    /// saturating conversion from float to integer using the IEEE
2174    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
2175    /// resulting lane is 0. If the rounded integer value of a lane is outside
2176    /// the range of the destination type, the result is saturated to the
2177    /// nearest representable integer value.
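    ///
    /// For instance (a hedged illustration of a signed `f32x4` to `i32x4`
    /// truncation): a lane holding `3.7` becomes `3`, a NaN lane becomes `0`,
    /// and a lane holding `3.0e9` saturates to `i32::MAX`.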
2178    fn v128_trunc(
2179        &mut self,
2180        context: &mut CodeGenContext<Emission>,
2181        kind: V128TruncKind,
2182    ) -> Result<()>;
2183
2184    /// Perform a lane-wise `min` operation between `src1` and `src2`.
2185    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
2186        -> Result<()>;
2187
2188    /// Perform a lane-wise `max` operation between `src1` and `src2`.
2189    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
2190        -> Result<()>;
2191
2192    /// Perform the lane-wise integer extended multiplication, producing a result twice as
2193    /// wide as the inputs. This is equivalent to an extend followed by a multiply.
2194    ///
2195    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
2196    /// e.g., if `lane_width` is `S16` and `kind` is `LowSigned`, then we sign-extend the lower
2197    /// 8 bits of the 16-bit lanes.
2198    fn v128_extmul(
2199        &mut self,
2200        context: &mut CodeGenContext<Emission>,
2201        kind: V128ExtMulKind,
2202    ) -> Result<()>;
2203
2204    /// Perform the lane-wise integer extended pairwise addition, producing extended results
2205    /// (twice as wide as the inputs).
2206    fn v128_extadd_pairwise(
2207        &mut self,
2208        src: Reg,
2209        dst: WritableReg,
2210        kind: V128ExtAddKind,
2211    ) -> Result<()>;
2212
2213    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
2214    /// adjacent pairs of the 32-bit results.
2215    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
2216
2217    /// Count the number of bits set in each lane.
2218    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;
2219
2220    /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
2221    /// and put the results in `dst`.
2222    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2223
2224    /// Lane-wise IEEE division on vectors of floats.
2225    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2226
2227    /// Lane-wise IEEE square root of vector of floats.
2228    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2229
2230    /// Lane-wise ceiling of vector of floats.
2231    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2232
2233    /// Lane-wise flooring of vector of floats.
2234    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2235
2236    /// Lane-wise rounding to nearest integer for vector of floats.
2237    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2238
2239    /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
2240    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2241
2242    /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
2243    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2244}