winch_codegen/masm.rs
use crate::abi::{self, align_to, scratch, LocalSlot};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    reg::{writable, Reg, WritableReg},
    CallingConvention,
};
use anyhow::Result;
use cranelift_codegen::{
    binemit::CodeOffset,
    ir::{Endianness, LibCall, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
    Final, MachBufferFinalized, MachLabel,
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::PtrSize;

pub(crate) use cranelift_codegen::ir::TrapCode;

/// Division kind.
#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kind of widening multiplication: signed or unsigned operands.
#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    /// Signed multiplication.
    Signed,
    /// Unsigned multiplication.
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    /// Atomic add.
    Add,
    /// Atomic subtract.
    Sub,
    /// Atomic exchange.
    Xchg,
    /// Atomic bitwise and.
    And,
    /// Atomic bitwise or.
    Or,
    /// Atomic bitwise xor.
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}

/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}
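
// Illustrative sketch (not part of the API): how `SPOffset` and `StackSlot`
// compose. Offsets are plain `u32` distances relative to the stack pointer,
// so slot bookkeeping is simple arithmetic:
//
//     let slot = StackSlot::new(SPOffset::from_u32(16), 8);
//     assert_eq!(slot.offset.as_u32(), 16);
//     assert_eq!(slot.size, 8);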

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`MacroAssembler`] implementation for
/// each ISA is responsible for emitting the correct sequence of instructions
/// when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`; both are empty enums, so this variant
    /// cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
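
// Illustrative sketch: the typestate pattern above means a zero extend and a
// sign extend of the same width are different types, and both erase into
// `ExtendKind` via `From`:
//
//     let zext: ExtendKind = Extend::<Zero>::I64Extend32.into();
//     assert!(!zext.signed());
//     assert_eq!(zext.from_bits(), 32);
//     assert_eq!(zext.to_bits(), 64);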

/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for each kind of extract lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}

/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for each kind of replace lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modification.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into a selected lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}
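
// Illustrative sketch: `derive_operand_size` reports the width of the memory
// access itself, not of the resulting value. A sign-extending 8-to-32 load
// still only touches one byte:
//
//     let kind = LoadKind::ScalarExtend(ExtendKind::Signed(Extend::I32Extend8));
//     assert_eq!(kind.derive_operand_size(), OperandSize::S8);
//     let lane = LoadKind::vector_lane(1, OperandSize::S32);
//     assert_eq!(lane.derive_operand_size(), OperandSize::S32);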

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modification.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modification, atomically.
    Atomic(OperandSize),
    /// Store the content of the selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}

/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to the nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to the nearest integral value.
    F64x2,
    /// Signed 32-bit integers from 4 lanes of 32-bit floats.
    I32x4FromF32x4S,
    /// Unsigned 32-bit integers from 4 lanes of 32-bit floats.
    I32x4FromF32x4U,
    /// Signed 32-bit integers from 2 lanes of 64-bit floats, zeroing the
    /// upper lanes.
    I32x4FromF64x2SZero,
    /// Unsigned 32-bit integers from 2 lanes of 64-bit floats, zeroing the
    /// upper lanes.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}

/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}

impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}
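
// Illustrative sketch: `extend_to` only succeeds for genuine widenings, which
// lets callers branch on the `Option` instead of validating sizes by hand:
//
//     let widen = OperandSize::S8.extend_to::<Zero>(OperandSize::S64);
//     assert!(matches!(widen, Some(Extend::I64Extend8)));
//     // Same-size or narrowing requests yield `None`.
//     assert!(OperandSize::S64.extend_to::<Zero>(OperandSize::S32).is_none());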

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Returns true if the [`Imm`] is a float.
    pub fn is_float(&self) -> bool {
        match self {
            Self::F32(_) | Self::F64(_) => true,
            _ => false,
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little-endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
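
// Illustrative sketch: immediates are stored as raw bits, so floats round-trip
// through their IEEE 754 representation and `to_bytes` emits the little-endian
// encoding used for the constant pool:
//
//     let imm = Imm::f32(1.0f32.to_bits());
//     assert!(imm.is_float());
//     assert_eq!(imm.size(), OperandSize::S32);
//     assert_eq!(imm.to_bytes(), 1.0f32.to_bits().to_le_bytes().to_vec());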

/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct them early on, but given that they might
/// contain allocatable registers, it's preferred to construct them in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// No context arguments required. This is used for libcalls that don't
    /// require any special context arguments. For example, builtin functions
    /// that perform float calculations.
    None,
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored in an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct an empty [ContextArgs].
    pub fn none() -> Self {
        Self::None
    }

    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::None => &[],
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
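
// Illustrative sketch: a cross-instance call materializes the callee context
// in a register while the caller context stays in the pinned register; the
// slice order matches the leading arguments of the machine call. Here
// `callee_vmctx_reg` stands in for a hypothetical allocated `Reg`:
//
//     let args = ContextArgs::with_callee_and_pinned_caller(callee_vmctx_reg);
//     assert_eq!(args.len(), MAX_CONTEXT_ARGS);
//     assert_eq!(args.as_slice()[1], VMContextLoc::Pinned);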

#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
    /// Call to a well known LibCall.
    LibCall(LibCall),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }

    /// Creates a known callee kind from a libcall.
    pub fn libcall(call: LibCall) -> Self {
        Self::LibCall(call)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}
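
// Illustrative sketch: the constructors tag the payload so later lowering can
// dispatch on register vs. immediate without guessing the type:
//
//     let lhs = RegImm::i32(-1); // stored as its bit pattern, 0xffff_ffff
//     let rhs = RegImm::f64(2.5f64.to_bits());
//     assert!(matches!(lhs, RegImm::Imm(Imm::I32(0xffff_ffff))));
//     assert!(matches!(rhs, RegImm::Imm(Imm::F64(_))));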

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

/// Rounding modes for floating point operations.
#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);

/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface that is high-level
/// enough so that each ISA can provide its own lowering to machine code. For
/// example, for WebAssembly operators that don't have a direct mapping to a
/// machine instruction, the interface defines a signature matching the
/// WebAssembly operator, allowing each implementation to lower such an
/// operator entirely. This approach attributes more responsibility to the
/// MacroAssembler, but frees the caller from having to worry about assembling
/// the right sequence of instructions at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use a three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, in the case of architectures that use a two-argument form.
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }
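
    // Illustrative sketch (assuming some type `Masm` implementing this
    // trait): the code generator brackets a function body with these hooks,
    // so the default `prologue` both sets up the frame and emits the stack
    // check:
    //
    //     masm.prologue(vmctx_reg)?;   // frame_setup + check_stack
    //     // ... emit the function body ...
    //     masm.epilogue()?;            // frame_restore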

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. [sp + (sp_offset -
    /// offset)]).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. [sp + offset]).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Get the stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    ///
    /// A WebAssembly store introduces several additional invariants compared
    /// to [Self::store]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, and in that
    /// sense we consider this type of store as untrusted. It can also differ
    /// with regards to the endianness depending on the target ISA. For this
    /// reason, [Self::wasm_store] should be explicitly used when emitting
    /// WebAssembly stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a WebAssembly load.
    ///
    /// A WebAssembly load introduces several additional invariants compared
    /// to [Self::load]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, and in that
    /// sense we consider this type of load as untrusted. It can also differ
    /// with regards to the endianness depending on the target ISA. For this
    /// reason, [Self::wasm_load] should be explicitly used when emitting
    /// WebAssembly loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
        -> Result<()>;

    /// Performs a memory move of bytes from `src` to `dst`.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();
        let scratch = scratch!(Self);

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                while remaining >= word_bytes {
                    remaining -= word_bytes;
                    dst_offs += word_bytes;
                    src_offs += word_bytes;

                    self.load_ptr(
                        self.address_from_sp(SPOffset::from_u32(src_offs))?,
                        writable!(scratch),
                    )?;
                    self.store_ptr(
                        scratch.into(),
                        self.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    )?;
                }
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                while remaining >= word_bytes {
                    self.load_ptr(
                        self.address_from_sp(SPOffset::from_u32(src_offs))?,
                        writable!(scratch),
                    )?;
                    self.store_ptr(
                        scratch.into(),
                        self.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    )?;

                    remaining -= word_bytes;
                    src_offs -= word_bytes;
                    dst_offs -= word_bytes;
                }
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.load(
                self.address_from_sp(SPOffset::from_u32(src_offs))?,
                writable!(scratch),
                ptr_size,
            )?;
            self.store(
                scratch.into(),
                self.address_from_sp(SPOffset::from_u32(dst_offs))?,
                ptr_size,
            )?;
        }
        Ok(())
    }
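
    // Illustrative sketch: moving 12 bytes with `HighToLow` copies one
    // 8-byte word and then the 4-byte remainder, assuming a 64-bit target
    // where `word_bytes` is 8. The offsets satisfy the direction invariant
    // (`dst` deeper in the frame than `src`):
    //
    //     masm.memmove(
    //         SPOffset::from_u32(24), // src
    //         SPOffset::from_u32(32), // dst
    //         12,
    //         MemMoveDirection::HighToLow,
    //     )?;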
1542
1543 /// Perform add operation.
1544 fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1545
1546 /// Perform a checked unsigned integer addition, emitting the provided trap
1547 /// if the addition overflows.
1548 fn checked_uadd(
1549 &mut self,
1550 dst: WritableReg,
1551 lhs: Reg,
1552 rhs: RegImm,
1553 size: OperandSize,
1554 trap: TrapCode,
1555 ) -> Result<()>;
1556
1557 /// Perform subtraction operation.
1558 fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1559
1560 /// Perform multiplication operation.
1561 fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1562
1563 /// Perform a floating point add operation.
1564 fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1565
1566 /// Perform a floating point subtraction operation.
1567 fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1568
1569 /// Perform a floating point multiply operation.
1570 fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1571
1572 /// Perform a floating point divide operation.
1573 fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1574
1575 /// Perform a floating point minimum operation. In x86, this will emit
1576 /// multiple instructions.
1577 fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1578
1579 /// Perform a floating point maximum operation. In x86, this will emit
1580 /// multiple instructions.
1581 fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1582
1583 /// Perform a floating point copysign operation. In x86, this will emit
1584 /// multiple instructions.
1585 fn float_copysign(
1586 &mut self,
1587 dst: WritableReg,
1588 lhs: Reg,
1589 rhs: Reg,
1590 size: OperandSize,
1591 ) -> Result<()>;
1592
1593 /// Perform a floating point abs operation.
1594 fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1595
1596 /// Perform a floating point negation operation.
1597 fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1598
1599 /// Perform a floating point floor operation.
1600 fn float_round<
1601 F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
1602 >(
1603 &mut self,
1604 mode: RoundingMode,
1605 env: &mut FuncEnv<Self::Ptr>,
1606 context: &mut CodeGenContext<Emission>,
1607 size: OperandSize,
1608 fallback: F,
1609 ) -> Result<()>;
1610
1611 /// Perform a floating point square root operation.
1612 fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1613
1614 /// Perform logical and operation.
1615 fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1616
1617 /// Perform logical or operation.
1618 fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1619
1620 /// Perform logical exclusive or operation.
1621 fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1622
1623 /// Perform a shift operation between a register and an immediate.
1624 fn shift_ir(
1625 &mut self,
1626 dst: WritableReg,
1627 imm: u64,
1628 lhs: Reg,
1629 kind: ShiftKind,
1630 size: OperandSize,
1631 ) -> Result<()>;
1632
1633 /// Perform a shift operation between two registers.
1634 /// This case is special in that some architectures have specific expectations
1635 /// regarding the location of the instruction arguments. To free the
1636 /// caller from having to deal with the architecture specific constraints
1637 /// we give this function access to the code generation context, allowing
1638 /// each implementation to decide the lowering path.
1639 fn shift(
1640 &mut self,
1641 context: &mut CodeGenContext<Emission>,
1642 kind: ShiftKind,
1643 size: OperandSize,
1644 ) -> Result<()>;
1645
1646 /// Perform division operation.
1647 /// Division is special in that some architectures have specific
1648 /// expectations regarding the location of the instruction
1649 /// arguments and regarding the location of the quotient /
1650 /// remainder. To free the caller from having to deal with the
1651 /// architecture specific constraints we give this function access
1652 /// to the code generation context, allowing each implementation
1653 /// to decide the lowering path. For cases in which division is a
1654 /// unconstrained binary operation, the caller can decide to use
1655 /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
1656 /// functions.
1657 fn div(
1658 &mut self,
1659 context: &mut CodeGenContext<Emission>,
1660 kind: DivKind,
1661 size: OperandSize,
1662 ) -> Result<()>;
1663
1664 /// Calculate remainder.
1665 fn rem(
1666 &mut self,
1667 context: &mut CodeGenContext<Emission>,
1668 kind: RemKind,
1669 size: OperandSize,
1670 ) -> Result<()>;
1671
1672 /// Compares `src1` against `src2` for the side effect of setting processor
1673 /// flags.
1674 ///
1675 /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1676 /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1677 /// `src2 == b`
1678 fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1679
1680 /// Compare src and dst and put the result in dst.
1681 /// This function will potentially emit a series of instructions.
1682 ///
1683 /// The initial value in `dst` is the left-hand-side of the comparison and
1684 /// the initial value in `src` is the right-hand-side of the comparison.
1685 /// That means for `a < b` then `dst == a` and `src == b`.
1686 fn cmp_with_set(
1687 &mut self,
1688 dst: WritableReg,
1689 src: RegImm,
1690 kind: IntCmpKind,
1691 size: OperandSize,
1692 ) -> Result<()>;
1693
1694 /// Compare floats in src1 and src2 and put the result in dst.
1695 /// In x86, this will emit multiple instructions.
1696 fn float_cmp_with_set(
1697 &mut self,
1698 dst: WritableReg,
1699 src1: Reg,
1700 src2: Reg,
1701 kind: FloatCmpKind,
1702 size: OperandSize,
1703 ) -> Result<()>;
1704
1705 /// Count the number of leading zeroes in src and put the result in dst.
1706 /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1707 /// false.
1708 fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1709
1710 /// Count the number of trailing zeroes in src and put the result in dst.masm
1711 /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1712 /// false.
1713 fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1714
1715 /// Push the register to the stack, returning the stack slot metadata.
1716 // NB
1717 // The stack alignment should not be assumed after any call to `push`,
1718 // unless explicitly aligned otherwise. Typically, stack alignment is
1719 // maintained at call sites and during the execution of
1720 // epilogues.
1721 fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1722
1723 /// Finalize the assembly and return the result.
1724 fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1725
1726 /// Zero a particular register.
1727 fn zero(&mut self, reg: WritableReg) -> Result<()>;
1728
1729 /// Count the number of 1 bits in src and put the result in dst. In x64,
1730 /// this will emit multiple instructions if the `has_popcnt` flag is false.
1731 fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1732
1733 /// Converts an i64 to an i32 by discarding the high 32 bits.
1734 fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1735
1736 /// Extends an integer of a given size to a larger size.
1737 fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1738
1739 /// Emits one or more instructions to perform a signed truncation of a
1740 /// float into an integer.
1741 fn signed_truncate(
1742 &mut self,
1743 dst: WritableReg,
1744 src: Reg,
1745 src_size: OperandSize,
1746 dst_size: OperandSize,
1747 kind: TruncKind,
1748 ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned truncation of a
    /// float into an integer.
    fn unsigned_truncate(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform a signed conversion of an
    /// integer into a float.
    fn signed_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned conversion of an
    /// integer into a float.
    fn unsigned_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        tmp_gpr: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret a float as an integer.
    fn reinterpret_float_as_int(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret an integer as a float.
    fn reinterpret_int_as_float(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Demote an f64 to an f32.
    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Promote an f32 to an f64.
    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large number of slots
            // this will be inefficient.
            let zero = scratch!(Self);
            self.zero(writable!(zero))?;
            let zero = RegImm::reg(zero);

            for step in (start..end).into_iter().step_by(word_size as usize) {
                let slot = LocalSlot::i64(step + word_size);
                let addr: Self::Address = self.local_address(&slot)?;
                self.store(zero, addr, OperandSize::S64)?;
            }
        }

        Ok(())
    }
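
    // Worked example (illustration, not in the original source): with an
    // 8-byte word size, `zero_mem_range(&(4..24))` first zeroes the unaligned
    // 4-byte head with a single 32-bit store, and then unrolls one 64-bit
    // store per word-sized slot over the remaining word-aligned 8..24
    // sub-range.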

    /// Generate a label.
    fn get_label(&mut self) -> Result<MachLabel>;

    /// Bind the given label at the current code offset.
    fn bind(&mut self, label: MachLabel) -> Result<()>;

    /// Conditional branch.
    ///
    /// Performs a comparison between the two operands,
    /// and immediately afterwards emits a jump to the given
    /// label destination if the condition is met.
    fn branch(
        &mut self,
        kind: IntCmpKind,
        lhs: Reg,
        rhs: RegImm,
        taken: MachLabel,
        size: OperandSize,
    ) -> Result<()>;

    /// Emits an unconditional jump to the given label.
    fn jmp(&mut self, target: MachLabel) -> Result<()>;

    /// Emits a jump table sequence. The default label is specified as
    /// the last element of the targets slice.
    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
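
    // Illustrative usage (not in the original source): lowering a Wasm
    // `br_table` with two explicit targets; any out-of-range `index` jumps to
    // the default label, passed here as the last element of the slice.
    //
    //     masm.jmp_table(&[l0, l1, l_default], index, tmp)?;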

    /// Emit an unreachable code trap.
    fn unreachable(&mut self) -> Result<()>;

    /// Emit an unconditional trap.
    fn trap(&mut self, code: TrapCode) -> Result<()>;

    /// Traps if the condition code is met.
    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;

    /// Trap if the source register is zero.
    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;

    /// Ensures that the stack pointer is correctly positioned before an unconditional
    /// jump according to the requirements of the destination target.
    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
        let bytes = self
            .sp_offset()?
            .as_u32()
            .checked_sub(target.as_u32())
            .unwrap_or(0);

        if bytes > 0 {
            self.free_stack(bytes)?;
        }

        Ok(())
    }
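
    // Worked example (illustration, not in the original source): if the
    // current stack pointer offset is 32 bytes and the jump target expects an
    // offset of 16, the 16 extra bytes are freed before the jump. If the
    // current offset is already at or below the target, `checked_sub` yields
    // `None` and no adjustment is emitted.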

    /// Mark the start of a source location returning the machine code offset
    /// and the relative source code location.
    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;

    /// Mark the end of a source location.
    fn end_source_loc(&mut self) -> Result<()>;

    /// The current offset, in bytes from the beginning of the function.
    fn current_code_offset(&self) -> Result<CodeOffset>;

    /// Performs a 128-bit addition.
    fn add128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

    /// Performs a 128-bit subtraction.
    fn sub128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

    /// Performs a widening multiplication from two 64-bit operands into a
    /// 128-bit result.
    ///
    /// Note that some platforms require special handling of registers in this
    /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
        -> Result<()>;
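
    // Background note (not in the original source): on x64, for instance, the
    // one-operand `mul`/`imul` forms write the 128-bit product to the fixed
    // register pair RDX:RAX, so the implementation needs the context to pin
    // those specific registers rather than taking arbitrary `Reg` arguments.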

    /// Takes the value in a source operand and replicates it across lanes of
    /// `size` in a destination result.
    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
    /// using lanes as a mask to select which indexes to copy.
    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

    /// Performs the RMW `op` operation on the passed `addr`.
    ///
    /// The value at `addr` *before* the operation is performed is written back
    /// to the `operand` register.
    fn atomic_rmw(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        op: RmwOp,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;
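
    // Illustrative mapping (not in the original source): a Wasm
    // `i64.atomic.rmw8.add_u` could lower to this hook with `RmwOp::Add`, an
    // access size of `OperandSize::S8`, and `extend` requesting a zero
    // extension of the loaded byte to 64 bits, while a full-width
    // `i64.atomic.rmw.add` would pass `extend: None`.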

    /// Extracts the scalar value from `src` in `lane` to `dst`.
    fn extract_lane(
        &mut self,
        src: Reg,
        dst: WritableReg,
        lane: u8,
        kind: ExtractLaneKind,
    ) -> Result<()>;

    /// Replaces the value in `lane` in `dst` with the value in `src`.
    fn replace_lane(
        &mut self,
        src: RegImm,
        dst: WritableReg,
        lane: u8,
        kind: ReplaceLaneKind,
    ) -> Result<()>;

    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
    /// and `replacement` (at the top of the context's stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
    /// expect parameters in specific registers. The context stack contains the `replacement`
    /// and `expected` values, in that order. The implementer is expected to push the value at
    /// `addr` prior to the update onto the context's stack before returning.
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;
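
    // Illustrative note (not in the original source): a Wasm
    // `i32.atomic.rmw.cmpxchg` could lower to this hook with a 32-bit access
    // size and `extend: None`. On x64, for instance, the `cmpxchg` instruction
    // requires the expected value in RAX, which is why the hook receives the
    // whole context rather than individual operand registers.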

    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Emit a memory fence.
    fn fence(&mut self) -> Result<()>;

    /// Perform a logical `not` operation on the 128-bit vector value in `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    ///
    /// `and_not` is not commutative: dst = !src1 & src2.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
    /// from `src1` when mask is 1, and from `src2` when mask is 0.
    ///
    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
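
    // Minimal sketch (not in the original source) of a possible lowering in
    // terms of the other bitwise hooks on this trait, assuming `dst` does not
    // alias the sources; `tmp` is a hypothetical scratch vector register:
    //
    //     masm.v128_and(src1, mask, writable!(tmp))?;  // tmp = src1 & mask
    //     masm.v128_and_not(mask, src2, dst)?;         // dst = !mask & src2
    //     masm.v128_or(tmp, dst.to_reg(), dst)?;       // dst = tmp | dst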

    /// If any bit in `src` is 1, set `dst` to 1, or to 0 otherwise.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Convert a vector of integers to a vector of floats.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

    /// Convert two input vectors into a smaller lane vector by narrowing each
    /// lane.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;

    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts the low or high half of the smaller lane vector to a larger
    /// lane vector.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
        -> Result<()>;

    /// Perform an absolute value operation on a vector.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

    /// Vectorized negate of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
    /// integer at the top of the stack, on the 128-bit vector specified by the second value
    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;
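
    // Worked example (illustration, not in the original source): for a Wasm
    // `i32x4.shl` the lane width is 32, so a shift amount of 35 on the stack
    // is reduced to 35 % 32 = 3, and each 32-bit lane is shifted left by 3.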

    /// Perform a saturating integer q-format rounding multiplication.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;

    /// Sets `dst` to 1 if all lanes in `src` are non-zero, or to 0 otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Extracts the high bit of each lane in `src` and produces a scalar mask
    /// with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise truncation operation.
    ///
    /// If using an integer kind of truncation, then this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is outside
    /// the range of the destination type, the result is saturated to the
    /// nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;
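
    // Worked example (illustration, not in the original source): for a Wasm
    // `i32x4.trunc_sat_f32x4_s`, an input lane of NaN produces 0, a lane of
    // 3.0e9 saturates to i32::MAX (2147483647), and a lane of -3.0e9
    // saturates to i32::MIN (-2147483648).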

    /// Perform a lane-wise `min` operation between `src1` and `src2`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
        -> Result<()>;

    /// Perform a lane-wise `max` operation between `src1` and `src2`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
        -> Result<()>;

    /// Perform a lane-wise integer extended multiplication, producing a result
    /// twice as wide as the inputs. This is equivalent to an extend followed
    /// by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
    /// e.g., if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
    /// 8 bits of the 16-bit lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;
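
    // Illustrative mapping (not in the original source): the Wasm opcode
    // `i16x8.extmul_low_i8x16_s` takes the low eight 8-bit lanes of each
    // input, sign-extends them to 16 bits, and multiplies them, yielding
    // eight 16-bit products.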

    /// Perform a lane-wise integer extended pairwise addition, producing
    /// extended results (twice as wide as the inputs).
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;

    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

    /// Count the number of bits set in each lane.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

    /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`,
    /// putting the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE division on vectors of floats.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE square root of a vector of floats.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise ceiling of a vector of floats.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise flooring of a vector of floats.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise rounding to the nearest integer for a vector of floats.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise minimum value, defined as `rhs < lhs ? rhs : lhs`.
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise maximum value, defined as `lhs < rhs ? rhs : lhs`.
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}