pulley_interpreter/lib.rs

1//! The pulley bytecode for fast interpreters.
2
3#![cfg_attr(docsrs, feature(doc_cfg))]
4#![cfg_attr(pulley_tail_calls, feature(explicit_tail_calls))]
5#![cfg_attr(pulley_tail_calls, allow(incomplete_features, unstable_features))]
6#![deny(missing_docs)]
7#![no_std]
8
9#[cfg(feature = "std")]
10#[macro_use]
11extern crate std;
12
13#[cfg(feature = "decode")]
14extern crate alloc;
15
16/// Calls the given macro with each opcode.
17///
18/// # Instruction Guidelines
19///
20/// We're inventing an instruction set here which naturally brings a whole set
21/// of design questions. Note that this is explicitly intended to be only ever
22/// used for Pulley where there are a different set of design constraints than
23/// other instruction sets (e.g. general-purpose CPU ISAs). Some examples of
24/// constraints for Pulley are:
25///
26/// * Instructions must be portable to many architectures.
27/// * The Pulley ISA is mostly target-independent as the compilation target is
28///   currently only parameterized on pointer width and endianness.
29/// * Pulley instructions should strike a balance between time-to-decode and
30///   code size. For example, super fancy bit-packing tricks might be tough to
31///   decode in software but might be worthwhile if they're quite common and
32///   greatly reduce the size of the bytecode. There's not a hard-and-fast
33///   answer here, but a balance to be made.
34/// * Many "macro ops" are present to reduce the size of compiled bytecode so
35///   there is a wide set of duplicate functionality between opcodes (and this
36///   is expected).
37///
38/// Given all this it's also useful to have a set of guidelines used to name and
39/// develop Pulley instructions. As of the time of this writing it's still
40/// pretty early days for Pulley so some of these guidelines may change over
41/// time. Additionally, instructions don't necessarily all follow these
42/// conventions and that may also change over time. With that in mind, here's a
43/// rough set of guidelines:
44///
45/// * Most instructions are prefixed with `x`, `f`, or `v`, indicating which
46///   type of register they're operating on. (e.g. `xadd32` operates on the `x`
47///   integer registers and `fadd32` operates on the `f` float registers).
48///
49/// * Most instructions are suffixed or otherwise contain the bit width they're
50///   operating on. For example `xadd32` is a 32-bit addition.
51///
52/// * If an instruction operates on signed or unsigned data (such as division
53///   and remainder), then the instruction is suffixed with `_s` or `_u`.
54///
55/// * Instructions operate on either 32 or 64-bit parts of a register.
56///   Instructions modifying only 32-bits of a register always modify the "low"
57///   part of a register and leave the upper part unmodified. This is intended
58///   to help 32-bit platforms: if most operations are 32-bit, there's no
59///   need for extra instructions to sign- or zero-extend and modify the upper
60///   half of the register.
61///
62/// * Binops use `BinaryOperands<T>` for the destination and argument registers.
63///
64/// * Instructions operating on memory contain a few pieces of information:
65///
66///   ```text
67///   xload16le_u32_o32
68///   │└─┬┘└┤└┤ └┬┘ └┬┘
69///   │  │  │ │  │   ▼
70///   │  │  │ │  │   addressing mode
71///   │  │  │ │  ▼
72///   │  │  │ │  width of register modified + sign-extension (optional)
73///   │  │  │ ▼
74///   │  │  │ endianness of the operation (le/be)
75///   │  │  ▼
76///   │  │  bit-width of the operation
77///   │  ▼
78///   │  what's happening (load/store)
79///   ▼
80///   register being operated on (x/f/v)
81///   ```
82///
83/// More guidelines might get added here over time, and if you have any
84/// questions feel free to raise them and we can try to add them here as well!
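///
/// # Example
///
/// A minimal sketch (hypothetical, not an API provided by this crate) of how a
/// consumer macro might expand each opcode into an enum variant; the
/// `define_op_enum` macro and the `Op` enum below are illustrative names only:
///
/// ```ignore
/// macro_rules! define_op_enum {
///     (
///         $(
///             $( #[$attr:meta] )*
///             $snake:ident = $name:ident $( {
///                 $( $field:ident : $ty:ty ),*
///             } )? ;
///         )*
///     ) => {
///         /// One variant per Pulley opcode.
///         pub enum Op {
///             $( $name $( { $( $field: $ty ),* } )?, )*
///         }
///     };
/// }
///
/// for_each_op!(define_op_enum);
/// ```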
85#[macro_export]
86macro_rules! for_each_op {
87    ( $macro:ident ) => {
88        $macro! {
89            /// No-operation.
90            nop = Nop;
91
92            /// Transfer control to the address in the `lr` register.
93            ret = Ret;
94
95            /// Transfer control to the PC at the given offset and set the `lr`
96            /// register to the PC just after this instruction.
97            ///
98            /// This instruction generally assumes that the Pulley ABI is being
99            /// respected where arguments are in argument registers (starting at
100            /// x0 for integer arguments) and results are in result registers.
101            /// This instruction itself assumes that all arguments are already in
102            /// their registers. Subsequent instructions below enable moving
103            /// arguments into the correct registers as part of the same call
104            /// instruction.
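            ///
            /// For example, a two-argument call could be encoded either with
            /// explicit moves followed by a plain `call`, or with the fused
            /// `call2` macro-op below (hypothetical assembly syntax with the
            /// destination register written first):
            ///
            /// ```text
            /// xmov x0, x10      ; x0 = x10
            /// xmov x1, x11      ; x1 = x11
            /// call <offset>
            ///
            /// ; equivalent, but smaller:
            /// call2 x10, x11, <offset>
            /// ```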
105            call = Call { offset: PcRelOffset };
106            /// Like `call`, but also `x0 = arg1`
107            call1 = Call1 { arg1: XReg, offset: PcRelOffset };
108            /// Like `call`, but also `x0, x1 = arg1, arg2`
109            call2 = Call2 { arg1: XReg, arg2: XReg, offset: PcRelOffset };
110            /// Like `call`, but also `x0, x1, x2 = arg1, arg2, arg3`
111            call3 = Call3 { arg1: XReg, arg2: XReg, arg3: XReg, offset: PcRelOffset };
112            /// Like `call`, but also `x0, x1, x2, x3 = arg1, arg2, arg3, arg4`
113            call4 = Call4 { arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg, offset: PcRelOffset };
114
115            /// Transfer control to the PC in `reg` and set `lr` to the PC just
116            /// after this instruction.
117            call_indirect = CallIndirect { reg: XReg };
118
119            /// Unconditionally transfer control to the PC at the given offset.
120            jump = Jump { offset: PcRelOffset };
121
122            /// Unconditionally transfer control to the PC in the specified
123            /// register.
124            xjump = XJump { reg: XReg };
125
126            /// Conditionally transfer control to the given PC offset if
127            /// `low32(cond)` contains a non-zero value.
128            br_if32 = BrIf { cond: XReg, offset: PcRelOffset };
129
130            /// Conditionally transfer control to the given PC offset if
131            /// `low32(cond)` contains a zero value.
132            br_if_not32 = BrIfNot { cond: XReg, offset: PcRelOffset };
133
134            /// Branch if `a == b`.
135            br_if_xeq32 = BrIfXeq32 { a: XReg, b: XReg, offset: PcRelOffset };
136            /// Branch if `a != b`.
137            br_if_xneq32 = BrIfXneq32 { a: XReg, b: XReg, offset: PcRelOffset };
138            /// Branch if signed `a < b`.
139            br_if_xslt32 = BrIfXslt32 { a: XReg, b: XReg, offset: PcRelOffset };
140            /// Branch if signed `a <= b`.
141            br_if_xslteq32 = BrIfXslteq32 { a: XReg, b: XReg, offset: PcRelOffset };
142            /// Branch if unsigned `a < b`.
143            br_if_xult32 = BrIfXult32 { a: XReg, b: XReg, offset: PcRelOffset };
144            /// Branch if unsigned `a <= b`.
145            br_if_xulteq32 = BrIfXulteq32 { a: XReg, b: XReg, offset: PcRelOffset };
146            /// Branch if `a == b`.
147            br_if_xeq64 = BrIfXeq64 { a: XReg, b: XReg, offset: PcRelOffset };
148            /// Branch if `a != b`.
149            br_if_xneq64 = BrIfXneq64 { a: XReg, b: XReg, offset: PcRelOffset };
150            /// Branch if signed `a < b`.
151            br_if_xslt64 = BrIfXslt64 { a: XReg, b: XReg, offset: PcRelOffset };
152            /// Branch if signed `a <= b`.
153            br_if_xslteq64 = BrIfXslteq64 { a: XReg, b: XReg, offset: PcRelOffset };
154            /// Branch if unsigned `a < b`.
155            br_if_xult64 = BrIfXult64 { a: XReg, b: XReg, offset: PcRelOffset };
156            /// Branch if unsigned `a <= b`.
157            br_if_xulteq64 = BrIfXulteq64 { a: XReg, b: XReg, offset: PcRelOffset };
158
159            /// Branch if `a == b`.
160            br_if_xeq32_i8 = BrIfXeq32I8 { a: XReg, b: i8, offset: PcRelOffset };
161            /// Branch if `a == b`.
162            br_if_xeq32_i32 = BrIfXeq32I32 { a: XReg, b: i32, offset: PcRelOffset };
163            /// Branch if `a != b`.
164            br_if_xneq32_i8 = BrIfXneq32I8 { a: XReg, b: i8, offset: PcRelOffset };
165            /// Branch if `a != b`.
166            br_if_xneq32_i32 = BrIfXneq32I32 { a: XReg, b: i32, offset: PcRelOffset };
167            /// Branch if signed `a < b`.
168            br_if_xslt32_i8 = BrIfXslt32I8 { a: XReg, b: i8, offset: PcRelOffset };
169            /// Branch if signed `a < b`.
170            br_if_xslt32_i32 = BrIfXslt32I32 { a: XReg, b: i32, offset: PcRelOffset };
171            /// Branch if signed `a > b`.
172            br_if_xsgt32_i8 = BrIfXsgt32I8 { a: XReg, b: i8, offset: PcRelOffset };
173            /// Branch if signed `a > b`.
174            br_if_xsgt32_i32 = BrIfXsgt32I32 { a: XReg, b: i32, offset: PcRelOffset };
175            /// Branch if signed `a <= b`.
176            br_if_xslteq32_i8 = BrIfXslteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
177            /// Branch if signed `a <= b`.
178            br_if_xslteq32_i32 = BrIfXslteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
179            /// Branch if signed `a >= b`.
180            br_if_xsgteq32_i8 = BrIfXsgteq32I8 { a: XReg, b: i8, offset: PcRelOffset };
181            /// Branch if signed `a >= b`.
182            br_if_xsgteq32_i32 = BrIfXsgteq32I32 { a: XReg, b: i32, offset: PcRelOffset };
183            /// Branch if unsigned `a < b`.
184            br_if_xult32_u8 = BrIfXult32U8 { a: XReg, b: u8, offset: PcRelOffset };
185            /// Branch if unsigned `a < b`.
186            br_if_xult32_u32 = BrIfXult32U32 { a: XReg, b: u32, offset: PcRelOffset };
187            /// Branch if unsigned `a <= b`.
188            br_if_xulteq32_u8 = BrIfXulteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
189            /// Branch if unsigned `a <= b`.
190            br_if_xulteq32_u32 = BrIfXulteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
191            /// Branch if unsigned `a > b`.
192            br_if_xugt32_u8 = BrIfXugt32U8 { a: XReg, b: u8, offset: PcRelOffset };
193            /// Branch if unsigned `a > b`.
194            br_if_xugt32_u32 = BrIfXugt32U32 { a: XReg, b: u32, offset: PcRelOffset };
195            /// Branch if unsigned `a >= b`.
196            br_if_xugteq32_u8 = BrIfXugteq32U8 { a: XReg, b: u8, offset: PcRelOffset };
197            /// Branch if unsigned `a >= b`.
198            br_if_xugteq32_u32 = BrIfXugteq32U32 { a: XReg, b: u32, offset: PcRelOffset };
199
200            /// Branch if `a == b`.
201            br_if_xeq64_i8 = BrIfXeq64I8 { a: XReg, b: i8, offset: PcRelOffset };
202            /// Branch if `a == b`.
203            br_if_xeq64_i32 = BrIfXeq64I32 { a: XReg, b: i32, offset: PcRelOffset };
204            /// Branch if `a != b`.
205            br_if_xneq64_i8 = BrIfXneq64I8 { a: XReg, b: i8, offset: PcRelOffset };
206            /// Branch if `a != b`.
207            br_if_xneq64_i32 = BrIfXneq64I32 { a: XReg, b: i32, offset: PcRelOffset };
208            /// Branch if signed `a < b`.
209            br_if_xslt64_i8 = BrIfXslt64I8 { a: XReg, b: i8, offset: PcRelOffset };
210            /// Branch if signed `a < b`.
211            br_if_xslt64_i32 = BrIfXslt64I32 { a: XReg, b: i32, offset: PcRelOffset };
212            /// Branch if signed `a > b`.
213            br_if_xsgt64_i8 = BrIfXsgt64I8 { a: XReg, b: i8, offset: PcRelOffset };
214            /// Branch if signed `a > b`.
215            br_if_xsgt64_i32 = BrIfXsgt64I32 { a: XReg, b: i32, offset: PcRelOffset };
216            /// Branch if signed `a <= b`.
217            br_if_xslteq64_i8 = BrIfXslteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
218            /// Branch if signed `a <= b`.
219            br_if_xslteq64_i32 = BrIfXslteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
220            /// Branch if signed `a >= b`.
221            br_if_xsgteq64_i8 = BrIfXsgteq64I8 { a: XReg, b: i8, offset: PcRelOffset };
222            /// Branch if signed `a >= b`.
223            br_if_xsgteq64_i32 = BrIfXsgteq64I32 { a: XReg, b: i32, offset: PcRelOffset };
224            /// Branch if unsigned `a < b`.
225            br_if_xult64_u8 = BrIfXult64U8 { a: XReg, b: u8, offset: PcRelOffset };
226            /// Branch if unsigned `a < b`.
227            br_if_xult64_u32 = BrIfXult64U32 { a: XReg, b: u32, offset: PcRelOffset };
228            /// Branch if unsigned `a <= b`.
229            br_if_xulteq64_u8 = BrIfXulteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
230            /// Branch if unsigned `a <= b`.
231            br_if_xulteq64_u32 = BrIfXulteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
232            /// Branch if unsigned `a > b`.
233            br_if_xugt64_u8 = BrIfXugt64U8 { a: XReg, b: u8, offset: PcRelOffset };
234            /// Branch if unsigned `a > b`.
235            br_if_xugt64_u32 = BrIfXugt64U32 { a: XReg, b: u32, offset: PcRelOffset };
236            /// Branch if unsigned `a >= b`.
237            br_if_xugteq64_u8 = BrIfXugteq64U8 { a: XReg, b: u8, offset: PcRelOffset };
238            /// Branch if unsigned `a >= b`.
239            br_if_xugteq64_u32 = BrIfXugteq64U32 { a: XReg, b: u32, offset: PcRelOffset };
240
241            /// Branch to the label indicated by `low32(idx)`.
242            ///
243            /// After this instruction are `amt` instances of `PcRelOffset`
244            /// and the `idx` selects which one will be branched to. The value
245            /// of `idx` is clamped to `amt - 1` (i.e. the last offset is the
246            /// "default" one).
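            ///
            /// For example, a three-way table might be laid out as follows
            /// (illustrative only):
            ///
            /// ```text
            /// br_table32 idx=x5, amt=3
            /// PcRelOffset   ; taken when low32(x5) == 0
            /// PcRelOffset   ; taken when low32(x5) == 1
            /// PcRelOffset   ; taken when low32(x5) >= 2 (the "default")
            /// ```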
247            br_table32 = BrTable32 { idx: XReg, amt: u32 };
248
249            /// Move between `x` registers.
250            xmov = Xmov { dst: XReg, src: XReg };
251
252            /// Set `dst = 0`
253            xzero = Xzero { dst: XReg };
254            /// Set `dst = 1`
255            xone = Xone { dst: XReg };
256            /// Set `dst = sign_extend(imm8)`.
257            xconst8 = Xconst8 { dst: XReg, imm: i8 };
258            /// Set `dst = sign_extend(imm16)`.
259            xconst16 = Xconst16 { dst: XReg, imm: i16 };
260            /// Set `dst = sign_extend(imm32)`.
261            xconst32 = Xconst32 { dst: XReg, imm: i32 };
262            /// Set `dst = imm64`.
263            xconst64 = Xconst64 { dst: XReg, imm: i64 };
264
265            /// 32-bit wrapping addition: `low32(dst) = low32(src1) + low32(src2)`.
266            ///
267            /// The upper 32-bits of `dst` are unmodified.
268            xadd32 = Xadd32 { operands: BinaryOperands<XReg> };
269            /// Same as `xadd32` but `src2` is a zero-extended 8-bit immediate.
270            xadd32_u8 = Xadd32U8 { dst: XReg, src1: XReg, src2: u8 };
271            /// Same as `xadd32` but `src2` is a 32-bit immediate.
272            xadd32_u32 = Xadd32U32 { dst: XReg, src1: XReg, src2: u32 };
273
274            /// 64-bit wrapping addition: `dst = src1 + src2`.
275            xadd64 = Xadd64 { operands: BinaryOperands<XReg> };
276            /// Same as `xadd64` but `src2` is a zero-extended 8-bit immediate.
277            xadd64_u8 = Xadd64U8 { dst: XReg, src1: XReg, src2: u8 };
278            /// Same as `xadd64` but `src2` is a zero-extended 32-bit immediate.
279            xadd64_u32 = Xadd64U32 { dst: XReg, src1: XReg, src2: u32 };
280
281            /// `low32(dst) = low32(src1) * low32(src2) + low32(src3)`
282            xmadd32 = Xmadd32 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
283            /// `dst = src1 * src2 + src3`
284            xmadd64 = Xmadd64 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
285
286            /// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`.
287            ///
288            /// The upper 32-bits of `dst` are unmodified.
289            xsub32 = Xsub32 { operands: BinaryOperands<XReg> };
290            /// Same as `xsub32` but `src2` is a zero-extended 8-bit immediate.
291            xsub32_u8 = Xsub32U8 { dst: XReg, src1: XReg, src2: u8 };
292            /// Same as `xsub32` but `src2` is a 32-bit immediate.
293            xsub32_u32 = Xsub32U32 { dst: XReg, src1: XReg, src2: u32 };
294
295            /// 64-bit wrapping subtraction: `dst = src1 - src2`.
296            xsub64 = Xsub64 { operands: BinaryOperands<XReg> };
297            /// Same as `xsub64` but `src2` is a zero-extended 8-bit immediate.
298            xsub64_u8 = Xsub64U8 { dst: XReg, src1: XReg, src2: u8 };
299            /// Same as `xsub64` but `src2` is a zero-extended 32-bit immediate.
300            xsub64_u32 = Xsub64U32 { dst: XReg, src1: XReg, src2: u32 };
301
302            /// `low32(dst) = low32(src1) * low32(src2)`
303            xmul32 = XMul32 { operands: BinaryOperands<XReg> };
304            /// Same as `xmul64` but `src2` is a sign-extended 8-bit immediate.
305            xmul32_s8 = Xmul32S8 { dst: XReg, src1: XReg, src2: i8 };
306            /// Same as `xmul32` but `src2` is a sign-extended 32-bit immediate.
307            xmul32_s32 = Xmul32S32 { dst: XReg, src1: XReg, src2: i32 };
308
309            /// `dst = src1 * src2`
310            xmul64 = XMul64 { operands: BinaryOperands<XReg> };
311            /// Same as `xmul64` but `src2` is a sign-extended 8-bit immediate.
312            xmul64_s8 = Xmul64S8 { dst: XReg, src1: XReg, src2: i8 };
313            /// Same as `xmul64` but `src2` is a sign-extended 32-bit immediate.
314            xmul64_s32 = Xmul64S32 { dst: XReg, src1: XReg, src2: i32 };
315
316            /// `low32(dst) = trailing_zeros(low32(src))`
317            xctz32 = Xctz32 { dst: XReg, src: XReg };
318            /// `dst = trailing_zeros(src)`
319            xctz64 = Xctz64 { dst: XReg, src: XReg };
320
321            /// `low32(dst) = leading_zeros(low32(src))`
322            xclz32 = Xclz32 { dst: XReg, src: XReg };
323            /// `dst = leading_zeros(src)`
324            xclz64 = Xclz64 { dst: XReg, src: XReg };
325
326            /// `low32(dst) = count_ones(low32(src))`
327            xpopcnt32 = Xpopcnt32 { dst: XReg, src: XReg };
328            /// `dst = count_ones(src)`
329            xpopcnt64 = Xpopcnt64 { dst: XReg, src: XReg };
330
331            /// `low32(dst) = rotate_left(low32(src1), low32(src2))`
332            xrotl32 = Xrotl32 { operands: BinaryOperands<XReg> };
333            /// `dst = rotate_left(src1, src2)`
334            xrotl64 = Xrotl64 { operands: BinaryOperands<XReg> };
335
336            /// `low32(dst) = rotate_right(low32(src1), low32(src2))`
337            xrotr32 = Xrotr32 { operands: BinaryOperands<XReg> };
338            /// `dst = rotate_right(src1, src2)`
339            xrotr64 = Xrotr64 { operands: BinaryOperands<XReg> };
340
341            /// `low32(dst) = low32(src1) << low5(src2)`
342            xshl32 = Xshl32 { operands: BinaryOperands<XReg> };
343            /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
344            xshr32_s = Xshr32S { operands: BinaryOperands<XReg> };
345            /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
346            xshr32_u = Xshr32U { operands: BinaryOperands<XReg> };
347            /// `dst = src1 << low6(src2)`
348            xshl64 = Xshl64 { operands: BinaryOperands<XReg> };
349            /// `dst = src1 >> low6(src2)` (signed)
350            xshr64_s = Xshr64S { operands: BinaryOperands<XReg> };
351            /// `dst = src1 >> low6(src2)` (unsigned)
352            xshr64_u = Xshr64U { operands: BinaryOperands<XReg> };
353
354            /// `low32(dst) = low32(src1) << low5(src2)`
355            xshl32_u6 = Xshl32U6 { operands: BinaryOperands<XReg, XReg, U6> };
356            /// `low32(dst) = low32(src1) >> low5(src2)` (signed)
357            xshr32_s_u6 = Xshr32SU6 { operands: BinaryOperands<XReg, XReg, U6> };
358            /// `low32(dst) = low32(src1) >> low5(src2)` (unsigned)
359            xshr32_u_u6 = Xshr32UU6 { operands: BinaryOperands<XReg, XReg, U6> };
360            /// `dst = src1 << low6(src2)`
361            xshl64_u6 = Xshl64U6 { operands: BinaryOperands<XReg, XReg, U6> };
362            /// `dst = src1 >> low6(src2)` (signed)
363            xshr64_s_u6 = Xshr64SU6 { operands: BinaryOperands<XReg, XReg, U6> };
364            /// `dst = src1 >> low6(src2)` (unsigned)
365            xshr64_u_u6 = Xshr64UU6 { operands: BinaryOperands<XReg, XReg, U6> };
366
367            /// `low32(dst) = -low32(src)`
368            xneg32 = Xneg32 { dst: XReg, src: XReg };
369            /// `dst = -src`
370            xneg64 = Xneg64 { dst: XReg, src: XReg };
371
372            /// `low32(dst) = src1 == src2`
373            xeq64 = Xeq64 { operands: BinaryOperands<XReg> };
374            /// `low32(dst) = src1 != src2`
375            xneq64 = Xneq64 { operands: BinaryOperands<XReg> };
376            /// `low32(dst) = src1 < src2` (signed)
377            xslt64 = Xslt64 { operands: BinaryOperands<XReg> };
378            /// `low32(dst) = src1 <= src2` (signed)
379            xslteq64 = Xslteq64 { operands: BinaryOperands<XReg> };
380            /// `low32(dst) = src1 < src2` (unsigned)
381            xult64 = Xult64 { operands: BinaryOperands<XReg> };
382            /// `low32(dst) = src1 <= src2` (unsigned)
383            xulteq64 = Xulteq64 { operands: BinaryOperands<XReg> };
384            /// `low32(dst) = low32(src1) == low32(src2)`
385            xeq32 = Xeq32 { operands: BinaryOperands<XReg> };
386            /// `low32(dst) = low32(src1) != low32(src2)`
387            xneq32 = Xneq32 { operands: BinaryOperands<XReg> };
388            /// `low32(dst) = low32(src1) < low32(src2)` (signed)
389            xslt32 = Xslt32 { operands: BinaryOperands<XReg> };
390            /// `low32(dst) = low32(src1) <= low32(src2)` (signed)
391            xslteq32 = Xslteq32 { operands: BinaryOperands<XReg> };
392            /// `low32(dst) = low32(src1) < low32(src2)` (unsigned)
393            xult32 = Xult32 { operands: BinaryOperands<XReg> };
394            /// `low32(dst) = low32(src1) <= low32(src2)` (unsigned)
395            xulteq32 = Xulteq32 { operands: BinaryOperands<XReg> };
396
397            // Loads/stores with various addressing modes. Note that each style
398            // of addressing mode is split to its own suite of instructions to
399            // simplify the implementation of each opcode and avoid internal
400            // branching when using one addressing mode vs another.
401            //
402            // Note that big-endian, float, and vector loads are deferred to
403            // the "extended" opcode set below.
404
405            /// `low32(dst) = zext_8_32(*addr)`
406            xload8_u32_o32 = XLoad8U32O32 { dst: XReg, addr: AddrO32 };
407            /// `low32(dst) = sext_8_32(*addr)`
408            xload8_s32_o32 = XLoad8S32O32 { dst: XReg, addr: AddrO32 };
409            /// `low32(dst) = zext_16_32(*addr)`
410            xload16le_u32_o32 = XLoad16LeU32O32 { dst: XReg, addr: AddrO32 };
411            /// `low32(dst) = sext_16_32(*addr)`
412            xload16le_s32_o32 = XLoad16LeS32O32 { dst: XReg, addr: AddrO32 };
413            /// `low32(dst) = *addr`
414            xload32le_o32 = XLoad32LeO32 { dst: XReg, addr: AddrO32 };
415            /// `dst = *addr`
416            xload64le_o32 = XLoad64LeO32 { dst: XReg, addr: AddrO32 };
417            /// `*addr = low8(src)`
418            xstore8_o32 = XStore8O32 { addr: AddrO32, src: XReg };
419            /// `*addr = low16(src)`
420            xstore16le_o32 = XStore16LeO32 { addr: AddrO32, src: XReg };
421            /// `*addr = low32(src)`
422            xstore32le_o32 = XStore32LeO32 { addr: AddrO32, src: XReg };
423            /// `*addr = src`
424            xstore64le_o32 = XStore64LeO32 { addr: AddrO32, src: XReg };
425
426            /// `low32(dst) = zext_8_32(*addr)`
427            xload8_u32_z = XLoad8U32Z { dst: XReg, addr: AddrZ };
428            /// `low32(dst) = sext_8_32(*addr)`
429            xload8_s32_z = XLoad8S32Z { dst: XReg, addr: AddrZ };
430            /// `low32(dst) = zext_16_32(*addr)`
431            xload16le_u32_z = XLoad16LeU32Z { dst: XReg, addr: AddrZ };
432            /// `low32(dst) = sext_16_32(*addr)`
433            xload16le_s32_z = XLoad16LeS32Z { dst: XReg, addr: AddrZ };
434            /// `low32(dst) = *addr`
435            xload32le_z = XLoad32LeZ { dst: XReg, addr: AddrZ };
436            /// `dst = *addr`
437            xload64le_z = XLoad64LeZ { dst: XReg, addr: AddrZ };
438            /// `*addr = low8(src)`
439            xstore8_z = XStore8Z { addr: AddrZ, src: XReg };
440            /// `*addr = low16(src)`
441            xstore16le_z = XStore16LeZ { addr: AddrZ, src: XReg };
442            /// `*addr = low32(src)`
443            xstore32le_z = XStore32LeZ { addr: AddrZ, src: XReg };
444            /// `*addr = src`
445            xstore64le_z = XStore64LeZ { addr: AddrZ, src: XReg };
446
447            /// `low32(dst) = zext_8_32(*addr)`
448            xload8_u32_g32 = XLoad8U32G32 { dst: XReg, addr: AddrG32 };
449            /// `low32(dst) = sext_8_32(*addr)`
450            xload8_s32_g32 = XLoad8S32G32 { dst: XReg, addr: AddrG32 };
451            /// `low32(dst) = zext_16_32(*addr)`
452            xload16le_u32_g32 = XLoad16LeU32G32 { dst: XReg, addr: AddrG32 };
453            /// `low32(dst) = sext_16_32(*addr)`
454            xload16le_s32_g32 = XLoad16LeS32G32 { dst: XReg, addr: AddrG32 };
455            /// `low32(dst) = *addr`
456            xload32le_g32 = XLoad32LeG32 { dst: XReg, addr: AddrG32 };
457            /// `dst = *addr`
458            xload64le_g32 = XLoad64LeG32 { dst: XReg, addr: AddrG32 };
459            /// `*addr = low8(src)`
460            xstore8_g32 = XStore8G32 { addr: AddrG32, src: XReg };
461            /// `*addr = low16(src)`
462            xstore16le_g32 = XStore16LeG32 { addr: AddrG32, src: XReg };
463            /// `*addr = low32(src)`
464            xstore32le_g32 = XStore32LeG32 { addr: AddrG32, src: XReg };
465            /// `*addr = src`
466            xstore64le_g32 = XStore64LeG32 { addr: AddrG32, src: XReg };
467
468            /// `low32(dst) = zext_8_32(*addr)`
469            xload8_u32_g32bne = XLoad8U32G32Bne { dst: XReg, addr: AddrG32Bne };
470            /// `low32(dst) = sext_8_32(*addr)`
471            xload8_s32_g32bne = XLoad8S32G32Bne { dst: XReg, addr: AddrG32Bne };
472            /// `low32(dst) = zext_16_32(*addr)`
473            xload16le_u32_g32bne = XLoad16LeU32G32Bne { dst: XReg, addr: AddrG32Bne };
474            /// `low32(dst) = sext_16_32(*addr)`
475            xload16le_s32_g32bne = XLoad16LeS32G32Bne { dst: XReg, addr: AddrG32Bne };
476            /// `low32(dst) = *addr`
477            xload32le_g32bne = XLoad32LeG32Bne { dst: XReg, addr: AddrG32Bne };
478            /// `dst = *addr`
479            xload64le_g32bne = XLoad64LeG32Bne { dst: XReg, addr: AddrG32Bne };
480            /// `*addr = low8(src)`
481            xstore8_g32bne = XStore8G32Bne { addr: AddrG32Bne, src: XReg };
482            /// `*addr = low16(src)`
483            xstore16le_g32bne = XStore16LeG32Bne { addr: AddrG32Bne, src: XReg };
484            /// `*addr = low32(src)`
485            xstore32le_g32bne = XStore32LeG32Bne { addr: AddrG32Bne, src: XReg };
486            /// `*addr = src`
487            xstore64le_g32bne = XStore64LeG32Bne { addr: AddrG32Bne, src: XReg };
488
489
490            /// `push lr; push fp; fp = sp`
491            push_frame = PushFrame ;
492            /// `sp = fp; pop fp; pop lr`
493            pop_frame = PopFrame ;
494
495            /// Macro-instruction to enter a function, allocate some stack, and
496            /// then save some registers.
497            ///
498            /// This is equivalent to `push_frame`, `stack_alloc32 amt`, then
499            /// saving all of `regs` to the top of the stack just allocated.
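            ///
            /// For example, `push_frame_save` with `amt = 16` and
            /// `regs = {x18, x19}` behaves roughly like the following sequence
            /// (illustrative only):
            ///
            /// ```text
            /// push_frame
            /// stack_alloc32 16
            /// ; store x18 and x19 into the newly allocated stack space
            /// ```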
500            push_frame_save = PushFrameSave { amt: u16, regs: UpperRegSet<XReg> };
501            /// Inverse of `push_frame_save`. Restores `regs` from the top of
502            /// the stack, then runs `stack_free32 amt`, then runs `pop_frame`.
503            pop_frame_restore = PopFrameRestore { amt: u16, regs: UpperRegSet<XReg> };
504
505            /// `sp = sp.checked_sub(amt)`
506            stack_alloc32 = StackAlloc32 { amt: u32 };
507
508            /// `sp = sp + amt`
509            stack_free32 = StackFree32 { amt: u32 };
510
511            /// `dst = zext(low8(src))`
512            zext8 = Zext8 { dst: XReg, src: XReg };
513            /// `dst = zext(low16(src))`
514            zext16 = Zext16 { dst: XReg, src: XReg };
515            /// `dst = zext(low32(src))`
516            zext32 = Zext32 { dst: XReg, src: XReg };
517            /// `dst = sext(low8(src))`
518            sext8 = Sext8 { dst: XReg, src: XReg };
519            /// `dst = sext(low16(src))`
520            sext16 = Sext16 { dst: XReg, src: XReg };
521            /// `dst = sext(low32(src))`
522            sext32 = Sext32 { dst: XReg, src: XReg };
523
524            /// `low32(dst) = |low32(src)|`
525            xabs32 = XAbs32 { dst: XReg, src: XReg };
526            /// `dst = |src|`
527            xabs64 = XAbs64 { dst: XReg, src: XReg };
528
529            /// `low32(dst) = low32(src1) / low32(src2)` (signed)
530            xdiv32_s = XDiv32S { operands: BinaryOperands<XReg> };
531
532            /// `dst = src1 / src2` (signed)
533            xdiv64_s = XDiv64S { operands: BinaryOperands<XReg> };
534
535            /// `low32(dst) = low32(src1) / low32(src2)` (unsigned)
536            xdiv32_u = XDiv32U { operands: BinaryOperands<XReg> };
537
538            /// `dst = src1 / src2` (unsigned)
539            xdiv64_u = XDiv64U { operands: BinaryOperands<XReg> };
540
541            /// `low32(dst) = low32(src1) % low32(src2)` (signed)
542            xrem32_s = XRem32S { operands: BinaryOperands<XReg> };
543
544            /// `dst = src1 % src2` (signed)
545            xrem64_s = XRem64S { operands: BinaryOperands<XReg> };
546
547            /// `low32(dst) = low32(src1) % low32(src2)` (unsigned)
548            xrem32_u = XRem32U { operands: BinaryOperands<XReg> };
549
550            /// `dst = src1 % src2` (unsigned)
551            xrem64_u = XRem64U { operands: BinaryOperands<XReg> };
552
553            /// `low32(dst) = low32(src1) & low32(src2)`
554            xband32 = XBand32 { operands: BinaryOperands<XReg> };
555            /// Same as `xband32` but `src2` is a sign-extended 8-bit immediate.
556            xband32_s8 = Xband32S8 { dst: XReg, src1: XReg, src2: i8 };
557            /// Same as `xband32` but `src2` is a sign-extended 32-bit immediate.
558            xband32_s32 = Xband32S32 { dst: XReg, src1: XReg, src2: i32 };
559            /// `dst = src1 & src2`
560            xband64 = XBand64 { operands: BinaryOperands<XReg> };
561            /// Same as `xband64` but `src2` is a sign-extended 8-bit immediate.
562            xband64_s8 = Xband64S8 { dst: XReg, src1: XReg, src2: i8 };
563            /// Same as `xband64` but `src2` is a sign-extended 32-bit immediate.
564            xband64_s32 = Xband64S32 { dst: XReg, src1: XReg, src2: i32 };
565            /// `low32(dst) = low32(src1) | low32(src2)`
566            xbor32 = XBor32 { operands: BinaryOperands<XReg> };
567            /// Same as `xbor32` but `src2` is a sign-extended 8-bit immediate.
568            xbor32_s8 = Xbor32S8 { dst: XReg, src1: XReg, src2: i8 };
569            /// Same as `xbor32` but `src2` is a sign-extended 32-bit immediate.
570            xbor32_s32 = Xbor32S32 { dst: XReg, src1: XReg, src2: i32 };
571            /// `dst = src1 | src2`
572            xbor64 = XBor64 { operands: BinaryOperands<XReg> };
573            /// Same as `xbor64` but `src2` is a sign-extended 8-bit immediate.
574            xbor64_s8 = Xbor64S8 { dst: XReg, src1: XReg, src2: i8 };
575            /// Same as `xbor64` but `src2` is a sign-extended 32-bit immediate.
576            xbor64_s32 = Xbor64S32 { dst: XReg, src1: XReg, src2: i32 };
577
578            /// `low32(dst) = low32(src1) ^ low32(src2)`
579            xbxor32 = XBxor32 { operands: BinaryOperands<XReg> };
580            /// Same as `xbxor32` but `src2` is a sign-extended 8-bit immediate.
581            xbxor32_s8 = Xbxor32S8 { dst: XReg, src1: XReg, src2: i8 };
582            /// Same as `xbxor32` but `src2` is a sign-extended 32-bit immediate.
583            xbxor32_s32 = Xbxor32S32 { dst: XReg, src1: XReg, src2: i32 };
584            /// `dst = src1 ^ src2`
585            xbxor64 = XBxor64 { operands: BinaryOperands<XReg> };
586            /// Same as `xbxor64` but `src2` is a sign-extended 8-bit immediate.
587            xbxor64_s8 = Xbxor64S8 { dst: XReg, src1: XReg, src2: i8 };
588            /// Same as `xbxor64` but `src2` is a sign-extended 32-bit immediate.
589            xbxor64_s32 = Xbxor64S32 { dst: XReg, src1: XReg, src2: i32 };
590
591            /// `low32(dst) = !low32(src)`
592            xbnot32 = XBnot32 { dst: XReg, src: XReg };
593            /// `dst = !src`
594            xbnot64 = XBnot64 { dst: XReg, src: XReg };
595
596            /// `low32(dst) = min(low32(src1), low32(src2))` (unsigned)
597            xmin32_u = Xmin32U { operands: BinaryOperands<XReg> };
598            /// `low32(dst) = min(low32(src1), low32(src2))` (signed)
599            xmin32_s = Xmin32S { operands: BinaryOperands<XReg> };
600            /// `low32(dst) = max(low32(src1), low32(src2))` (unsigned)
601            xmax32_u = Xmax32U { operands: BinaryOperands<XReg> };
602            /// `low32(dst) = max(low32(src1), low32(src2))` (signed)
603            xmax32_s = Xmax32S { operands: BinaryOperands<XReg> };
604            /// `dst = min(src1, src2)` (unsigned)
605            xmin64_u = Xmin64U { operands: BinaryOperands<XReg> };
606            /// `dst = min(src1, src2)` (signed)
607            xmin64_s = Xmin64S { operands: BinaryOperands<XReg> };
608            /// `dst = max(src1, src2)` (unsigned)
609            xmax64_u = Xmax64U { operands: BinaryOperands<XReg> };
610            /// `dst = max(src1, src2)` (signed)
611            xmax64_s = Xmax64S { operands: BinaryOperands<XReg> };
612
613            /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
614            xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
615            /// `dst = low32(cond) ? if_nonzero : if_zero`
616            xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
617        }
618    };
619}
620
621/// Calls the given macro with each extended opcode.
622#[macro_export]
623macro_rules! for_each_extended_op {
624    ( $macro:ident ) => {
625        $macro! {
626            /// Raise a trap.
627            trap = Trap;
628
629            /// A special opcode to halt interpreter execution and yield control
630            /// back to the host.
631            ///
632            /// This opcode results in `DoneReason::CallIndirectHost` where the
633            /// `id` here is shepherded along to the embedder. It's up to the
634            /// embedder to determine what to do with the `id` and the current
635            /// state of registers and the stack.
636            ///
637            /// In Wasmtime this is used to implement interpreter-to-host calls.
638            /// This is modeled as a `call` instruction where the first
639            /// parameter is the native function pointer to invoke and all
640            /// remaining parameters for the native function are in the following
641            /// parameter positions (e.g. `x1`, `x2`, ...). The results of the
642            /// host call are then stored in `x0`.
643            ///
644            /// Handling this in Wasmtime is done through a "relocation" which
645            /// is resolved at link-time when raw bytecode from Cranelift is
646            /// assembled into the final object that Wasmtime will interpret.
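            ///
            /// A rough sketch of the register convention described above
            /// (illustrative only):
            ///
            /// ```text
            /// ; x0 = native function pointer to invoke
            /// ; x1, x2, ... = arguments for the native function
            /// call_indirect_host id
            /// ; x0 = result of the host call
            /// ```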
647            call_indirect_host = CallIndirectHost { id: u8 };
648
649            /// Adds `offset` to the PC of this instruction and stores the result in
650            /// `dst`.
651            xpcadd = Xpcadd { dst: XReg, offset: PcRelOffset };
652
653            /// Gets the special "fp" register and moves it into `dst`.
654            xmov_fp = XmovFp { dst: XReg };
655
656            /// Gets the special "lr" register and moves it into `dst`.
657            xmov_lr = XmovLr { dst: XReg };
658
659            /// `low32(dst) = byteswap(low32(src))`
660            bswap32 = Bswap32 { dst: XReg, src: XReg };
661            /// `dst = byteswap(src)`
662            bswap64 = Bswap64 { dst: XReg, src: XReg };
663
664            /// 32-bit checked unsigned addition: `low32(dst) = low32(src1) +
665            /// low32(src2)`.
666            ///
667            /// The upper 32-bits of `dst` are unmodified. Traps if the addition
668            /// overflows.
669            xadd32_uoverflow_trap = Xadd32UoverflowTrap { operands: BinaryOperands<XReg> };
670
671            /// 64-bit checked unsigned addition: `dst = src1 + src2`.
672            xadd64_uoverflow_trap = Xadd64UoverflowTrap { operands: BinaryOperands<XReg> };
673
674            /// `dst = high64(src1 * src2)` (signed)
675            xmulhi64_s = XMulHi64S { operands: BinaryOperands<XReg> };
676            /// `dst = high64(src1 * src2)` (unsigned)
677            xmulhi64_u = XMulHi64U { operands: BinaryOperands<XReg> };
678
679            /// `low32(dst) = if low32(src) == 0 { 0 } else { -1 }`
680            xbmask32 = Xbmask32 { dst: XReg, src: XReg };
681            /// `dst = if src == 0 { 0 } else { -1 }`
682            xbmask64 = Xbmask64 { dst: XReg, src: XReg };
683
684            // Big-endian loads/stores of X-registers using the "o32"
685            // addressing mode
686
687            /// `low32(dst) = zext(*addr)`
688            xload16be_u32_o32 = XLoad16BeU32O32 { dst: XReg, addr: AddrO32 };
689            /// `low32(dst) = sext(*addr)`
690            xload16be_s32_o32 = XLoad16BeS32O32 { dst: XReg, addr: AddrO32 };
691            /// `low32(dst) = *addr`
692            xload32be_o32 = XLoad32BeO32 { dst: XReg, addr: AddrO32 };
693            /// `dst = *addr`
694            xload64be_o32 = XLoad64BeO32 { dst: XReg, addr: AddrO32 };
695            /// `*addr = low16(src)`
696            xstore16be_o32 = XStore16BeO32 { addr: AddrO32, src: XReg };
697            /// `*addr = low32(src)`
698            xstore32be_o32 = XStore32BeO32 { addr: AddrO32, src: XReg };
699            /// `*addr = low64(src)`
700            xstore64be_o32 = XStore64BeO32 { addr: AddrO32, src: XReg };
701
702            // Big and little endian float loads/stores. Note that the "Z"
703            // addressing mode only has little-endian variants.
704
705            /// `low32(dst) = zext(*addr)`
706            fload32be_o32 = Fload32BeO32 { dst: FReg, addr: AddrO32 };
707            /// `dst = *addr`
708            fload64be_o32 = Fload64BeO32 { dst: FReg, addr: AddrO32 };
709            /// `*addr = low32(src)`
710            fstore32be_o32 = Fstore32BeO32 { addr: AddrO32, src: FReg };
711            /// `*addr = src`
712            fstore64be_o32 = Fstore64BeO32 { addr: AddrO32, src: FReg };
713
714            /// `low32(dst) = zext(*addr)`
715            fload32le_o32 = Fload32LeO32 { dst: FReg, addr: AddrO32 };
716            /// `dst = *addr`
717            fload64le_o32 = Fload64LeO32 { dst: FReg, addr: AddrO32 };
718            /// `*addr = low32(src)`
719            fstore32le_o32 = Fstore32LeO32 { addr: AddrO32, src: FReg };
720            /// `*addr = src`
721            fstore64le_o32 = Fstore64LeO32 { addr: AddrO32, src: FReg };
722
723            /// `low32(dst) = zext(*addr)`
724            fload32le_z = Fload32LeZ { dst: FReg, addr: AddrZ };
725            /// `dst = *addr`
726            fload64le_z = Fload64LeZ { dst: FReg, addr: AddrZ };
727            /// `*addr = low32(src)`
728            fstore32le_z = Fstore32LeZ { addr: AddrZ, src: FReg };
729            /// `*addr = src`
730            fstore64le_z = Fstore64LeZ { addr: AddrZ, src: FReg };
731
732            /// `low32(dst) = zext(*addr)`
733            fload32le_g32 = Fload32LeG32 { dst: FReg, addr: AddrG32 };
734            /// `dst = *addr`
735            fload64le_g32 = Fload64LeG32 { dst: FReg, addr: AddrG32 };
736            /// `*addr = low32(src)`
737            fstore32le_g32 = Fstore32LeG32 { addr: AddrG32, src: FReg };
738            /// `*addr = src`
739            fstore64le_g32 = Fstore64LeG32 { addr: AddrG32, src: FReg };
740
741            // Vector loads/stores. Note that big-endian variants are all
742            // omitted.
743
744            /// `dst = *addr`
745            vload128le_o32 = VLoad128O32 { dst: VReg, addr: AddrO32 };
746            /// `*addr = src`
747            vstore128le_o32 = Vstore128LeO32 { addr: AddrO32, src: VReg };
748            /// `dst = *addr`
749            vload128le_z = VLoad128Z { dst: VReg, addr: AddrZ };
750            /// `*addr = src`
751            vstore128le_z = Vstore128LeZ { addr: AddrZ, src: VReg };
752            /// `dst = *addr`
753            vload128le_g32 = VLoad128G32 { dst: VReg, addr: AddrG32 };
754            /// `*addr = src`
755            vstore128le_g32 = Vstore128LeG32 { addr: AddrG32, src: VReg };
756
757            /// Move between `f` registers.
758            fmov = Fmov { dst: FReg, src: FReg };
759            /// Move between `v` registers.
760            vmov = Vmov { dst: VReg, src: VReg };
761
762            /// `low32(dst) = bitcast low32(src) as i32`
763            bitcast_int_from_float_32 = BitcastIntFromFloat32 { dst: XReg, src: FReg };
764            /// `dst = bitcast src as i64`
765            bitcast_int_from_float_64 = BitcastIntFromFloat64 { dst: XReg, src: FReg };
766            /// `low32(dst) = bitcast low32(src) as f32`
767            bitcast_float_from_int_32 = BitcastFloatFromInt32 { dst: FReg, src: XReg };
768            /// `dst = bitcast src as f64`
769            bitcast_float_from_int_64 = BitcastFloatFromInt64 { dst: FReg, src: XReg };
770
771            /// `low32(dst) = bits`
772            fconst32 = FConst32 { dst: FReg, bits: u32 };
773            /// `dst = bits`
774            fconst64 = FConst64 { dst: FReg, bits: u64 };
775
776            /// `low32(dst) = zext(src1 == src2)`
777            feq32 = Feq32 { dst: XReg, src1: FReg, src2: FReg };
778            /// `low32(dst) = zext(src1 != src2)`
779            fneq32 = Fneq32 { dst: XReg, src1: FReg, src2: FReg };
780            /// `low32(dst) = zext(src1 < src2)`
781            flt32 = Flt32 { dst: XReg, src1: FReg, src2: FReg };
782            /// `low32(dst) = zext(src1 <= src2)`
783            flteq32 = Flteq32 { dst: XReg, src1: FReg, src2: FReg };
784            /// `low32(dst) = zext(src1 == src2)`
785            feq64 = Feq64 { dst: XReg, src1: FReg, src2: FReg };
786            /// `low32(dst) = zext(src1 != src2)`
787            fneq64 = Fneq64 { dst: XReg, src1: FReg, src2: FReg };
788            /// `low32(dst) = zext(src1 < src2)`
789            flt64 = Flt64 { dst: XReg, src1: FReg, src2: FReg };
790            /// `low32(dst) = zext(src1 <= src2)`
791            flteq64 = Flteq64 { dst: XReg, src1: FReg, src2: FReg };
792
793            /// `low32(dst) = low32(cond) ? low32(if_nonzero) : low32(if_zero)`
794            fselect32 = FSelect32 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
795            /// `dst = low32(cond) ? if_nonzero : if_zero`
796            fselect64 = FSelect64 { dst: FReg, cond: XReg, if_nonzero: FReg, if_zero: FReg };
797
798            /// `low32(dst) = demote(src)`
799            f32_from_f64 = F32FromF64 { dst: FReg, src: FReg };
800            /// `dst = promote(low32(src))`
801            f64_from_f32 = F64FromF32 { dst: FReg, src: FReg };
802
803            /// `low32(dst) = checked_f32_from_signed(low32(src))`
804            f32_from_x32_s = F32FromX32S { dst: FReg, src: XReg };
805            /// `low32(dst) = checked_f32_from_unsigned(low32(src))`
806            f32_from_x32_u = F32FromX32U { dst: FReg, src: XReg };
807            /// `low32(dst) = checked_f32_from_signed(src)`
808            f32_from_x64_s = F32FromX64S { dst: FReg, src: XReg };
809            /// `low32(dst) = checked_f32_from_unsigned(src)`
810            f32_from_x64_u = F32FromX64U { dst: FReg, src: XReg };
811            /// `dst = checked_f64_from_signed(low32(src))`
812            f64_from_x32_s = F64FromX32S { dst: FReg, src: XReg };
813            /// `dst = checked_f64_from_unsigned(low32(src))`
814            f64_from_x32_u = F64FromX32U { dst: FReg, src: XReg };
815            /// `dst = checked_f64_from_signed(src)`
816            f64_from_x64_s = F64FromX64S { dst: FReg, src: XReg };
817            /// `dst = checked_f64_from_unsigned(src)`
818            f64_from_x64_u = F64FromX64U { dst: FReg, src: XReg };
819
820            /// `low32(dst) = checked_signed_from_f32(low32(src))`
821            x32_from_f32_s = X32FromF32S { dst: XReg, src: FReg };
822            /// `low32(dst) = checked_unsigned_from_f32(low32(src))`
823            x32_from_f32_u = X32FromF32U { dst: XReg, src: FReg };
824            /// `low32(dst) = checked_signed_from_f64(src)`
825            x32_from_f64_s = X32FromF64S { dst: XReg, src: FReg };
826            /// `low32(dst) = checked_unsigned_from_f64(src)`
827            x32_from_f64_u = X32FromF64U { dst: XReg, src: FReg };
828            /// `dst = checked_signed_from_f32(low32(src))`
829            x64_from_f32_s = X64FromF32S { dst: XReg, src: FReg };
830            /// `dst = checked_unsigned_from_f32(low32(src))`
831            x64_from_f32_u = X64FromF32U { dst: XReg, src: FReg };
832            /// `dst = checked_signed_from_f64(src)`
833            x64_from_f64_s = X64FromF64S { dst: XReg, src: FReg };
834            /// `dst = checked_unsigned_from_f64(src)`
835            x64_from_f64_u = X64FromF64U { dst: XReg, src: FReg };
836
837            /// `low32(dst) = saturating_signed_from_f32(low32(src))`
838            x32_from_f32_s_sat = X32FromF32SSat { dst: XReg, src: FReg };
839            /// `low32(dst) = saturating_unsigned_from_f32(low32(src))`
840            x32_from_f32_u_sat = X32FromF32USat { dst: XReg, src: FReg };
841            /// `low32(dst) = saturating_signed_from_f64(src)`
842            x32_from_f64_s_sat = X32FromF64SSat { dst: XReg, src: FReg };
843            /// `low32(dst) = saturating_unsigned_from_f64(src)`
844            x32_from_f64_u_sat = X32FromF64USat { dst: XReg, src: FReg };
845            /// `dst = saturating_signed_from_f32(low32(src))`
846            x64_from_f32_s_sat = X64FromF32SSat { dst: XReg, src: FReg };
847            /// `dst = saturating_unsigned_from_f32(low32(src))`
848            x64_from_f32_u_sat = X64FromF32USat { dst: XReg, src: FReg };
849            /// `dst = saturating_signed_from_f64(src)`
850            x64_from_f64_s_sat = X64FromF64SSat { dst: XReg, src: FReg };
851            /// `dst = saturating_unsigned_from_f64(src)`
852            x64_from_f64_u_sat = X64FromF64USat { dst: XReg, src: FReg };
853
854            /// `low32(dst) = copysign(low32(src1), low32(src2))`
855            fcopysign32 = FCopySign32 { operands: BinaryOperands<FReg> };
856            /// `dst = copysign(src1, src2)`
857            fcopysign64 = FCopySign64 { operands: BinaryOperands<FReg> };
858
859            /// `low32(dst) = low32(src1) + low32(src2)`
860            fadd32 = Fadd32 { operands: BinaryOperands<FReg> };
861            /// `low32(dst) = low32(src1) - low32(src2)`
862            fsub32 = Fsub32 { operands: BinaryOperands<FReg> };
863            /// `low128(dst) = low128(src1) - low128(src2)`
864            vsubf32x4 = Vsubf32x4 { operands: BinaryOperands<VReg> };
865            /// `low32(dst) = low32(src1) * low32(src2)`
866            fmul32 = Fmul32 { operands: BinaryOperands<FReg> };
867            /// `low128(dst) = low128(src1) * low128(src2)`
868            vmulf32x4 = Vmulf32x4 { operands: BinaryOperands<VReg> };
869            /// `low32(dst) = low32(src1) / low32(src2)`
870            fdiv32 = Fdiv32 { operands: BinaryOperands<FReg> };
871            /// `low128(dst) = low128(src1) / low128(src2)`
872            vdivf32x4 = Vdivf32x4 { operands: BinaryOperands<VReg> };
873            /// `low32(dst) = ieee_maximum(low32(src1), low32(src2))`
874            fmaximum32 = Fmaximum32 { operands: BinaryOperands<FReg> };
875            /// `low32(dst) = ieee_minimum(low32(src1), low32(src2))`
876            fminimum32 = Fminimum32 { operands: BinaryOperands<FReg> };
877            /// `low32(dst) = ieee_trunc(low32(src))`
878            ftrunc32 = Ftrunc32 { dst: FReg, src: FReg };
879            /// `low128(dst) = ieee_trunc(low128(src))`
880            vtrunc32x4 = Vtrunc32x4 { dst: VReg, src: VReg };
881            /// `low128(dst) = ieee_trunc(low128(src))`
882            vtrunc64x2 = Vtrunc64x2 { dst: VReg, src: VReg };
883            /// `low32(dst) = ieee_floor(low32(src))`
884            ffloor32 = Ffloor32 { dst: FReg, src: FReg };
885            /// `low128(dst) = ieee_floor(low128(src))`
886            vfloor32x4 = Vfloor32x4 { dst: VReg, src: VReg };
887            /// `low128(dst) = ieee_floor(low128(src))`
888            vfloor64x2 = Vfloor64x2 { dst: VReg, src: VReg };
889            /// `low32(dst) = ieee_ceil(low32(src))`
890            fceil32 = Fceil32 { dst: FReg, src: FReg };
891            /// `low128(dst) = ieee_ceil(low128(src))`
892            vceil32x4 = Vceil32x4 { dst: VReg, src: VReg };
893            /// `low128(dst) = ieee_ceil(low128(src))`
894            vceil64x2 = Vceil64x2 { dst: VReg, src: VReg };
895            /// `low32(dst) = ieee_nearest(low32(src))`
896            fnearest32 = Fnearest32 { dst: FReg, src: FReg };
897            /// `low32(dst) = ieee_sqrt(low32(src))`
898            fsqrt32 = Fsqrt32 { dst: FReg, src: FReg };
899            /// `low128(dst) = ieee_sqrt(low128(src))`
900            vsqrt32x4 = Vsqrt32x4 { dst: VReg, src: VReg };
901            /// `low128(dst) = ieee_sqrt(low128(src))`
902            vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
903            /// `low32(dst) = -low32(src)`
904            fneg32 = Fneg32 { dst: FReg, src: FReg };
905            /// `low128(dst) = -low128(src)`
906            vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
907            /// `low32(dst) = |low32(src)|`
908            fabs32 = Fabs32 { dst: FReg, src: FReg };
909
910            /// `dst = src1 + src2`
911            fadd64 = Fadd64 { operands: BinaryOperands<FReg> };
912            /// `dst = src1 - src2`
913            fsub64 = Fsub64 { operands: BinaryOperands<FReg> };
914            /// `dst = src1 * src2`
915            fmul64 = Fmul64 { operands: BinaryOperands<FReg> };
916            /// `dst = src1 / src2`
917            fdiv64 = Fdiv64 { operands: BinaryOperands<FReg> };
918            /// `dst = src1 / src2`
919            vdivf64x2 = VDivF64x2 { operands: BinaryOperands<VReg> };
920            /// `dst = ieee_maximum(src1, src2)`
921            fmaximum64 = Fmaximum64 { operands: BinaryOperands<FReg> };
922            /// `dst = ieee_minimum(src1, src2)`
923            fminimum64 = Fminimum64 { operands: BinaryOperands<FReg> };
924            /// `dst = ieee_trunc(src)`
925            ftrunc64 = Ftrunc64 { dst: FReg, src: FReg };
926            /// `dst = ieee_floor(src)`
927            ffloor64 = Ffloor64 { dst: FReg, src: FReg };
928            /// `dst = ieee_ceil(src)`
929            fceil64 = Fceil64 { dst: FReg, src: FReg };
930            /// `dst = ieee_nearest(src)`
931            fnearest64 = Fnearest64 { dst: FReg, src: FReg };
932            /// `low128(dst) = ieee_nearest(low128(src))`
933            vnearest32x4 = Vnearest32x4 { dst: VReg, src: VReg };
934            /// `low128(dst) = ieee_nearest(low128(src))`
935            vnearest64x2 = Vnearest64x2 { dst: VReg, src: VReg };
936            /// `dst = ieee_sqrt(src)`
937            fsqrt64 = Fsqrt64 { dst: FReg, src: FReg };
938            /// `dst = -src`
939            fneg64 = Fneg64 { dst: FReg, src: FReg };
940            /// `dst = |src|`
941            fabs64 = Fabs64 { dst: FReg, src: FReg };
942
943            /// `dst = imm`
944            vconst128 = Vconst128 { dst: VReg, imm: u128 };
945
946            /// `dst = src1 + src2`
947            vaddi8x16 = VAddI8x16 { operands: BinaryOperands<VReg> };
948            /// `dst = src1 + src2`
949            vaddi16x8 = VAddI16x8 { operands: BinaryOperands<VReg> };
950            /// `dst = src1 + src2`
951            vaddi32x4 = VAddI32x4 { operands: BinaryOperands<VReg> };
952            /// `dst = src1 + src2`
953            vaddi64x2 = VAddI64x2 { operands: BinaryOperands<VReg> };
954            /// `dst = src1 + src2`
955            vaddf32x4 = VAddF32x4 { operands: BinaryOperands<VReg> };
956            /// `dst = src1 + src2`
957            vaddf64x2 = VAddF64x2 { operands: BinaryOperands<VReg> };
958
959            /// `dst = saturating_add(src1, src2)`
960            vaddi8x16_sat = VAddI8x16Sat { operands: BinaryOperands<VReg> };
961            /// `dst = saturating_add(src1, src2)`
962            vaddu8x16_sat = VAddU8x16Sat { operands: BinaryOperands<VReg> };
963            /// `dst = saturating_add(src1, src2)`
964            vaddi16x8_sat = VAddI16x8Sat { operands: BinaryOperands<VReg> };
965            /// `dst = saturating_add(src1, src2)`
966            vaddu16x8_sat = VAddU16x8Sat { operands: BinaryOperands<VReg> };
967
968            /// `dst = [src1[0] + src1[1], ..., src2[6] + src2[7]]`
969            vaddpairwisei16x8_s = VAddpairwiseI16x8S { operands: BinaryOperands<VReg> };
970            /// `dst = [src1[0] + src1[1], ..., src2[2] + src2[3]]`
971            vaddpairwisei32x4_s = VAddpairwiseI32x4S { operands: BinaryOperands<VReg> };
972
973            /// `dst = src1 << src2`
974            vshli8x16 = VShlI8x16 { operands: BinaryOperands<VReg, VReg, XReg> };
975            /// `dst = src1 << src2`
976            vshli16x8 = VShlI16x8 { operands: BinaryOperands<VReg, VReg, XReg> };
977            /// `dst = src1 << src2`
978            vshli32x4 = VShlI32x4 { operands: BinaryOperands<VReg, VReg, XReg> };
979            /// `dst = src1 << src2`
980            vshli64x2 = VShlI64x2 { operands: BinaryOperands<VReg, VReg, XReg> };
981            /// `dst = src1 >> src2` (signed)
982            vshri8x16_s = VShrI8x16S { operands: BinaryOperands<VReg, VReg, XReg> };
983            /// `dst = src1 >> src2` (signed)
984            vshri16x8_s = VShrI16x8S { operands: BinaryOperands<VReg, VReg, XReg> };
985            /// `dst = src1 >> src2` (signed)
986            vshri32x4_s = VShrI32x4S { operands: BinaryOperands<VReg, VReg, XReg> };
987            /// `dst = src1 >> src2` (signed)
988            vshri64x2_s = VShrI64x2S { operands: BinaryOperands<VReg, VReg, XReg> };
989            /// `dst = src1 >> src2` (unsigned)
990            vshri8x16_u = VShrI8x16U { operands: BinaryOperands<VReg, VReg, XReg> };
991            /// `dst = src1 >> src2` (unsigned)
992            vshri16x8_u = VShrI16x8U { operands: BinaryOperands<VReg, VReg, XReg> };
993            /// `dst = src1 >> src2` (unsigned)
994            vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
995            /// `dst = src1 >> src2` (unsigned)
996            vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };
997
998            /// `dst = splat(low8(src))`
999            vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
1000            /// `dst = splat(low16(src))`
1001            vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
1002            /// `dst = splat(low32(src))`
1003            vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
1004            /// `dst = splat(src)`
1005            vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
1006            /// `dst = splat(low32(src))`
1007            vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
1008            /// `dst = splat(src)`
1009            vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
1010
1011            /// Load the 64-bit source as i8x8 and sign-extend to i16x8.
1012            vload8x8_s_z = VLoad8x8SZ { dst: VReg, addr: AddrZ };
1013            /// Load the 64-bit source as u8x8 and zero-extend to i16x8.
1014            vload8x8_u_z = VLoad8x8UZ { dst: VReg, addr: AddrZ };
1015            /// Load the 64-bit source as i16x4 and sign-extend to i32x4.
1016            vload16x4le_s_z = VLoad16x4LeSZ { dst: VReg, addr: AddrZ };
1017            /// Load the 64-bit source as u16x4 and zero-extend to i32x4.
1018            vload16x4le_u_z = VLoad16x4LeUZ { dst: VReg, addr: AddrZ };
1019            /// Load the 64-bit source as i32x2 and sign-extend to i64x2.
1020            vload32x2le_s_z = VLoad32x2LeSZ { dst: VReg, addr: AddrZ };
1021            /// Load the 64-bit source as u32x2 and zero-extend to i64x2.
1022            vload32x2le_u_z = VLoad32x2LeUZ { dst: VReg, addr: AddrZ };
1023
1024            /// `dst = src1 & src2`
1025            vband128 = VBand128 { operands: BinaryOperands<VReg> };
1026            /// `dst = src1 | src2`
1027            vbor128 = VBor128 { operands: BinaryOperands<VReg> };
1028            /// `dst = src1 ^ src2`
1029            vbxor128 = VBxor128 { operands: BinaryOperands<VReg> };
1030            /// `dst = !src1`
1031            /// `dst = !src`
1032            /// `dst = (c & x) | (!c & y)`
1033            vbitselect128 = VBitselect128 { dst: VReg, c: VReg, x: VReg, y: VReg };
1034            /// Collect high bits of each lane into the low 32-bits of the
1035            /// destination.
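            ///
            /// That is, in the usual Wasm-style bitmask convention, bit `i`
            /// of the result is the most-significant bit of lane `i`, so the
            /// 8x16 variant produces a 16-bit mask and the 16x8, 32x4, and
            /// 64x2 variants produce 8-, 4-, and 2-bit masks respectively.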
1036            vbitmask8x16 = Vbitmask8x16 { dst: XReg, src: VReg };
1037            /// Collect high bits of each lane into the low 32-bits of the
1038            /// destination.
1039            vbitmask16x8 = Vbitmask16x8 { dst: XReg, src: VReg };
1040            /// Collect high bits of each lane into the low 32-bits of the
1041            /// destination.
1042            vbitmask32x4 = Vbitmask32x4 { dst: XReg, src: VReg };
1043            /// Collect high bits of each lane into the low 32-bits of the
1044            /// destination.
1045            vbitmask64x2 = Vbitmask64x2 { dst: XReg, src: VReg };
1046            /// Store whether all lanes are nonzero in `dst`.
1047            valltrue8x16 = Valltrue8x16 { dst: XReg, src: VReg };
1048            /// Store whether all lanes are nonzero in `dst`.
1049            valltrue16x8 = Valltrue16x8 { dst: XReg, src: VReg };
1050            /// Store whether all lanes are nonzero in `dst`.
1051            valltrue32x4 = Valltrue32x4 { dst: XReg, src: VReg };
1052            /// Store whether all lanes are nonzero in `dst`.
1053            valltrue64x2 = Valltrue64x2 { dst: XReg, src: VReg };
1054            /// Store whether any lanes are nonzero in `dst`.
1055            vanytrue8x16 = Vanytrue8x16 { dst: XReg, src: VReg };
1056            /// Store whether any lanes are nonzero in `dst`.
1057            vanytrue16x8 = Vanytrue16x8 { dst: XReg, src: VReg };
1058            /// Store whether any lanes are nonzero in `dst`.
1059            vanytrue32x4 = Vanytrue32x4 { dst: XReg, src: VReg };
1060            /// Store whether any lanes are nonzero in `dst`.
1061            vanytrue64x2 = Vanytrue64x2 { dst: XReg, src: VReg };
1062
1063            /// Int-to-float conversion (same as `f32_from_x32_s`)
1064            vf32x4_from_i32x4_s = VF32x4FromI32x4S { dst: VReg, src: VReg };
1065            /// Int-to-float conversion (same as `f32_from_x32_u`)
1066            vf32x4_from_i32x4_u = VF32x4FromI32x4U { dst: VReg, src: VReg };
1067            /// Int-to-float conversion (same as `f64_from_x64_s`)
1068            vf64x2_from_i64x2_s = VF64x2FromI64x2S { dst: VReg, src: VReg };
1069            /// Int-to-float conversion (same as `f64_from_x64_u`)
1070            vf64x2_from_i64x2_u = VF64x2FromI64x2U { dst: VReg, src: VReg };
1071            /// Float-to-int conversion (same as `x32_from_f32_s`)
1072            vi32x4_from_f32x4_s = VI32x4FromF32x4S { dst: VReg, src: VReg };
1073            /// Float-to-int conversion (same as `x32_from_f32_u`)
1074            vi32x4_from_f32x4_u = VI32x4FromF32x4U { dst: VReg, src: VReg };
1075            /// Float-to-int conversion (same as `x64_from_f64_s`)
1076            vi64x2_from_f64x2_s = VI64x2FromF64x2S { dst: VReg, src: VReg };
1077            /// Float-to-int conversion (same as `x64_from_f64_u`)
1078            vi64x2_from_f64x2_u = VI64x2FromF64x2U { dst: VReg, src: VReg };
1079
1080            /// Widens the low lanes of the input vector, as signed, to twice
1081            /// the width.
1082            vwidenlow8x16_s = VWidenLow8x16S { dst: VReg, src: VReg };
1083            /// Widens the low lanes of the input vector, as unsigned, to twice
1084            /// the width.
1085            vwidenlow8x16_u = VWidenLow8x16U { dst: VReg, src: VReg };
1086            /// Widens the low lanes of the input vector, as signed, to twice
1087            /// the width.
1088            vwidenlow16x8_s = VWidenLow16x8S { dst: VReg, src: VReg };
1089            /// Widens the low lanes of the input vector, as unsigned, to twice
1090            /// the width.
1091            vwidenlow16x8_u = VWidenLow16x8U { dst: VReg, src: VReg };
1092            /// Widens the low lanes of the input vector, as signed, to twice
1093            /// the width.
1094            vwidenlow32x4_s = VWidenLow32x4S { dst: VReg, src: VReg };
1095            /// Widens the low lanes of the input vector, as unsigned, to twice
1096            /// the width.
1097            vwidenlow32x4_u = VWidenLow32x4U { dst: VReg, src: VReg };
1098            /// Widens the high lanes of the input vector, as signed, to twice
1099            /// the width.
1100            vwidenhigh8x16_s = VWidenHigh8x16S { dst: VReg, src: VReg };
1101            /// Widens the high lanes of the input vector, as unsigned, to twice
1102            /// the width.
1103            vwidenhigh8x16_u = VWidenHigh8x16U { dst: VReg, src: VReg };
1104            /// Widens the high lanes of the input vector, as signed, to twice
1105            /// the width.
1106            vwidenhigh16x8_s = VWidenHigh16x8S { dst: VReg, src: VReg };
1107            /// Widens the high lanes of the input vector, as unsigned, to twice
1108            /// the width.
1109            vwidenhigh16x8_u = VWidenHigh16x8U { dst: VReg, src: VReg };
1110            /// Widens the high lanes of the input vector, as signed, to twice
1111            /// the width.
1112            vwidenhigh32x4_s = VWidenHigh32x4S { dst: VReg, src: VReg };
1113            /// Widens the high lanes of the input vector, as unsigned, to twice
1114            /// the width.
1115            vwidenhigh32x4_u = VWidenHigh32x4U { dst: VReg, src: VReg };
1116
1117            /// Narrows the two 16x8 vectors, assuming all input lanes are
1118            /// signed, to half the width. Narrowing is signed and saturating.
1119            vnarrow16x8_s = Vnarrow16x8S { operands: BinaryOperands<VReg> };
1120            /// Narrows the two 16x8 vectors, assuming all input lanes are
1121            /// signed, to half the width. Narrowing is unsigned and saturating.
1122            vnarrow16x8_u = Vnarrow16x8U { operands: BinaryOperands<VReg> };
1123            /// Narrows the two 32x4 vectors, assuming all input lanes are
1124            /// signed, to half the width. Narrowing is signed and saturating.
1125            vnarrow32x4_s = Vnarrow32x4S { operands: BinaryOperands<VReg> };
1126            /// Narrows the two 32x4 vectors, assuming all input lanes are
1127            /// signed, to half the width. Narrowing is unsigned and saturating.
1128            vnarrow32x4_u = Vnarrow32x4U { operands: BinaryOperands<VReg> };
1129            /// Narrows the two 64x2 vectors, assuming all input lanes are
1130            /// signed, to half the width. Narrowing is signed and saturating.
1131            vnarrow64x2_s = Vnarrow64x2S { operands: BinaryOperands<VReg> };
1132            /// Narrows the two 64x2 vectors, assuming all input lanes are
1133            /// signed, to half the width. Narrowing is unsigned and saturating.
1134            vnarrow64x2_u = Vnarrow64x2U { operands: BinaryOperands<VReg> };
1135            /// Narrows the two 64x2 vectors, assuming all input lanes are
1136            /// unsigned, to half the width. Narrowing is unsigned and saturating.
1137            vunarrow64x2_u = Vunarrow64x2U { operands: BinaryOperands<VReg> };
1138            /// Promotes the low two lanes of the f32x4 input to f64x2.
1139            vfpromotelow = VFpromoteLow { dst: VReg, src: VReg };
1140            /// Demotes the two f64x2 lanes to f32x2 and then extends with two
1141            /// more zero lanes.
1142            vfdemote = VFdemote { dst: VReg, src: VReg };
1143
1144            /// `dst = src1 - src2`
1145            vsubi8x16 = VSubI8x16 { operands: BinaryOperands<VReg> };
1146            /// `dst = src1 - src2`
1147            vsubi16x8 = VSubI16x8 { operands: BinaryOperands<VReg> };
1148            /// `dst = src1 - src2`
1149            vsubi32x4 = VSubI32x4 { operands: BinaryOperands<VReg> };
1150            /// `dst = src1 - src2`
1151            vsubi64x2 = VSubI64x2 { operands: BinaryOperands<VReg> };
1152            /// `dst = src1 - src2`
1153            vsubf64x2 = VSubF64x2 { operands: BinaryOperands<VReg> };
1154
1155            /// `dst = saturating_sub(src1, src2)`
1156            vsubi8x16_sat = VSubI8x16Sat { operands: BinaryOperands<VReg> };
1157            /// `dst = saturating_sub(src1, src2)`
1158            vsubu8x16_sat = VSubU8x16Sat { operands: BinaryOperands<VReg> };
1159            /// `dst = saturating_sub(src1, src2)`
1160            vsubi16x8_sat = VSubI16x8Sat { operands: BinaryOperands<VReg> };
1161            /// `dst = saturating_sub(src1, src2)`
1162            vsubu16x8_sat = VSubU16x8Sat { operands: BinaryOperands<VReg> };
1163
1164            /// `dst = src1 * src2`
1165            vmuli8x16 = VMulI8x16 { operands: BinaryOperands<VReg> };
1166            /// `dst = src1 * src2`
1167            vmuli16x8 = VMulI16x8 { operands: BinaryOperands<VReg> };
1168            /// `dst = src1 * src2`
1169            vmuli32x4 = VMulI32x4 { operands: BinaryOperands<VReg> };
1170            /// `dst = src1 * src2`
1171            vmuli64x2 = VMulI64x2 { operands: BinaryOperands<VReg> };
1172            /// `dst = src1 * src2`
1173            vmulf64x2 = VMulF64x2 { operands: BinaryOperands<VReg> };
1174
1175            /// `dst = signed_saturate(src1 * src2 + (1 << (Q - 1)) >> Q)`
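            ///
            /// Here `Q` is 15, i.e. this is the usual Q15 fixed-point
            /// rounding, saturating multiply (the lane-wise analogue of
            /// Wasm's `i16x8.q15mulr_sat_s`).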
1176            vqmulrsi16x8 = VQmulrsI16x8 { operands: BinaryOperands<VReg> };
1177
1178            /// `dst = count_ones(src)`
1179            vpopcnt8x16 = VPopcnt8x16 { dst: VReg, src: VReg };
1180
1181            /// `low32(dst) = zext(src[lane])`
1182            xextractv8x16 = XExtractV8x16 { dst: XReg, src: VReg, lane: u8 };
1183            /// `low32(dst) = zext(src[lane])`
1184            xextractv16x8 = XExtractV16x8 { dst: XReg, src: VReg, lane: u8 };
1185            /// `low32(dst) = src[lane]`
1186            xextractv32x4 = XExtractV32x4 { dst: XReg, src: VReg, lane: u8 };
1187            /// `dst = src[lane]`
1188            xextractv64x2 = XExtractV64x2 { dst: XReg, src: VReg, lane: u8 };
1189            /// `low32(dst) = src[lane]`
1190            fextractv32x4 = FExtractV32x4 { dst: FReg, src: VReg, lane: u8 };
1191            /// `dst = src[lane]`
1192            fextractv64x2 = FExtractV64x2 { dst: FReg, src: VReg, lane: u8 };
1193
1194            /// `dst = src1; dst[lane] = src2`
1195            vinsertx8 = VInsertX8 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1196            /// `dst = src1; dst[lane] = src2`
1197            vinsertx16 = VInsertX16 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1198            /// `dst = src1; dst[lane] = src2`
1199            vinsertx32 = VInsertX32 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1200            /// `dst = src1; dst[lane] = src2`
1201            vinsertx64 = VInsertX64 { operands: BinaryOperands<VReg, VReg, XReg>, lane: u8 };
1202            /// `dst = src1; dst[lane] = src2`
1203            vinsertf32 = VInsertF32 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };
1204            /// `dst = src1; dst[lane] = src2`
1205            vinsertf64 = VInsertF64 { operands: BinaryOperands<VReg, VReg, FReg>, lane: u8 };
1206
1207            /// `dst = src == dst`
1208            veq8x16 = Veq8x16 { operands: BinaryOperands<VReg> };
1209            /// `dst = src != dst`
1210            vneq8x16 = Vneq8x16 { operands: BinaryOperands<VReg> };
1211            /// `dst = src < dst` (signed)
1212            vslt8x16 = Vslt8x16 { operands: BinaryOperands<VReg> };
1213            /// `dst = src <= dst` (signed)
1214            vslteq8x16 = Vslteq8x16 { operands: BinaryOperands<VReg> };
1215            /// `dst = src < dst` (unsigned)
1216            vult8x16 = Vult8x16 { operands: BinaryOperands<VReg> };
1217            /// `dst = src <= dst` (unsigned)
1218            vulteq8x16 = Vulteq8x16 { operands: BinaryOperands<VReg> };
1219            /// `dst = src == dst`
1220            veq16x8 = Veq16x8 { operands: BinaryOperands<VReg> };
1221            /// `dst = src != dst`
1222            vneq16x8 = Vneq16x8 { operands: BinaryOperands<VReg> };
1223            /// `dst = src < dst` (signed)
1224            vslt16x8 = Vslt16x8 { operands: BinaryOperands<VReg> };
1225            /// `dst = src <= dst` (signed)
1226            vslteq16x8 = Vslteq16x8 { operands: BinaryOperands<VReg> };
1227            /// `dst = src < dst` (unsigned)
1228            vult16x8 = Vult16x8 { operands: BinaryOperands<VReg> };
1229            /// `dst = src <= dst` (unsigned)
1230            vulteq16x8 = Vulteq16x8 { operands: BinaryOperands<VReg> };
1231            /// `dst = src == dst`
1232            veq32x4 = Veq32x4 { operands: BinaryOperands<VReg> };
1233            /// `dst = src != dst`
1234            vneq32x4 = Vneq32x4 { operands: BinaryOperands<VReg> };
1235            /// `dst = src < dst` (signed)
1236            vslt32x4 = Vslt32x4 { operands: BinaryOperands<VReg> };
1237            /// `dst = src <= dst` (signed)
1238            vslteq32x4 = Vslteq32x4 { operands: BinaryOperands<VReg> };
1239            /// `dst = src < dst` (unsigned)
1240            vult32x4 = Vult32x4 { operands: BinaryOperands<VReg> };
1241            /// `dst = src <= dst` (unsigned)
1242            vulteq32x4 = Vulteq32x4 { operands: BinaryOperands<VReg> };
1243            /// `dst = src == dst`
1244            veq64x2 = Veq64x2 { operands: BinaryOperands<VReg> };
1245            /// `dst = src != dst`
1246            vneq64x2 = Vneq64x2 { operands: BinaryOperands<VReg> };
1247            /// `dst = src < dst` (signed)
1248            vslt64x2 = Vslt64x2 { operands: BinaryOperands<VReg> };
1249            /// `dst = src <= dst` (signed)
1250            vslteq64x2 = Vslteq64x2 { operands: BinaryOperands<VReg> };
1251            /// `dst = src < dst` (unsigned)
1252            vult64x2 = Vult64x2 { operands: BinaryOperands<VReg> };
1253            /// `dst = src <= dst` (unsigned)
1254            vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };
1255
1256            /// `dst = -src`
1257            vneg8x16 = Vneg8x16 { dst: VReg, src: VReg };
1258            /// `dst = -src`
1259            vneg16x8 = Vneg16x8 { dst: VReg, src: VReg };
1260            /// `dst = -src`
1261            vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
1262            /// `dst = -src`
1263            vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };
1264            /// `dst = -src`
1265            vnegf64x2 = VnegF64x2 { dst: VReg, src: VReg };
1266
1267            /// `dst = min(src1, src2)` (signed)
1268            vmin8x16_s = Vmin8x16S { operands: BinaryOperands<VReg> };
1269            /// `dst = min(src1, src2)` (unsigned)
1270            vmin8x16_u = Vmin8x16U { operands: BinaryOperands<VReg> };
1271            /// `dst = min(src1, src2)` (signed)
1272            vmin16x8_s = Vmin16x8S { operands: BinaryOperands<VReg> };
1273            /// `dst = min(src1, src2)` (unsigned)
1274            vmin16x8_u = Vmin16x8U { operands: BinaryOperands<VReg> };
1275            /// `dst = max(src1, src2)` (signed)
1276            vmax8x16_s = Vmax8x16S { operands: BinaryOperands<VReg> };
1277            /// `dst = max(src1, src2)` (unsigned)
1278            vmax8x16_u = Vmax8x16U { operands: BinaryOperands<VReg> };
1279            /// `dst = max(src1, src2)` (signed)
1280            vmax16x8_s = Vmax16x8S { operands: BinaryOperands<VReg> };
1281            /// `dst = max(src1, src2)` (unsigned)
1282            vmax16x8_u = Vmax16x8U { operands: BinaryOperands<VReg> };
1283
1284            /// `dst = min(src1, src2)` (signed)
1285            vmin32x4_s = Vmin32x4S { operands: BinaryOperands<VReg> };
1286            /// `dst = min(src1, src2)` (unsigned)
1287            vmin32x4_u = Vmin32x4U { operands: BinaryOperands<VReg> };
1288            /// `dst = max(src1, src2)` (signed)
1289            vmax32x4_s = Vmax32x4S { operands: BinaryOperands<VReg> };
1290            /// `dst = max(src1, src2)` (unsigned)
1291            vmax32x4_u = Vmax32x4U { operands: BinaryOperands<VReg> };
1292
1293            /// `dst = |src|`
1294            vabs8x16 = Vabs8x16 { dst: VReg, src: VReg };
1295            /// `dst = |src|`
1296            vabs16x8 = Vabs16x8 { dst: VReg, src: VReg };
1297            /// `dst = |src|`
1298            vabs32x4 = Vabs32x4 { dst: VReg, src: VReg };
1299            /// `dst = |src|`
1300            vabs64x2 = Vabs64x2 { dst: VReg, src: VReg };
1301
1302            /// `dst = |src|`
1303            vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
1304            /// `dst = |src|`
1305            vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg };
1306            /// `dst = ieee_maximum(src1, src2)`
1307            vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands<VReg> };
1308            /// `dst = ieee_maximum(src1, src2)`
1309            vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands<VReg> };
1310            /// `dst = ieee_minimum(src1, src2)`
1311            vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
1312            /// `dst = ieee_minimum(src1, src2)`
1313            vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };
1314
1315            /// `dst = shuffle(src1, src2, mask)`
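            ///
            /// The `mask` immediate is interpreted as sixteen byte-lane
            /// indices, one per output byte, each selecting from the 32
            /// bytes of `src1` concatenated with `src2` (analogous to Wasm's
            /// `i8x16.shuffle`).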
1316            vshuffle = VShuffle { dst: VReg, src1: VReg, src2: VReg, mask: u128 };
1317
1318            /// `dst = swizzle(src1, src2)`
1319            vswizzlei8x16 = Vswizzlei8x16 { operands: BinaryOperands<VReg> };
1320
1321            /// `dst = (src1 + src2 + 1) // 2`
1322            vavground8x16 = Vavground8x16 { operands: BinaryOperands<VReg> };
1323            /// `dst = (src1 + src2 + 1) // 2`
1324            vavground16x8 = Vavground16x8 { operands: BinaryOperands<VReg> };
1325
1326            /// `dst = src == dst`
1327            veqf32x4 = VeqF32x4 { operands: BinaryOperands<VReg> };
1328            /// `dst = src != dst`
1329            vneqf32x4 = VneqF32x4 { operands: BinaryOperands<VReg> };
1330            /// `dst = src < dst`
1331            vltf32x4 = VltF32x4 { operands: BinaryOperands<VReg> };
1332            /// `dst = src <= dst`
1333            vlteqf32x4 = VlteqF32x4 { operands: BinaryOperands<VReg> };
1334            /// `dst = src == dst`
1335            veqf64x2 = VeqF64x2 { operands: BinaryOperands<VReg> };
1336            /// `dst = src != dst`
1337            vneqf64x2 = VneqF64x2 { operands: BinaryOperands<VReg> };
1338            /// `dst = src < dst`
1339            vltf64x2 = VltF64x2 { operands: BinaryOperands<VReg> };
1340            /// `dst = src <= dst`
1341            vlteqf64x2 = VlteqF64x2 { operands: BinaryOperands<VReg> };
1342
1343            /// `dst = ieee_fma(a, b, c)`
1344            vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg };
1345            /// `dst = ieee_fma(a, b, c)`
1346            vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg };
1347
1348            /// `dst = low32(cond) ? if_nonzero : if_zero`
1349            vselect = Vselect { dst: VReg, cond: XReg, if_nonzero: VReg, if_zero: VReg };
1350
1351            /// `dst_hi:dst_lo = lhs_hi:lhs_lo + rhs_hi:rhs_lo`
1352            xadd128 = Xadd128 {
1353                dst_lo: XReg,
1354                dst_hi: XReg,
1355                lhs_lo: XReg,
1356                lhs_hi: XReg,
1357                rhs_lo: XReg,
1358                rhs_hi: XReg
1359            };
1360            /// `dst_hi:dst_lo = lhs_hi:lhs_lo - rhs_hi:rhs_lo`
1361            xsub128 = Xsub128 {
1362                dst_lo: XReg,
1363                dst_hi: XReg,
1364                lhs_lo: XReg,
1365                lhs_hi: XReg,
1366                rhs_lo: XReg,
1367                rhs_hi: XReg
1368            };
1369            /// `dst_hi:dst_lo = sext(lhs) * sext(rhs)`
1370            xwidemul64_s = Xwidemul64S {
1371                dst_lo: XReg,
1372                dst_hi: XReg,
1373                lhs: XReg,
1374                rhs: XReg
1375            };
1376            /// `dst_hi:dst_lo = zext(lhs) * zext(rhs)`
1377            xwidemul64_u = Xwidemul64U {
1378                dst_lo: XReg,
1379                dst_hi: XReg,
1380                lhs: XReg,
1381                rhs: XReg
1382            };
1383        }
1384    };
1385}
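
// Illustrative sketch only: a hypothetical consumer of the opcode-listing
// macro above. The macro's real name and callback contract are defined
// earlier in this file; this sketch merely assumes each entry expands with
// the `snake_name = CamelName { field: ty, .. };` shape shown above,
// preceded by its doc attributes. The crate's actual consumers live in the
// `decode`, `encode`, `disas`, and `interp` modules declared below.
//
// macro_rules! count_ops {
//     ($(
//         $( #[$attr:meta] )*
//         $snake:ident = $camel:ident $( {
//             $( $field:ident : $ty:ty ),* $(,)?
//         } )? ;
//     )*) => {
//         /// Total number of opcodes in the listing (hypothetical constant).
//         pub const OP_COUNT: usize = [ $( stringify!($snake) ),* ].len();
//     };
// }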
1386
1387#[cfg(feature = "decode")]
1388pub mod decode;
1389#[cfg(feature = "disas")]
1390pub mod disas;
1391#[cfg(feature = "encode")]
1392pub mod encode;
1393#[cfg(feature = "interp")]
1394pub mod interp;
1395#[cfg(feature = "profile")]
1396pub mod profile;
1397#[cfg(all(not(feature = "profile"), feature = "interp"))]
1398mod profile_disabled;
1399#[cfg(all(not(feature = "profile"), feature = "interp"))]
1400use profile_disabled as profile;
1401
1402pub mod regs;
1403pub use regs::*;
1404
1405pub mod imms;
1406pub use imms::*;
1407
1408pub mod op;
1409pub use op::*;
1410
1411pub mod opcode;
1412pub use opcode::*;
1413
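/// Shared helper for the encoder and decoder: in debug builds this panics so
/// that logic errors surface loudly, while in release builds it lowers to
/// `core::hint::unreachable_unchecked()`, leaving it to callers to uphold the
/// safety contract.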
1414#[cfg(any(feature = "encode", feature = "decode"))]
1415pub(crate) unsafe fn unreachable_unchecked() -> ! {
1416    #[cfg(debug_assertions)]
1417    unreachable!();
1418
1419    #[cfg(not(debug_assertions))]
1420    unsafe {
1421        core::hint::unreachable_unchecked()
1422    }
1423}