pulley_interpreter/
interp.rs

1//! Interpretation of pulley bytecode.
2
3use crate::decode::*;
4use crate::encode::Encode;
5use crate::imms::*;
6use crate::profile::{ExecutingPc, ExecutingPcRef};
7use crate::regs::*;
8use alloc::string::ToString;
9use alloc::vec::Vec;
10use core::fmt;
11use core::mem;
12use core::ops::ControlFlow;
13use core::ops::{Index, IndexMut};
14use core::ptr::NonNull;
15use wasmtime_math::WasmFloat;
16mod debug;
17#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
18mod match_loop;
19#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
20mod tail_loop;
21
22const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
23
24/// A virtual machine for interpreting Pulley bytecode.
25pub struct Vm {
26    state: MachineState,
27    executing_pc: ExecutingPc,
28}
29
30impl Default for Vm {
31    fn default() -> Self {
32        Vm::new()
33    }
34}
35
36impl Vm {
37    /// Create a new virtual machine with the default stack size.
38    pub fn new() -> Self {
39        Self::with_stack(DEFAULT_STACK_SIZE)
40    }
41
42    /// Create a new virtual machine with the given stack.
43    pub fn with_stack(stack_size: usize) -> Self {
44        Self {
45            state: MachineState::with_stack(stack_size),
46            executing_pc: ExecutingPc::default(),
47        }
48    }
49
50    /// Get a shared reference to this VM's machine state.
51    pub fn state(&self) -> &MachineState {
52        &self.state
53    }
54
55    /// Get an exclusive reference to this VM's machine state.
56    pub fn state_mut(&mut self) -> &mut MachineState {
57        &mut self.state
58    }
59
60    /// Call a bytecode function.
61    ///
62    /// The given `func` must point to the beginning of a valid Pulley bytecode
63    /// function.
64    ///
65    /// The given `args` must match the number and type of arguments that
66    /// function expects.
67    ///
68    /// The given `rets` must match the function's actual return types.
69    ///
70    /// Returns either the resulting values, or the PC at which a trap was
71    /// raised.
72    pub unsafe fn call<'a, T>(
73        &'a mut self,
74        func: NonNull<u8>,
75        args: &[Val],
76        rets: T,
77    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
78    where
79        T: IntoIterator<Item = RegType> + 'a,
80    {
81        let lr = self.call_start(args);
82
83        match self.call_run(func) {
84            DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
85            DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
86            DoneReason::CallIndirectHost { id, resume } => {
87                DoneReason::CallIndirectHost { id, resume }
88            }
89        }
90    }
91
92    /// Performs the initial part of [`Vm::call`] in setting up the `args`
93    /// provided in registers according to Pulley's ABI.
94    ///
95    /// # Return
96    ///
97    /// Returns the old `lr` register value. The current `lr` value is replaced
98    /// with a sentinel that triggers a return to the host when returned-to.
99    ///
100    /// # Unsafety
101    ///
102    /// All the same unsafety as `call` applies and, additionally, you must
103    /// invoke `call_run` and then `call_end` after calling `call_start`.
104    /// If you don't want to wrangle these invocations, use `call` instead
105    /// of `call_{start,run,end}`.
106    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
107        // NB: make sure this method stays in sync with
108        // `PulleyMachineDeps::compute_arg_locs`!
109
110        let mut x_args = (0..16).map(|x| XReg::new_unchecked(x));
111        let mut f_args = (0..16).map(|f| FReg::new_unchecked(f));
112        let mut v_args = (0..16).map(|v| VReg::new_unchecked(v));
113
114        for arg in args {
115            match arg {
116                Val::XReg(val) => match x_args.next() {
117                    Some(reg) => self.state[reg] = *val,
118                    None => todo!("stack slots"),
119                },
120                Val::FReg(val) => match f_args.next() {
121                    Some(reg) => self.state[reg] = *val,
122                    None => todo!("stack slots"),
123                },
124                Val::VReg(val) => match v_args.next() {
125                    Some(reg) => self.state[reg] = *val,
126                    None => todo!("stack slots"),
127                },
128            }
129        }
130
131        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
132    }
133
134    /// Performs the internal part of [`Vm::call`] where bytecode is actually
135    /// executed.
136    ///
137    /// # Unsafety
138    ///
139    /// In addition to all the invariants documented for `call`, you
140    /// may only invoke `call_run` after invoking `call_start` to
141    /// initialize this call's arguments.
142    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
143        self.state.debug_assert_done_reason_none();
144        let interpreter = Interpreter {
145            state: &mut self.state,
146            pc: UnsafeBytecodeStream::new(pc),
147            executing_pc: self.executing_pc.as_ref(),
148        };
149        let done = interpreter.run();
150        self.state.done_decode(done)
151    }
152
153    /// Performs the tail end of [`Vm::call`] by returning the values as
154    /// determined by `rets` according to Pulley's ABI.
155    ///
156    /// The `old_ret` value should have been provided from `call_start`
157    /// previously.
158    ///
159    /// # Unsafety
160    ///
161    /// In addition to the invariants documented for `call`, this may
162    /// only be called after `call_run`.
163    pub unsafe fn call_end<'a>(
164        &'a mut self,
165        old_ret: *mut u8,
166        rets: impl IntoIterator<Item = RegType> + 'a,
167    ) -> impl Iterator<Item = Val> + 'a {
168        self.state.lr = old_ret;
169        // NB: make sure this method stays in sync with
170        // `PulleyMachineDeps::compute_arg_locs`!
171
172        let mut x_rets = (0..15).map(|x| XReg::new_unchecked(x));
173        let mut f_rets = (0..16).map(|f| FReg::new_unchecked(f));
174        let mut v_rets = (0..16).map(|v| VReg::new_unchecked(v));
175
176        rets.into_iter().map(move |ty| match ty {
177            RegType::XReg => match x_rets.next() {
178                Some(reg) => Val::XReg(self.state[reg]),
179                None => todo!("stack slots"),
180            },
181            RegType::FReg => match f_rets.next() {
182                Some(reg) => Val::FReg(self.state[reg]),
183                None => todo!("stack slots"),
184            },
185            RegType::VReg => match v_rets.next() {
186                Some(reg) => Val::VReg(self.state[reg]),
187                None => todo!("stack slots"),
188            },
189        })
190    }
191
192    /// Returns the current `fp` register value.
193    pub fn fp(&self) -> *mut u8 {
194        self.state.fp
195    }
196
197    /// Returns the current `lr` register value.
198    pub fn lr(&self) -> *mut u8 {
199        self.state.lr
200    }
201
202    /// Sets the current `fp` register value.
203    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
204        self.state.fp = fp;
205    }
206
207    /// Sets the current `lr` register value.
208    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
209        self.state.lr = lr;
210    }
211
212    /// Gets a handle to the currently executing program counter for this
213    /// interpreter which can be read from other threads.
214    //
215    // Note that despite this field still existing with `not(feature =
216    // "profile")` it's hidden from the public API in that scenario as it has no
217    // methods anyway.
218    #[cfg(feature = "profile")]
219    pub fn executing_pc(&self) -> &ExecutingPc {
220        &self.executing_pc
221    }
222}
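
// A hedged sketch (not part of the original source) of how an embedder might
// drive the staged `call_start`/`call_run`/`call_end` entry points above. The
// `func` pointer is assumed to reference valid Pulley bytecode produced
// elsewhere, and the single-`u64`-argument/single-`u64`-return signature is
// hypothetical; this mirrors what `Vm::call` itself does internally.
//
//     unsafe fn call_one_arg_one_ret(vm: &mut Vm, func: NonNull<u8>) -> DoneReason<u64> {
//         let lr = vm.call_start(&[Val::from(42_u64)]);
//         match vm.call_run(func) {
//             DoneReason::ReturnToHost(()) => {
//                 let mut rets = vm.call_end(lr, [RegType::XReg]);
//                 let ret = match rets.next() {
//                     Some(Val::XReg(x)) => x.get_u64(),
//                     _ => unreachable!("exactly one x-register return was requested"),
//                 };
//                 DoneReason::ReturnToHost(ret)
//             }
//             DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
//             DoneReason::CallIndirectHost { id, resume } => {
//                 DoneReason::CallIndirectHost { id, resume }
//             }
//         }
//     }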
223
224impl Drop for Vm {
225    fn drop(&mut self) {
226        self.executing_pc.set_done();
227    }
228}
229
230/// The type of a register in the Pulley machine state.
231#[derive(Clone, Copy, Debug)]
232pub enum RegType {
233    /// An `x` register: integers.
234    XReg,
235
236    /// An `f` register: floats.
237    FReg,
238
239    /// A `v` register: vectors.
240    VReg,
241}
242
243/// A value that can be stored in a register.
244#[derive(Clone, Copy, Debug)]
245pub enum Val {
246    /// An `x` register value: integers.
247    XReg(XRegVal),
248
249    /// An `f` register value: floats.
250    FReg(FRegVal),
251
252    /// A `v` register value: vectors.
253    VReg(VRegVal),
254}
255
256impl fmt::LowerHex for Val {
257    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
258        match self {
259            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
260            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
261            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
262        }
263    }
264}
265
266impl From<XRegVal> for Val {
267    fn from(value: XRegVal) -> Self {
268        Val::XReg(value)
269    }
270}
271
272impl From<u64> for Val {
273    fn from(value: u64) -> Self {
274        XRegVal::new_u64(value).into()
275    }
276}
277
278impl From<u32> for Val {
279    fn from(value: u32) -> Self {
280        XRegVal::new_u32(value).into()
281    }
282}
283
284impl From<i64> for Val {
285    fn from(value: i64) -> Self {
286        XRegVal::new_i64(value).into()
287    }
288}
289
290impl From<i32> for Val {
291    fn from(value: i32) -> Self {
292        XRegVal::new_i32(value).into()
293    }
294}
295
296impl<T> From<*mut T> for Val {
297    fn from(value: *mut T) -> Self {
298        XRegVal::new_ptr(value).into()
299    }
300}
301
302impl From<FRegVal> for Val {
303    fn from(value: FRegVal) -> Self {
304        Val::FReg(value)
305    }
306}
307
308impl From<f64> for Val {
309    fn from(value: f64) -> Self {
310        FRegVal::new_f64(value).into()
311    }
312}
313
314impl From<f32> for Val {
315    fn from(value: f32) -> Self {
316        FRegVal::new_f32(value).into()
317    }
318}
319
320impl From<VRegVal> for Val {
321    fn from(value: VRegVal) -> Self {
322        Val::VReg(value)
323    }
324}
325
326/// An `x` register value: integers.
327#[derive(Copy, Clone)]
328pub struct XRegVal(XRegUnion);
329
330impl PartialEq for XRegVal {
331    fn eq(&self, other: &Self) -> bool {
332        self.get_u64() == other.get_u64()
333    }
334}
335
336impl Eq for XRegVal {}
337
338impl fmt::Debug for XRegVal {
339    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
340        f.debug_struct("XRegVal")
341            .field("as_u64", &self.get_u64())
342            .finish()
343    }
344}
345
346impl fmt::LowerHex for XRegVal {
347    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348        fmt::LowerHex::fmt(&self.get_u64(), f)
349    }
350}
351
352/// Contents of an "x" register, or a general-purpose register.
353///
354/// This is represented as a Rust `union` to make it easier to access typed
355/// views of this, notably the `ptr` field which enables preserving a bit of
356/// provenance for Rust for values stored as a pointer and read as a pointer.
357///
358/// Note that the actual in-memory representation of this value is handled
359/// carefully at this time. Pulley bytecode exposes the ability to store a
360/// 32-bit result into a register and then read the 64-bit contents of the
361/// register. This leaves us with the question of what to do with the upper bits
362/// of the register when the 32-bit result is generated. Possibilities for
363/// handling this are:
364///
365/// 1. Do nothing, just store the 32-bit value. The problem with this approach
366///    is that the "upper bits" are now endianness-dependent, which in turn
367///    makes the state of the register platform-dependent.
368/// 2. Sign or zero-extend. This restores platform-independent behavior but
369///    requires an extra store on 32-bit platforms because they can probably
370///    only store 32-bits at a time.
371/// 3. Always store the values in this union as little-endian. This means that
372///    big-endian platforms have to do a byte-swap but otherwise it has
373///    platform-independent behavior.
374///
375/// This union chooses route (3) at this time where the values here are always
376/// stored in little-endian form (even the `ptr` field). That guarantees
377/// cross-platform behavior while also minimizing the amount of data stored on
378/// writes.
379///
380/// In the future we may wish to benchmark this and possibly change this.
381/// Technically Cranelift-generated bytecode should never rely on the upper bits
382/// of a register if it didn't previously write them so this in theory doesn't
383/// actually matter for Cranelift or wasm semantics. The only cost right now is
384/// to big-endian platforms though and it's not certain how crucial performance
385/// will be there.
386///
387/// One final note is that this notably contrasts with native CPUs where
388/// native ISAs like RISC-V specifically define the entire register on every
389/// instruction, even if only the low half contains a significant result. Pulley
390/// is unlikely to become out-of-order within the CPU itself as it's interpreted,
391/// meaning that severing data-dependencies from previous operations is
392/// hypothesized to not be too important. If this is ever a problem though it
393/// could increase the likelihood we go for route (2) above instead (or maybe
394/// even (1)).
395#[derive(Copy, Clone)]
396union XRegUnion {
397    i32: i32,
398    u32: u32,
399    i64: i64,
400    u64: u64,
401
402    // Note that this is intentionally `usize` and not an actual pointer like
403    // `*mut u8`. The reason for this is that provenance is required in Rust for
404    // pointers but Cranelift has no pointer type and thus no concept of
405    // provenance. That means that at-rest it's not known whether the value has
406    // provenance or not, which basically means that Pulley is required to use
407    // "permissive provenance" in Rust as opposed to strict provenance.
408    //
409    // That's more-or-less a long-winded way of saying that storage of a pointer
410    // in this value is done with `.expose_provenance()` and reading a pointer
411    // uses `with_exposed_provenance_mut(..)`.
412    ptr: usize,
413}
414
415impl Default for XRegVal {
416    fn default() -> Self {
417        Self(unsafe { mem::zeroed() })
418    }
419}
420
421#[expect(missing_docs, reason = "self-describing methods")]
422impl XRegVal {
423    pub fn new_i32(x: i32) -> Self {
424        let mut val = XRegVal::default();
425        val.set_i32(x);
426        val
427    }
428
429    pub fn new_u32(x: u32) -> Self {
430        let mut val = XRegVal::default();
431        val.set_u32(x);
432        val
433    }
434
435    pub fn new_i64(x: i64) -> Self {
436        let mut val = XRegVal::default();
437        val.set_i64(x);
438        val
439    }
440
441    pub fn new_u64(x: u64) -> Self {
442        let mut val = XRegVal::default();
443        val.set_u64(x);
444        val
445    }
446
447    pub fn new_ptr<T>(ptr: *mut T) -> Self {
448        let mut val = XRegVal::default();
449        val.set_ptr(ptr);
450        val
451    }
452
453    pub fn get_i32(&self) -> i32 {
454        let x = unsafe { self.0.i32 };
455        i32::from_le(x)
456    }
457
458    pub fn get_u32(&self) -> u32 {
459        let x = unsafe { self.0.u32 };
460        u32::from_le(x)
461    }
462
463    pub fn get_i64(&self) -> i64 {
464        let x = unsafe { self.0.i64 };
465        i64::from_le(x)
466    }
467
468    pub fn get_u64(&self) -> u64 {
469        let x = unsafe { self.0.u64 };
470        u64::from_le(x)
471    }
472
473    pub fn get_ptr<T>(&self) -> *mut T {
474        let ptr = unsafe { self.0.ptr };
475        let ptr = usize::from_le(ptr);
476        #[cfg(has_provenance_apis)]
477        return core::ptr::with_exposed_provenance_mut(ptr);
478        #[cfg(not(has_provenance_apis))]
479        return ptr as *mut T;
480    }
481
482    pub fn set_i32(&mut self, x: i32) {
483        self.0.i32 = x.to_le();
484    }
485
486    pub fn set_u32(&mut self, x: u32) {
487        self.0.u32 = x.to_le();
488    }
489
490    pub fn set_i64(&mut self, x: i64) {
491        self.0.i64 = x.to_le();
492    }
493
494    pub fn set_u64(&mut self, x: u64) {
495        self.0.u64 = x.to_le();
496    }
497
498    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
499        #[cfg(has_provenance_apis)]
500        let ptr = ptr.expose_provenance();
501        #[cfg(not(has_provenance_apis))]
502        let ptr = ptr as usize;
503        self.0.ptr = ptr.to_le();
504    }
505}
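
// A small sketch (not from the original source) exercising the little-endian
// storage policy documented on `XRegUnion` above: a 32-bit write leaves the
// upper half of the register as the zero bytes from `XRegVal::default()`, and
// reads observe the same value regardless of host endianness.
#[test]
fn xregval_little_endian_storage() {
    let mut val = XRegVal::default();
    val.set_u32(0xaabb_ccdd);
    assert_eq!(val.get_u32(), 0xaabb_ccdd);
    // Only the low 32 bits were written; the high half is still zero.
    assert_eq!(val.get_u64(), 0x0000_0000_aabb_ccdd);
}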
506
507/// An `f` register value: floats.
508#[derive(Copy, Clone)]
509pub struct FRegVal(FRegUnion);
510
511impl fmt::Debug for FRegVal {
512    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
513        f.debug_struct("FRegVal")
514            .field("as_f32", &self.get_f32())
515            .field("as_f64", &self.get_f64())
516            .finish()
517    }
518}
519
520impl fmt::LowerHex for FRegVal {
521    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
522        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
523    }
524}
525
526// NB: like `XRegUnion` values here are always little-endian, see the
527// documentation above for more details.
528#[derive(Copy, Clone)]
529union FRegUnion {
530    f32: u32,
531    f64: u64,
532}
533
534impl Default for FRegVal {
535    fn default() -> Self {
536        Self(unsafe { mem::zeroed() })
537    }
538}
539
540#[expect(missing_docs, reason = "self-describing methods")]
541impl FRegVal {
542    pub fn new_f32(f: f32) -> Self {
543        let mut val = Self::default();
544        val.set_f32(f);
545        val
546    }
547
548    pub fn new_f64(f: f64) -> Self {
549        let mut val = Self::default();
550        val.set_f64(f);
551        val
552    }
553
554    pub fn get_f32(&self) -> f32 {
555        let val = unsafe { self.0.f32 };
556        f32::from_le_bytes(val.to_ne_bytes())
557    }
558
559    pub fn get_f64(&self) -> f64 {
560        let val = unsafe { self.0.f64 };
561        f64::from_le_bytes(val.to_ne_bytes())
562    }
563
564    pub fn set_f32(&mut self, val: f32) {
565        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
566    }
567
568    pub fn set_f64(&mut self, val: f64) {
569        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
570    }
571}
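
// A minimal sketch (not part of the original source): float values round-trip
// unchanged through the little-endian `FRegUnion` storage on any host.
#[test]
fn fregval_round_trip() {
    let mut val = FRegVal::default();
    val.set_f32(1.5);
    assert_eq!(val.get_f32(), 1.5);
    val.set_f64(-2.25);
    assert_eq!(val.get_f64(), -2.25);
}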
572
573/// A `v` register value: vectors.
574#[derive(Copy, Clone)]
575pub struct VRegVal(VRegUnion);
576
577impl fmt::Debug for VRegVal {
578    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
579        f.debug_struct("VRegVal")
580            .field("as_u128", &unsafe { self.0.u128 })
581            .finish()
582    }
583}
584
585impl fmt::LowerHex for VRegVal {
586    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
588    }
589}
590
591/// 128-bit vector registers.
592///
593/// This register is always stored in little-endian order and has different
594/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
595/// union are the same width so all bits are always defined. Note that
596/// little-endian is required though so bitcasts between different shapes of
597/// vectors work. This union cannot be stored in big-endian form.
598#[derive(Copy, Clone)]
599#[repr(align(16))]
600union VRegUnion {
601    u128: u128,
602    i8x16: [i8; 16],
603    i16x8: [i16; 8],
604    i32x4: [i32; 4],
605    i64x2: [i64; 2],
606    u8x16: [u8; 16],
607    u16x8: [u16; 8],
608    u32x4: [u32; 4],
609    u64x2: [u64; 2],
610    // Note that these are `u32` and `u64`, not f32/f64. That's only because
611    // f32/f64 don't have `.to_le()` and `::from_le()`, so we need to go through
612    // the bits anyway.
613    f32x4: [u32; 4],
614    f64x2: [u64; 2],
615}
616
617impl Default for VRegVal {
618    fn default() -> Self {
619        Self(unsafe { mem::zeroed() })
620    }
621}
622
623#[expect(missing_docs, reason = "self-describing methods")]
624impl VRegVal {
625    pub fn new_u128(i: u128) -> Self {
626        let mut val = Self::default();
627        val.set_u128(i);
628        val
629    }
630
631    pub fn get_u128(&self) -> u128 {
632        let val = unsafe { self.0.u128 };
633        u128::from_le(val)
634    }
635
636    pub fn set_u128(&mut self, val: u128) {
637        self.0.u128 = val.to_le();
638    }
639
640    fn get_i8x16(&self) -> [i8; 16] {
641        let val = unsafe { self.0.i8x16 };
642        val.map(|e| i8::from_le(e))
643    }
644
645    fn set_i8x16(&mut self, val: [i8; 16]) {
646        self.0.i8x16 = val.map(|e| e.to_le());
647    }
648
649    fn get_u8x16(&self) -> [u8; 16] {
650        let val = unsafe { self.0.u8x16 };
651        val.map(|e| u8::from_le(e))
652    }
653
654    fn set_u8x16(&mut self, val: [u8; 16]) {
655        self.0.u8x16 = val.map(|e| e.to_le());
656    }
657
658    fn get_i16x8(&self) -> [i16; 8] {
659        let val = unsafe { self.0.i16x8 };
660        val.map(|e| i16::from_le(e))
661    }
662
663    fn set_i16x8(&mut self, val: [i16; 8]) {
664        self.0.i16x8 = val.map(|e| e.to_le());
665    }
666
667    fn get_u16x8(&self) -> [u16; 8] {
668        let val = unsafe { self.0.u16x8 };
669        val.map(|e| u16::from_le(e))
670    }
671
672    fn set_u16x8(&mut self, val: [u16; 8]) {
673        self.0.u16x8 = val.map(|e| e.to_le());
674    }
675
676    fn get_i32x4(&self) -> [i32; 4] {
677        let val = unsafe { self.0.i32x4 };
678        val.map(|e| i32::from_le(e))
679    }
680
681    fn set_i32x4(&mut self, val: [i32; 4]) {
682        self.0.i32x4 = val.map(|e| e.to_le());
683    }
684
685    fn get_u32x4(&self) -> [u32; 4] {
686        let val = unsafe { self.0.u32x4 };
687        val.map(|e| u32::from_le(e))
688    }
689
690    fn set_u32x4(&mut self, val: [u32; 4]) {
691        self.0.u32x4 = val.map(|e| e.to_le());
692    }
693
694    fn get_i64x2(&self) -> [i64; 2] {
695        let val = unsafe { self.0.i64x2 };
696        val.map(|e| i64::from_le(e))
697    }
698
699    fn set_i64x2(&mut self, val: [i64; 2]) {
700        self.0.i64x2 = val.map(|e| e.to_le());
701    }
702
703    fn get_u64x2(&self) -> [u64; 2] {
704        let val = unsafe { self.0.u64x2 };
705        val.map(|e| u64::from_le(e))
706    }
707
708    fn set_u64x2(&mut self, val: [u64; 2]) {
709        self.0.u64x2 = val.map(|e| e.to_le());
710    }
711
712    fn get_f64x2(&self) -> [f64; 2] {
713        let val = unsafe { self.0.f64x2 };
714        val.map(|e| f64::from_bits(u64::from_le(e)))
715    }
716
717    fn set_f64x2(&mut self, val: [f64; 2]) {
718        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
719    }
720
721    fn get_f32x4(&self) -> [f32; 4] {
722        let val = unsafe { self.0.f32x4 };
723        val.map(|e| f32::from_bits(u32::from_le(e)))
724    }
725
726    fn set_f32x4(&mut self, val: [f32; 4]) {
727        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
728    }
729}
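
// A small sketch (not from the original source) of the lane ordering implied
// by the always-little-endian `VRegUnion` above: lane 0 of the `u32x4` view
// occupies the least-significant 32 bits of the `u128` view on any host.
#[test]
fn vregval_lane_order() {
    let mut val = VRegVal::default();
    val.set_u32x4([1, 2, 3, 4]);
    assert_eq!(val.get_u32x4(), [1, 2, 3, 4]);
    assert_eq!(val.get_u128() & 0xffff_ffff, 1);
    assert_eq!(val.get_u128() >> 96, 4);
}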
730
731/// The machine state for a Pulley virtual machine: the various registers and
732/// stack.
733pub struct MachineState {
734    x_regs: [XRegVal; XReg::RANGE.end as usize],
735    f_regs: [FRegVal; FReg::RANGE.end as usize],
736    v_regs: [VRegVal; VReg::RANGE.end as usize],
737    fp: *mut u8,
738    lr: *mut u8,
739    stack: Stack,
740    done_reason: Option<DoneReason<()>>,
741}
742
743unsafe impl Send for MachineState {}
744unsafe impl Sync for MachineState {}
745
746/// Helper structure to store the state of the Pulley stack.
747///
748/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
749/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
750/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
751/// This is manually done with a helper `Align16` type below.
752struct Stack {
753    storage: Vec<Align16>,
754}
755
756/// Helper type used with `Stack` above.
757#[derive(Copy, Clone)]
758#[repr(align(16))]
759struct Align16 {
760    // Just here to give the structure a size of 16. The alignment is always 16
761    // regardless of what the host platform's alignment of u128 is.
762    _unused: u128,
763}
764
765impl Stack {
766    /// Creates a new stack which will have a byte size of at least `size`.
767    ///
768    /// The allocated stack might be slightly larger due to rounding up to a multiple of 16.
769    fn new(size: usize) -> Stack {
770        Stack {
771            // Round up `size` to the nearest multiple of 16. Note that the
772            // stack is also allocated here but not initialized, and that's
773            // intentional as pulley bytecode should always initialize the stack
774            // before use.
775            storage: Vec::with_capacity((size + 15) / 16),
776        }
777    }
778
779    /// Returns a pointer to the top of the stack (the highest address).
780    ///
781    /// Note that the returned pointer has provenance for the entire stack
782    /// allocation, however, not just the top.
783    fn top(&mut self) -> *mut u8 {
784        let len = self.len();
785        unsafe { self.base().add(len) }
786    }
787
788    /// Returns a pointer to the base of the stack (the lowest address).
789    ///
790    /// Note that the returned pointer has provenance for the entire stack
791    /// allocation, however, not just the base.
792    fn base(&mut self) -> *mut u8 {
793        self.storage.as_mut_ptr().cast::<u8>()
794    }
795
796    /// Returns the length, in bytes, of this stack allocation.
797    fn len(&self) -> usize {
798        self.storage.capacity() * mem::size_of::<Align16>()
799    }
800}
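
// A small sketch (not from the original source) of the `Stack` invariants
// described above: the byte length is a multiple of 16 and `top` sits exactly
// `len` bytes above `base`.
#[test]
fn stack_layout() {
    let mut stack = Stack::new(1);
    assert!(stack.len() >= 16);
    assert_eq!(stack.len() % 16, 0);
    assert_eq!(stack.top() as usize - stack.base() as usize, stack.len());
}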
801
802impl fmt::Debug for MachineState {
803    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
804        let MachineState {
805            x_regs,
806            f_regs,
807            v_regs,
808            stack: _,
809            done_reason: _,
810            fp: _,
811            lr: _,
812        } = self;
813
814        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
815
816        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
817            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
818                let mut f = f.debug_map();
819                for (i, r) in self.0.iter().enumerate() {
820                    f.entry(&(self.1)(i as u8), r);
821                }
822                f.finish()
823            }
824        }
825
826        f.debug_struct("MachineState")
827            .field(
828                "x_regs",
829                &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
830            )
831            .field(
832                "f_regs",
833                &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
834            )
835            .field(
836                "v_regs",
837                &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
838            )
839            .finish_non_exhaustive()
840    }
841}
842
843macro_rules! index_reg {
844    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
845        impl Index<$reg_ty> for Vm {
846            type Output = $value_ty;
847
848            fn index(&self, reg: $reg_ty) -> &Self::Output {
849                &self.state[reg]
850            }
851        }
852
853        impl IndexMut<$reg_ty> for Vm {
854            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
855                &mut self.state[reg]
856            }
857        }
858
859        impl Index<$reg_ty> for MachineState {
860            type Output = $value_ty;
861
862            fn index(&self, reg: $reg_ty) -> &Self::Output {
863                &self.$field[reg.index()]
864            }
865        }
866
867        impl IndexMut<$reg_ty> for MachineState {
868            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
869                &mut self.$field[reg.index()]
870            }
871        }
872    };
873}
874
875index_reg!(XReg, XRegVal, x_regs);
876index_reg!(FReg, FRegVal, f_regs);
877index_reg!(VReg, VRegVal, v_regs);
878
879/// Sentinel return address that signals the end of the call stack.
880const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
881
882impl MachineState {
883    fn with_stack(stack_size: usize) -> Self {
884        let mut state = Self {
885            x_regs: [Default::default(); XReg::RANGE.end as usize],
886            f_regs: Default::default(),
887            v_regs: Default::default(),
888            stack: Stack::new(stack_size),
889            done_reason: None,
890            fp: HOST_RETURN_ADDR,
891            lr: HOST_RETURN_ADDR,
892        };
893
894        let sp = state.stack.top();
895        state[XReg::sp] = XRegVal::new_ptr(sp);
896
897        state
898    }
899}
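
// A minimal sketch (not from the original source) of the initial machine state
// constructed above: `sp` starts at the top of the freshly allocated stack and
// `fp`/`lr` hold the host-return sentinel.
#[test]
fn machine_state_initial_registers() {
    let mut state = MachineState::with_stack(64);
    let sp = state[XReg::sp].get_ptr::<u8>();
    assert_eq!(sp as usize, state.stack.top() as usize);
    assert_eq!(state.fp, HOST_RETURN_ADDR);
    assert_eq!(state.lr, HOST_RETURN_ADDR);
}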
900
901/// Inner private module to prevent creation of the `Done` structure outside of
902/// this module.
903mod done {
904    use super::{Encode, Interpreter, MachineState};
905    use core::ops::ControlFlow;
906    use core::ptr::NonNull;
907
908    /// Zero-sized sentinel indicating that pulley execution has halted.
909    ///
910    /// The reason for halting is stored in `MachineState`.
911    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
912    pub struct Done {
913        _priv: (),
914    }
915
916    /// Reason that the pulley interpreter has ceased execution.
917    pub enum DoneReason<T> {
918        /// A trap happened at this bytecode instruction.
919        Trap {
920            /// Which instruction is raising this trap.
921            pc: NonNull<u8>,
922            /// The kind of trap being raised, if known.
923            kind: Option<TrapKind>,
924        },
925        /// The `call_indirect_host` instruction was executed.
926        CallIndirectHost {
927            /// The payload of `call_indirect_host`.
928            id: u8,
929            /// Where to resume execution after the host has finished.
930            resume: NonNull<u8>,
931        },
932        /// Pulley has finished and the provided value is being returned.
933        ReturnToHost(T),
934    }
935
936    /// Stored within `DoneReason::Trap`.
937    #[expect(missing_docs, reason = "self-describing variants")]
938    pub enum TrapKind {
939        DivideByZero,
940        IntegerOverflow,
941        BadConversionToInteger,
942        MemoryOutOfBounds,
943    }
944
945    impl MachineState {
946        pub(super) fn debug_assert_done_reason_none(&mut self) {
947            debug_assert!(self.done_reason.is_none());
948        }
949
950        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
951            self.done_reason.take().unwrap()
952        }
953    }
954
955    impl Interpreter<'_> {
956        /// Finishes execution by recording `DoneReason::Trap`.
957        ///
958        /// This method takes an `I` generic parameter indicating which
959        /// instruction is executing this function and generating a trap. That's
960        /// used to go backwards from the current `pc` which is just beyond the
961        /// instruction to point to the instruction itself in the trap metadata
962        /// returned from the interpreter.
963        #[cold]
964        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
965            self.done_trap_kind::<I>(None)
966        }
967
968        /// Same as `done_trap` but with an explicit `TrapKind`.
969        #[cold]
970        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
971            let pc = self.current_pc::<I>();
972            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
973            ControlFlow::Break(Done { _priv: () })
974        }
975
976        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
977        #[cold]
978        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
979            self.state.done_reason = Some(DoneReason::CallIndirectHost {
980                id,
981                resume: self.pc.as_ptr(),
982            });
983            ControlFlow::Break(Done { _priv: () })
984        }
985
986        /// Finishes execution by recording `DoneReason::ReturnToHost`.
987        #[cold]
988        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
989            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
990            ControlFlow::Break(Done { _priv: () })
991        }
992    }
993}
994
995use done::Done;
996pub use done::{DoneReason, TrapKind};
997
998struct Interpreter<'a> {
999    state: &'a mut MachineState,
1000    pc: UnsafeBytecodeStream,
1001    executing_pc: ExecutingPcRef<'a>,
1002}
1003
1004impl Interpreter<'_> {
1005    /// Performs a relative jump of `offset` bytes from the current instruction.
1006    ///
1007    /// This will jump from the start of the current instruction, identified by
1008    /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
1009    /// function actually points to the instruction after this one so `I` is
1010    /// necessary to go back to ourselves after which we then go `offset` away.
1011    #[inline]
1012    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1013        let offset = isize::try_from(i32::from(offset)).unwrap();
1014        let my_pc = self.current_pc::<I>();
1015        self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) };
1016        ControlFlow::Continue(())
1017    }
1018
1019    /// Returns the PC of the current instruction where `I` is the static type
1020    /// representing the current instruction.
1021    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1022        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1023    }
1024
1025    /// `sp -= size_of::<T>(); *sp = val;`
1026    ///
1027    /// Note that `I` is the instruction which is pushing data to use if a trap
1028    /// is generated.
1029    #[must_use]
1030    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1031        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1032        self.set_sp::<I>(new_sp.cast())?;
1033        unsafe {
1034            new_sp.write_unaligned(val);
1035        }
1036        ControlFlow::Continue(())
1037    }
1038
1039    /// `ret = *sp; sp += size_of::<T>()`
1040    fn pop<T>(&mut self) -> T {
1041        let sp = self.state[XReg::sp].get_ptr::<T>();
1042        let val = unsafe { sp.read_unaligned() };
1043        self.set_sp_unchecked(sp.wrapping_add(1));
1044        val
1045    }
1046
1047    /// Sets the stack pointer to the `sp` provided.
1048    ///
1049    /// Returns a trap if this would result in stack overflow, i.e. if `sp` is
1050    /// beneath the base pointer of `self.state.stack`.
1051    ///
1052    /// The `I` parameter here is the instruction that is setting the stack
1053    /// pointer and is used to calculate this instruction's own `pc` if this
1054    /// instruction traps.
1055    #[must_use]
1056    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1057        let sp_raw = sp as usize;
1058        let base_raw = self.state.stack.base() as usize;
1059        if sp_raw < base_raw {
1060            return self.done_trap::<I>();
1061        }
1062        self.set_sp_unchecked(sp);
1063        ControlFlow::Continue(())
1064    }
1065
1066    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1067    /// only be used with stack increment operations such as `pop`.
1068    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1069        if cfg!(debug_assertions) {
1070            let sp_raw = sp as usize;
1071            let base = self.state.stack.base() as usize;
1072            let end = base + self.state.stack.len();
1073            assert!(base <= sp_raw && sp_raw <= end);
1074        }
1075        self.state[XReg::sp].set_ptr(sp);
1076    }
1077
1078    /// Loads a value of `T` using native-endian byte ordering from the `addr`
1079    /// specified.
1080    ///
1081    /// The `I` type parameter is the instruction issuing this load which is
1082    /// used in case of traps to calculate the trapping pc.
1083    ///
1084    /// Returns `ControlFlow::Break` if a trap happens or
1085    /// `ControlFlow::Continue` if the value was loaded successfully.
1086    ///
1087    /// # Unsafety
1088    ///
1089    /// Safety of this method relies on the safety of the original bytecode
1090    /// itself and correctly annotating both `T` and `I`.
1091    #[must_use]
1092    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1093        unsafe { addr.load_ne::<T, I>(self) }
1094    }
1095
1096    /// Stores a `val` to the `addr` specified.
1097    ///
1098    /// The `I` type parameter is the instruction issuing this store which is
1099    /// used in case of traps to calculate the trapping pc.
1100    ///
1101    /// Returns `ControlFlow::Break` if a trap happens or
1102    /// `ControlFlow::Continue` if the value was stored successfully.
1103    ///
1104    /// # Unsafety
1105    ///
1106    /// Safety of this method relies on the safety of the original bytecode
1107    /// itself and correctly annotating both `T` and `I`.
1108    #[must_use]
1109    unsafe fn store_ne<T, I: Encode>(
1110        &mut self,
1111        addr: impl AddressingMode,
1112        val: T,
1113    ) -> ControlFlow<Done> {
1114        unsafe { addr.store_ne::<T, I>(self, val) }
1115    }
1116
1117    fn check_xnn_from_fnn<I: Encode>(&mut self, val: f64, lo: f64, hi: f64) -> ControlFlow<Done> {
1118        if val != val { // true only when `val` is NaN
1119            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1120        }
1121        let val = val.wasm_trunc();
1122        if val <= lo || val >= hi {
1123            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1124        }
1125        ControlFlow::Continue(())
1126    }
1127
1128    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1129        let lo = self.state[lo].get_u64();
1130        let hi = self.state[hi].get_i64();
1131        i128::from(lo) | (i128::from(hi) << 64)
1132    }
1133
1134    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1135        self.state[lo].set_u64(val as u64);
1136        self.state[hi].set_u64((val >> 64) as u64);
1137    }
1138
1139    fn record_executing_pc_for_profiling(&mut self) {
1140        // Note that this is a no-op if `feature = "profile"` is disabled.
1141        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1142    }
1143}
1144
1145/// Helper trait to encompass the various addressing modes of Pulley.
1146trait AddressingMode: Sized {
1147    /// Calculates the native host address `*mut T` corresponding to this
1148    /// addressing mode.
1149    ///
1150    /// # Safety
1151    ///
1152    /// Relies on the original bytecode being safe to execute as this will
1153    /// otherwise perform unsafe byte offsets for example which requires the
1154    /// original bytecode to be correct.
1155    #[must_use]
1156    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1157
1158    /// Loads a value of `T` from this address, using native-endian byte order.
1159    ///
1160    /// For more information see [`Interpreter::load_ne`].
1161    #[must_use]
1162    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1163        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1164        ControlFlow::Continue(ret)
1165    }
1166
1167    /// Stores a `val` to this address, using native-endian byte order.
1168    ///
1169    /// For more information see [`Interpreter::store_ne`].
1170    #[must_use]
1171    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1172        unsafe {
1173            self.addr::<T, I>(i)?.write_unaligned(val);
1174        }
1175        ControlFlow::Continue(())
1176    }
1177}
1178
1179impl AddressingMode for AddrO32 {
1180    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1181        // Note that this addressing mode cannot return `ControlFlow::Break`
1182        // which is intentional. It's expected that LLVM optimizes away any
1183        // branches callers have.
1184        unsafe {
1185            ControlFlow::Continue(
1186                i.state[self.addr]
1187                    .get_ptr::<T>()
1188                    .byte_offset(self.offset as isize),
1189            )
1190        }
1191    }
1192}
1193
1194impl AddressingMode for AddrZ {
1195    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1196        // This addressing mode defines loading/storing to the null address as
1197        // a trap, but all other addresses are allowed.
1198        let host_addr = i.state[self.addr].get_ptr::<T>();
1199        if host_addr.is_null() {
1200            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1201            unreachable!();
1202        }
1203        unsafe {
1204            let addr = host_addr.byte_offset(self.offset as isize);
1205            ControlFlow::Continue(addr)
1206        }
1207    }
1208}
1209
1210impl AddressingMode for AddrG32 {
1211    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1212        // Test if `bound - offset - size_of::<T>()` is less than the wasm address
1213        // to generate a trap; see the worked example after `AddrG32Bne` below. It's
1214        // a guarantee of this instruction that these subtractions don't overflow.
1215        let bound = i.state[self.host_heap_bound].get_u64() as usize;
1216        let offset = usize::from(self.offset);
1217        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1218        if wasm_addr > bound - offset - size_of::<T>() {
1219            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1220            unreachable!();
1221        }
1222        unsafe {
1223            let addr = i.state[self.host_heap_base]
1224                .get_ptr::<T>()
1225                .byte_add(wasm_addr)
1226                .byte_add(offset);
1227            ControlFlow::Continue(addr)
1228        }
1229    }
1230}
1231
1232impl AddressingMode for AddrG32Bne {
1233    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1234        // Same as `AddrG32` above except that the bound is loaded from memory.
1235        let bound = unsafe {
1236            *i.state[self.host_heap_bound_addr]
1237                .get_ptr::<usize>()
1238                .byte_add(usize::from(self.host_heap_bound_offset))
1239        };
1240        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1241        let offset = usize::from(self.offset);
1242        if wasm_addr > bound - offset - size_of::<T>() {
1243            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1244            unreachable!();
1245        }
1246        unsafe {
1247            let addr = i.state[self.host_heap_base]
1248                .get_ptr::<T>()
1249                .byte_add(wasm_addr)
1250                .byte_add(offset);
1251            ControlFlow::Continue(addr)
1252        }
1253    }
1254}
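
// A worked sketch (not from the original source) of the bounds check used by
// `AddrG32` and `AddrG32Bne` above, restated on plain integers: with a
// hypothetical 0x1000-byte heap, a static offset of 4, and a 4-byte access,
// the last wasm address that passes the `wasm_addr > bound - offset - size`
// guard is 0xff8, whose access ends exactly at the bound.
#[test]
fn g32_bounds_check_arithmetic() {
    fn in_bounds(wasm_addr: usize, bound: usize, offset: usize, size: usize) -> bool {
        !(wasm_addr > bound - offset - size)
    }
    assert!(in_bounds(0xff8, 0x1000, 4, 4));
    assert!(!in_bounds(0xff9, 0x1000, 4, 4));
    assert!(!in_bounds(usize::MAX, 0x1000, 4, 4));
}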
1255
1256#[test]
1257fn simple_push_pop() {
1258    let mut state = MachineState::with_stack(16);
1259    let pc = ExecutingPc::default();
1260    unsafe {
1261        let mut bytecode = [0; 10];
1262        let mut i = Interpreter {
1263            state: &mut state,
1264            // this isn't actually read so just manufacture a dummy one
1265            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
1266            executing_pc: pc.as_ref(),
1267        };
1268        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
1269        assert_eq!(i.pop::<i32>(), 0_i32);
1270        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
1271        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
1272        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
1273        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
1274        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
1275        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
1276        assert_eq!(i.pop::<i32>(), 4_i32);
1277        assert_eq!(i.pop::<i32>(), 3_i32);
1278        assert_eq!(i.pop::<i32>(), 2_i32);
1279        assert_eq!(i.pop::<i32>(), 1_i32);
1280    }
1281}
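
// A small sketch (not from the original source) of `Interpreter::get_i128` and
// `Interpreter::set_i128` above: a 128-bit value is split across a low/high
// pair of `x` registers and round-trips exactly, including negative values.
#[test]
fn i128_register_pair_round_trip() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // as in `simple_push_pop` above, the pc is never actually read here
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        for val in [0, 1, -1, -5, i128::MIN, i128::MAX] {
            i.set_i128(XReg::x0, XReg::x1, val);
            assert_eq!(i.get_i128(XReg::x0, XReg::x1), val);
        }
    }
}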
1282
1283macro_rules! br_if_imm {
1284    ($(
1285        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
1286            = $camel:ident / $op:tt / $get:ident;
1287    )*) => {$(
1288        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
1289            let a = self.state[a].$get();
1290            if a $op b.into() {
1291                self.pc_rel_jump::<crate::$camel>(offset)
1292            } else {
1293                ControlFlow::Continue(())
1294            }
1295        }
1296    )*};
1297}
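
// For reference, a sketch (not code emitted by the original source) of what a
// single expansion of `br_if_imm!` produces when invoked inside `impl
// OpVisitor` below, here for `br_if_xeq32_i8`:
//
//     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset) -> ControlFlow<Done> {
//         let a = self.state[a].get_i32();
//         if a == b.into() {
//             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
//         } else {
//             ControlFlow::Continue(())
//         }
//     }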
1298
1299impl OpVisitor for Interpreter<'_> {
1300    type BytecodeStream = UnsafeBytecodeStream;
1301    type Return = ControlFlow<Done>;
1302
1303    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
1304        &mut self.pc
1305    }
1306
1307    fn ret(&mut self) -> ControlFlow<Done> {
1308        let lr = self.state.lr;
1309        if lr == HOST_RETURN_ADDR {
1310            self.done_return_to_host()
1311        } else {
1312            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1313            ControlFlow::Continue(())
1314        }
1315    }
1316
1317    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1318        let return_addr = self.pc.as_ptr();
1319        self.state.lr = return_addr.as_ptr();
1320        self.pc_rel_jump::<crate::Call>(offset);
1321        ControlFlow::Continue(())
1322    }
1323
1324    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1325        let return_addr = self.pc.as_ptr();
1326        self.state.lr = return_addr.as_ptr();
1327        self.state[XReg::x0] = self.state[arg1];
1328        self.pc_rel_jump::<crate::Call1>(offset);
1329        ControlFlow::Continue(())
1330    }
1331
1332    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1333        let return_addr = self.pc.as_ptr();
1334        self.state.lr = return_addr.as_ptr();
1335        let (x0, x1) = (self.state[arg1], self.state[arg2]);
1336        self.state[XReg::x0] = x0;
1337        self.state[XReg::x1] = x1;
1338        self.pc_rel_jump::<crate::Call2>(offset);
1339        ControlFlow::Continue(())
1340    }
1341
1342    fn call3(
1343        &mut self,
1344        arg1: XReg,
1345        arg2: XReg,
1346        arg3: XReg,
1347        offset: PcRelOffset,
1348    ) -> ControlFlow<Done> {
1349        let return_addr = self.pc.as_ptr();
1350        self.state.lr = return_addr.as_ptr();
1351        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1352        self.state[XReg::x0] = x0;
1353        self.state[XReg::x1] = x1;
1354        self.state[XReg::x2] = x2;
1355        self.pc_rel_jump::<crate::Call3>(offset);
1356        ControlFlow::Continue(())
1357    }
1358
1359    fn call4(
1360        &mut self,
1361        arg1: XReg,
1362        arg2: XReg,
1363        arg3: XReg,
1364        arg4: XReg,
1365        offset: PcRelOffset,
1366    ) -> ControlFlow<Done> {
1367        let return_addr = self.pc.as_ptr();
1368        self.state.lr = return_addr.as_ptr();
1369        let (x0, x1, x2, x3) = (
1370            self.state[arg1],
1371            self.state[arg2],
1372            self.state[arg3],
1373            self.state[arg4],
1374        );
1375        self.state[XReg::x0] = x0;
1376        self.state[XReg::x1] = x1;
1377        self.state[XReg::x2] = x2;
1378        self.state[XReg::x3] = x3;
1379        self.pc_rel_jump::<crate::Call4>(offset);
1380        ControlFlow::Continue(())
1381    }
1382
1383    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1384        let return_addr = self.pc.as_ptr();
1385        self.state.lr = return_addr.as_ptr();
1386        // SAFETY: part of the unsafe contract of the interpreter is that only
1387        // valid bytecode is interpreted, so the jump destination is part of the
1388        // validity of the bytecode itself.
1389        unsafe {
1390            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1391        }
1392        ControlFlow::Continue(())
1393    }
1394
1395    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1396        self.pc_rel_jump::<crate::Jump>(offset);
1397        ControlFlow::Continue(())
1398    }
1399
1400    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1401        unsafe {
1402            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1403        }
1404        ControlFlow::Continue(())
1405    }
1406
1407    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1408        let cond = self.state[cond].get_u32();
1409        if cond != 0 {
1410            self.pc_rel_jump::<crate::BrIf>(offset)
1411        } else {
1412            ControlFlow::Continue(())
1413        }
1414    }
1415
1416    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1417        let cond = self.state[cond].get_u32();
1418        if cond == 0 {
1419            self.pc_rel_jump::<crate::BrIfNot>(offset)
1420        } else {
1421            ControlFlow::Continue(())
1422        }
1423    }
1424
1425    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1426        let a = self.state[a].get_u32();
1427        let b = self.state[b].get_u32();
1428        if a == b {
1429            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1430        } else {
1431            ControlFlow::Continue(())
1432        }
1433    }
1434
1435    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1436        let a = self.state[a].get_u32();
1437        let b = self.state[b].get_u32();
1438        if a != b {
1439            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1440        } else {
1441            ControlFlow::Continue(())
1442        }
1443    }
1444
1445    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1446        let a = self.state[a].get_i32();
1447        let b = self.state[b].get_i32();
1448        if a < b {
1449            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1450        } else {
1451            ControlFlow::Continue(())
1452        }
1453    }
1454
1455    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1456        let a = self.state[a].get_i32();
1457        let b = self.state[b].get_i32();
1458        if a <= b {
1459            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1460        } else {
1461            ControlFlow::Continue(())
1462        }
1463    }
1464
1465    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1466        let a = self.state[a].get_u32();
1467        let b = self.state[b].get_u32();
1468        if a < b {
1469            self.pc_rel_jump::<crate::BrIfXult32>(offset)
1470        } else {
1471            ControlFlow::Continue(())
1472        }
1473    }
1474
1475    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1476        let a = self.state[a].get_u32();
1477        let b = self.state[b].get_u32();
1478        if a <= b {
1479            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1480        } else {
1481            ControlFlow::Continue(())
1482        }
1483    }
1484
1485    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1486        let a = self.state[a].get_u64();
1487        let b = self.state[b].get_u64();
1488        if a == b {
1489            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1490        } else {
1491            ControlFlow::Continue(())
1492        }
1493    }
1494
1495    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1496        let a = self.state[a].get_u64();
1497        let b = self.state[b].get_u64();
1498        if a != b {
1499            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1500        } else {
1501            ControlFlow::Continue(())
1502        }
1503    }
1504
1505    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1506        let a = self.state[a].get_i64();
1507        let b = self.state[b].get_i64();
1508        if a < b {
1509            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1510        } else {
1511            ControlFlow::Continue(())
1512        }
1513    }
1514
1515    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1516        let a = self.state[a].get_i64();
1517        let b = self.state[b].get_i64();
1518        if a <= b {
1519            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1520        } else {
1521            ControlFlow::Continue(())
1522        }
1523    }
1524
1525    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1526        let a = self.state[a].get_u64();
1527        let b = self.state[b].get_u64();
1528        if a < b {
1529            self.pc_rel_jump::<crate::BrIfXult64>(offset)
1530        } else {
1531            ControlFlow::Continue(())
1532        }
1533    }
1534
1535    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1536        let a = self.state[a].get_u64();
1537        let b = self.state[b].get_u64();
1538        if a <= b {
1539            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1540        } else {
1541            ControlFlow::Continue(())
1542        }
1543    }
1544
1545    br_if_imm! {
1546        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1547            = BrIfXeq32I8 / == / get_i32;
1548        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1549            = BrIfXeq32I32 / == / get_i32;
1550        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1551            = BrIfXneq32I8 / != / get_i32;
1552        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1553            = BrIfXneq32I32 / != / get_i32;
1554
1555        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1556            = BrIfXslt32I8 / < / get_i32;
1557        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1558            = BrIfXslt32I32 / < / get_i32;
1559        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1560            = BrIfXsgt32I8 / > / get_i32;
1561        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1562            = BrIfXsgt32I32 / > / get_i32;
1563        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1564            = BrIfXslteq32I8 / <= / get_i32;
1565        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1566            = BrIfXslteq32I32 / <= / get_i32;
1567        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1568            = BrIfXsgteq32I8 / >= / get_i32;
1569        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1570            = BrIfXsgteq32I32 / >= / get_i32;
1571
1572        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1573            = BrIfXult32U8 / < / get_u32;
1574        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1575            = BrIfXult32U32 / < / get_u32;
1576        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1577            = BrIfXugt32U8 / > / get_u32;
1578        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1579            = BrIfXugt32U32 / > / get_u32;
1580        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1581            = BrIfXulteq32U8 / <= / get_u32;
1582        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1583            = BrIfXulteq32U32 / <= / get_u32;
1584        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1585            = BrIfXugteq32U8 / >= / get_u32;
1586        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1587            = BrIfXugteq32U32 / >= / get_u32;
1588
1589        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1590            = BrIfXeq64I8 / == / get_i64;
1591        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1592            = BrIfXeq64I32 / == / get_i64;
1593        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1594            = BrIfXneq64I8 / != / get_i64;
1595        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1596            = BrIfXneq64I32 / != / get_i64;
1597
1598        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1599            = BrIfXslt64I8 / < / get_i64;
1600        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1601            = BrIfXslt64I32 / < / get_i64;
1602        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1603            = BrIfXsgt64I8 / > / get_i64;
1604        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1605            = BrIfXsgt64I32 / > / get_i64;
1606        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1607            = BrIfXslteq64I8 / <= / get_i64;
1608        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1609            = BrIfXslteq64I32 / <= / get_i64;
1610        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1611            = BrIfXsgteq64I8 / >= / get_i64;
1612        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1613            = BrIfXsgteq64I32 / >= / get_i64;
1614
1615        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1616            = BrIfXult64U8 / < / get_u64;
1617        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1618            = BrIfXult64U32 / < / get_u64;
1619        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1620            = BrIfXugt64U8 / > / get_u64;
1621        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1622            = BrIfXugt64U32 / > / get_u64;
1623        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1624            = BrIfXulteq64U8 / <= / get_u64;
1625        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1626            = BrIfXulteq64U32 / <= / get_u64;
1627        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1628            = BrIfXugteq64U8 / >= / get_u64;
1629        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1630            = BrIfXugteq64U32 / >= / get_u64;
1631    }
1632
1633    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1634        let val = self.state[src];
1635        self.state[dst] = val;
1636        ControlFlow::Continue(())
1637    }
1638
1639    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1640        self.state[dst].set_i64(i64::from(imm));
1641        ControlFlow::Continue(())
1642    }
1643
1644    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1645        self.state[dst].set_i64(0);
1646        ControlFlow::Continue(())
1647    }
1648
1649    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1650        self.state[dst].set_i64(1);
1651        ControlFlow::Continue(())
1652    }
1653
1654    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1655        self.state[dst].set_i64(i64::from(imm));
1656        ControlFlow::Continue(())
1657    }
1658
1659    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1660        self.state[dst].set_i64(i64::from(imm));
1661        ControlFlow::Continue(())
1662    }
1663
1664    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1665        self.state[dst].set_i64(imm);
1666        ControlFlow::Continue(())
1667    }
1668
1669    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1670        let a = self.state[operands.src1].get_u32();
1671        let b = self.state[operands.src2].get_u32();
1672        self.state[operands.dst].set_u32(a.wrapping_add(b));
1673        ControlFlow::Continue(())
1674    }
1675
1676    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1677        self.xadd32_u32(dst, src1, src2.into())
1678    }
1679
1680    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1681        let a = self.state[src1].get_u32();
1682        self.state[dst].set_u32(a.wrapping_add(src2.into()));
1683        ControlFlow::Continue(())
1684    }
1685
1686    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1687        let a = self.state[operands.src1].get_u64();
1688        let b = self.state[operands.src2].get_u64();
1689        self.state[operands.dst].set_u64(a.wrapping_add(b));
1690        ControlFlow::Continue(())
1691    }
1692
1693    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1694        self.xadd64_u32(dst, src1, src2.into())
1695    }
1696
1697    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1698        let a = self.state[src1].get_u64();
1699        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1700        ControlFlow::Continue(())
1701    }
1702
1703    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1704        let a = self.state[src1].get_u32();
1705        let b = self.state[src2].get_u32();
1706        let c = self.state[src3].get_u32();
1707        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1708        ControlFlow::Continue(())
1709    }
1710
1711    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1712        let a = self.state[src1].get_u64();
1713        let b = self.state[src2].get_u64();
1714        let c = self.state[src3].get_u64();
1715        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1716        ControlFlow::Continue(())
1717    }
1718
1719    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1720        let a = self.state[operands.src1].get_u32();
1721        let b = self.state[operands.src2].get_u32();
1722        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1723        ControlFlow::Continue(())
1724    }
1725
1726    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1727        self.xsub32_u32(dst, src1, src2.into())
1728    }
1729
1730    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1731        let a = self.state[src1].get_u32();
1732        self.state[dst].set_u32(a.wrapping_sub(src2.into()));
1733        ControlFlow::Continue(())
1734    }
1735
1736    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1737        let a = self.state[operands.src1].get_u64();
1738        let b = self.state[operands.src2].get_u64();
1739        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1740        ControlFlow::Continue(())
1741    }
1742
1743    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1744        self.xsub64_u32(dst, src1, src2.into())
1745    }
1746
1747    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1748        let a = self.state[src1].get_u64();
1749        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1750        ControlFlow::Continue(())
1751    }
1752
1753    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1754        let a = self.state[operands.src1].get_u32();
1755        let b = self.state[operands.src2].get_u32();
1756        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1757        ControlFlow::Continue(())
1758    }
1759
1760    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1761        self.xmul32_s32(dst, src1, src2.into())
1762    }
1763
1764    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1765        let a = self.state[src1].get_i32();
1766        self.state[dst].set_i32(a.wrapping_mul(src2));
1767        ControlFlow::Continue(())
1768    }
1769
1770    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1771        let a = self.state[operands.src1].get_u64();
1772        let b = self.state[operands.src2].get_u64();
1773        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1774        ControlFlow::Continue(())
1775    }
1776
1777    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1778        self.xmul64_s32(dst, src1, src2.into())
1779    }
1780
1781    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1782        let a = self.state[src1].get_i64();
1783        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1784        ControlFlow::Continue(())
1785    }
1786
1787    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1788        let a = self.state[operands.src1].get_u32();
1789        let b = self.state[operands.src2].get_u32();
1790        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1791        ControlFlow::Continue(())
1792    }
1793
1794    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1795        let a = self.state[operands.src1].get_u32();
1796        let b = self.state[operands.src2].get_u32();
1797        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1798        ControlFlow::Continue(())
1799    }
1800
1801    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1802        let a = self.state[operands.src1].get_i32();
1803        let b = self.state[operands.src2].get_u32();
1804        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1805        ControlFlow::Continue(())
1806    }
1807
1808    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1809        let a = self.state[operands.src1].get_u64();
1810        let b = self.state[operands.src2].get_u32();
1811        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1812        ControlFlow::Continue(())
1813    }
1814
1815    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1816        let a = self.state[operands.src1].get_u64();
1817        let b = self.state[operands.src2].get_u32();
1818        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1819        ControlFlow::Continue(())
1820    }
1821
1822    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1823        let a = self.state[operands.src1].get_i64();
1824        let b = self.state[operands.src2].get_u32();
1825        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1826        ControlFlow::Continue(())
1827    }
1828
1829    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1830        let a = self.state[operands.src1].get_u32();
1831        let b = u32::from(u8::from(operands.src2));
1832        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1833        ControlFlow::Continue(())
1834    }
1835
1836    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1837        let a = self.state[operands.src1].get_u32();
1838        let b = u32::from(u8::from(operands.src2));
1839        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1840        ControlFlow::Continue(())
1841    }
1842
1843    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1844        let a = self.state[operands.src1].get_i32();
1845        let b = u32::from(u8::from(operands.src2));
1846        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1847        ControlFlow::Continue(())
1848    }
1849
1850    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1851        let a = self.state[operands.src1].get_u64();
1852        let b = u32::from(u8::from(operands.src2));
1853        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1854        ControlFlow::Continue(())
1855    }
1856
1857    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1858        let a = self.state[operands.src1].get_u64();
1859        let b = u32::from(u8::from(operands.src2));
1860        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1861        ControlFlow::Continue(())
1862    }
1863
1864    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1865        let a = self.state[operands.src1].get_i64();
1866        let b = u32::from(u8::from(operands.src2));
1867        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1868        ControlFlow::Continue(())
1869    }
1870
1871    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1872        let a = self.state[src].get_i32();
1873        self.state[dst].set_i32(a.wrapping_neg());
1874        ControlFlow::Continue(())
1875    }
1876
1877    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1878        let a = self.state[src].get_i64();
1879        self.state[dst].set_i64(a.wrapping_neg());
1880        ControlFlow::Continue(())
1881    }
1882
1883    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1884        let a = self.state[operands.src1].get_u64();
1885        let b = self.state[operands.src2].get_u64();
1886        self.state[operands.dst].set_u32(u32::from(a == b));
1887        ControlFlow::Continue(())
1888    }
1889
1890    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1891        let a = self.state[operands.src1].get_u64();
1892        let b = self.state[operands.src2].get_u64();
1893        self.state[operands.dst].set_u32(u32::from(a != b));
1894        ControlFlow::Continue(())
1895    }
1896
1897    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1898        let a = self.state[operands.src1].get_i64();
1899        let b = self.state[operands.src2].get_i64();
1900        self.state[operands.dst].set_u32(u32::from(a < b));
1901        ControlFlow::Continue(())
1902    }
1903
1904    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1905        let a = self.state[operands.src1].get_i64();
1906        let b = self.state[operands.src2].get_i64();
1907        self.state[operands.dst].set_u32(u32::from(a <= b));
1908        ControlFlow::Continue(())
1909    }
1910
1911    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1912        let a = self.state[operands.src1].get_u64();
1913        let b = self.state[operands.src2].get_u64();
1914        self.state[operands.dst].set_u32(u32::from(a < b));
1915        ControlFlow::Continue(())
1916    }
1917
1918    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1919        let a = self.state[operands.src1].get_u64();
1920        let b = self.state[operands.src2].get_u64();
1921        self.state[operands.dst].set_u32(u32::from(a <= b));
1922        ControlFlow::Continue(())
1923    }
1924
1925    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1926        let a = self.state[operands.src1].get_u32();
1927        let b = self.state[operands.src2].get_u32();
1928        self.state[operands.dst].set_u32(u32::from(a == b));
1929        ControlFlow::Continue(())
1930    }
1931
1932    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1933        let a = self.state[operands.src1].get_u32();
1934        let b = self.state[operands.src2].get_u32();
1935        self.state[operands.dst].set_u32(u32::from(a != b));
1936        ControlFlow::Continue(())
1937    }
1938
1939    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1940        let a = self.state[operands.src1].get_i32();
1941        let b = self.state[operands.src2].get_i32();
1942        self.state[operands.dst].set_u32(u32::from(a < b));
1943        ControlFlow::Continue(())
1944    }
1945
1946    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1947        let a = self.state[operands.src1].get_i32();
1948        let b = self.state[operands.src2].get_i32();
1949        self.state[operands.dst].set_u32(u32::from(a <= b));
1950        ControlFlow::Continue(())
1951    }
1952
1953    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1954        let a = self.state[operands.src1].get_u32();
1955        let b = self.state[operands.src2].get_u32();
1956        self.state[operands.dst].set_u32(u32::from(a < b));
1957        ControlFlow::Continue(())
1958    }
1959
1960    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1961        let a = self.state[operands.src1].get_u32();
1962        let b = self.state[operands.src2].get_u32();
1963        self.state[operands.dst].set_u32(u32::from(a <= b));
1964        ControlFlow::Continue(())
1965    }
1966
1967    fn push_frame(&mut self) -> ControlFlow<Done> {
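        // Standard frame prologue: push `lr` then `fp`, then make `fp` point
        // at the saved-`fp` slot, i.e. the base of the new frame.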
1968        self.push::<crate::PushFrame, _>(self.state.lr)?;
1969        self.push::<crate::PushFrame, _>(self.state.fp)?;
1970        self.state.fp = self.state[XReg::sp].get_ptr();
1971        ControlFlow::Continue(())
1972    }
1973
1974    #[inline]
1975    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
1976        // Decrement the stack pointer by `amt` bytes, plus space for two more
1977        // pointers to hold fp/lr.
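        //
        // The resulting frame layout, from higher to lower addresses, is:
        //
        //     old sp - 1*ptr_size : saved lr
        //     old sp - 2*ptr_size : saved fp          <- new fp
        //     ...                 : saved `regs`, 8 bytes each
        //     old sp - full_amt   :                   <- new sp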
1978        let ptr_size = size_of::<usize>();
1979        let full_amt = usize::from(amt) + 2 * ptr_size;
1980        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
1981        self.set_sp::<crate::PushFrameSave>(new_sp)?;
1982
1983        unsafe {
1984            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
1985            // that order, at the top of the allocated area.
1986            self.store_ne::<_, crate::PushFrameSave>(
1987                AddrO32 {
1988                    addr: XReg::sp,
1989                    offset: (full_amt - 1 * ptr_size) as i32,
1990                },
1991                self.state.lr,
1992            )?;
1993            self.store_ne::<_, crate::PushFrameSave>(
1994                AddrO32 {
1995                    addr: XReg::sp,
1996                    offset: (full_amt - 2 * ptr_size) as i32,
1997                },
1998                self.state.fp,
1999            )?;
2000
2001            // Set `fp` to the top of our frame, where `fp` is stored.
2002            let mut offset = amt as i32;
2003            self.state.fp = self.state[XReg::sp]
2004                .get_ptr::<u8>()
2005                .byte_offset(offset as isize);
2006
2007            // Next save any registers in `regs` to the stack.
2008            for reg in regs {
2009                offset -= 8;
2010                self.store_ne::<_, crate::PushFrameSave>(
2011                    AddrO32 {
2012                        addr: XReg::sp,
2013                        offset,
2014                    },
2015                    self.state[reg].get_u64(),
2016                )?;
2017            }
2018        }
2019        ControlFlow::Continue(())
2020    }
2021
2022    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2023        // Restore all registers in `regs`, followed by the normal `pop_frame`
2024        // opcode below to restore fp/lr.
2025        unsafe {
2026            let mut offset = i32::from(amt);
2027            for reg in regs {
2028                offset -= 8;
2029                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2030                    addr: XReg::sp,
2031                    offset,
2032                })?;
2033                self.state[reg].set_u64(val);
2034            }
2035        }
2036        self.pop_frame()
2037    }
2038
2039    fn pop_frame(&mut self) -> ControlFlow<Done> {
2040        self.set_sp_unchecked(self.state.fp);
2041        let fp = self.pop();
2042        let lr = self.pop();
2043        self.state.fp = fp;
2044        self.state.lr = lr;
2045        ControlFlow::Continue(())
2046    }
2047
2048    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
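        // This opcode is followed in the bytecode by `amt` 4-byte
        // `PcRelOffset` entries. Clamp the index into that table, then take
        // the pc-relative jump described by the selected entry.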
2049        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2050        // SAFETY: part of the contract of the interpreter is only dealing with
2051        // valid bytecode, so this offset should be safe.
2052        self.pc = unsafe { self.pc.offset(idx * 4) };
2053
2054        // Decode the `PcRelOffset` without tampering with `self.pc` as the
2055        // jump is relative to `self.pc`.
2056        let mut tmp = self.pc;
2057        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2058        let offset = isize::try_from(i32::from(rel)).unwrap();
2059        self.pc = unsafe { self.pc.offset(offset) };
2060        ControlFlow::Continue(())
2061    }
2062
2063    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2064        let amt = usize::try_from(amt).unwrap();
2065        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2066        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2067        ControlFlow::Continue(())
2068    }
2069
2070    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2071        let amt = usize::try_from(amt).unwrap();
2072        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2073        self.set_sp_unchecked(new_sp);
2074        ControlFlow::Continue(())
2075    }
2076
2077    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2078        let src = self.state[src].get_u64() as u8;
2079        self.state[dst].set_u64(src.into());
2080        ControlFlow::Continue(())
2081    }
2082
2083    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2084        let src = self.state[src].get_u64() as u16;
2085        self.state[dst].set_u64(src.into());
2086        ControlFlow::Continue(())
2087    }
2088
2089    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2090        let src = self.state[src].get_u64() as u32;
2091        self.state[dst].set_u64(src.into());
2092        ControlFlow::Continue(())
2093    }
2094
2095    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2096        let src = self.state[src].get_i64() as i8;
2097        self.state[dst].set_i64(src.into());
2098        ControlFlow::Continue(())
2099    }
2100
2101    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2102        let src = self.state[src].get_i64() as i16;
2103        self.state[dst].set_i64(src.into());
2104        ControlFlow::Continue(())
2105    }
2106
2107    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2108        let src = self.state[src].get_i64() as i32;
2109        self.state[dst].set_i64(src.into());
2110        ControlFlow::Continue(())
2111    }
2112
2113    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2114        let a = self.state[operands.src1].get_i32();
2115        let b = self.state[operands.src2].get_i32();
2116        match a.checked_div(b) {
2117            Some(result) => {
2118                self.state[operands.dst].set_i32(result);
2119                ControlFlow::Continue(())
2120            }
2121            None => {
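                // `checked_div` returns `None` both for division by zero and
                // for `i32::MIN / -1`; report the appropriate trap kind.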
2122                let kind = if b == 0 {
2123                    TrapKind::DivideByZero
2124                } else {
2125                    TrapKind::IntegerOverflow
2126                };
2127                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2128            }
2129        }
2130    }
2131
2132    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2133        let a = self.state[operands.src1].get_i64();
2134        let b = self.state[operands.src2].get_i64();
2135        match a.checked_div(b) {
2136            Some(result) => {
2137                self.state[operands.dst].set_i64(result);
2138                ControlFlow::Continue(())
2139            }
2140            None => {
2141                let kind = if b == 0 {
2142                    TrapKind::DivideByZero
2143                } else {
2144                    TrapKind::IntegerOverflow
2145                };
2146                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2147            }
2148        }
2149    }
2150
2151    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2152        let a = self.state[operands.src1].get_u32();
2153        let b = self.state[operands.src2].get_u32();
2154        match a.checked_div(b) {
2155            Some(result) => {
2156                self.state[operands.dst].set_u32(result);
2157                ControlFlow::Continue(())
2158            }
2159            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2160        }
2161    }
2162
2163    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2164        let a = self.state[operands.src1].get_u64();
2165        let b = self.state[operands.src2].get_u64();
2166        match a.checked_div(b) {
2167            Some(result) => {
2168                self.state[operands.dst].set_u64(result);
2169                ControlFlow::Continue(())
2170            }
2171            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2172        }
2173    }
2174
2175    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2176        let a = self.state[operands.src1].get_i32();
2177        let b = self.state[operands.src2].get_i32();
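        // `i32::MIN % -1` overflows `checked_rem` even though its remainder
        // is 0, so handle that case explicitly instead of trapping.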
2178        let result = if a == i32::MIN && b == -1 {
2179            Some(0)
2180        } else {
2181            a.checked_rem(b)
2182        };
2183        match result {
2184            Some(result) => {
2185                self.state[operands.dst].set_i32(result);
2186                ControlFlow::Continue(())
2187            }
2188            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2189        }
2190    }
2191
2192    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2193        let a = self.state[operands.src1].get_i64();
2194        let b = self.state[operands.src2].get_i64();
2195        let result = if a == i64::MIN && b == -1 {
2196            Some(0)
2197        } else {
2198            a.checked_rem(b)
2199        };
2200        match result {
2201            Some(result) => {
2202                self.state[operands.dst].set_i64(result);
2203                ControlFlow::Continue(())
2204            }
2205            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2206        }
2207    }
2208
2209    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2210        let a = self.state[operands.src1].get_u32();
2211        let b = self.state[operands.src2].get_u32();
2212        match a.checked_rem(b) {
2213            Some(result) => {
2214                self.state[operands.dst].set_u32(result);
2215                ControlFlow::Continue(())
2216            }
2217            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2218        }
2219    }
2220
2221    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2222        let a = self.state[operands.src1].get_u64();
2223        let b = self.state[operands.src2].get_u64();
2224        match a.checked_rem(b) {
2225            Some(result) => {
2226                self.state[operands.dst].set_u64(result);
2227                ControlFlow::Continue(())
2228            }
2229            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2230        }
2231    }
2232
2233    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2234        let a = self.state[operands.src1].get_u32();
2235        let b = self.state[operands.src2].get_u32();
2236        self.state[operands.dst].set_u32(a & b);
2237        ControlFlow::Continue(())
2238    }
2239
2240    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2241        self.xband32_s32(dst, src1, src2.into())
2242    }
2243
2244    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2245        let a = self.state[src1].get_i32();
2246        self.state[dst].set_i32(a & src2);
2247        ControlFlow::Continue(())
2248    }
2249
2250    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2251        let a = self.state[operands.src1].get_u64();
2252        let b = self.state[operands.src2].get_u64();
2253        self.state[operands.dst].set_u64(a & b);
2254        ControlFlow::Continue(())
2255    }
2256
2257    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2258        self.xband64_s32(dst, src1, src2.into())
2259    }
2260
2261    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2262        let a = self.state[src1].get_i64();
2263        self.state[dst].set_i64(a & i64::from(src2));
2264        ControlFlow::Continue(())
2265    }
2266
2267    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2268        let a = self.state[operands.src1].get_u32();
2269        let b = self.state[operands.src2].get_u32();
2270        self.state[operands.dst].set_u32(a | b);
2271        ControlFlow::Continue(())
2272    }
2273
2274    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2275        self.xbor32_s32(dst, src1, src2.into())
2276    }
2277
2278    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2279        let a = self.state[src1].get_i32();
2280        self.state[dst].set_i32(a | src2);
2281        ControlFlow::Continue(())
2282    }
2283
2284    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2285        let a = self.state[operands.src1].get_u64();
2286        let b = self.state[operands.src2].get_u64();
2287        self.state[operands.dst].set_u64(a | b);
2288        ControlFlow::Continue(())
2289    }
2290
2291    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2292        self.xbor64_s32(dst, src1, src2.into())
2293    }
2294
2295    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2296        let a = self.state[src1].get_i64();
2297        self.state[dst].set_i64(a | i64::from(src2));
2298        ControlFlow::Continue(())
2299    }
2300
2301    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2302        let a = self.state[operands.src1].get_u32();
2303        let b = self.state[operands.src2].get_u32();
2304        self.state[operands.dst].set_u32(a ^ b);
2305        ControlFlow::Continue(())
2306    }
2307
2308    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2309        self.xbxor32_s32(dst, src1, src2.into())
2310    }
2311
2312    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2313        let a = self.state[src1].get_i32();
2314        self.state[dst].set_i32(a ^ src2);
2315        ControlFlow::Continue(())
2316    }
2317
2318    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2319        let a = self.state[operands.src1].get_u64();
2320        let b = self.state[operands.src2].get_u64();
2321        self.state[operands.dst].set_u64(a ^ b);
2322        ControlFlow::Continue(())
2323    }
2324
2325    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2326        self.xbxor64_s32(dst, src1, src2.into())
2327    }
2328
2329    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2330        let a = self.state[src1].get_i64();
2331        self.state[dst].set_i64(a ^ i64::from(src2));
2332        ControlFlow::Continue(())
2333    }
2334
2335    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2336        let a = self.state[src].get_u32();
2337        self.state[dst].set_u32(!a);
2338        ControlFlow::Continue(())
2339    }
2340
2341    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2342        let a = self.state[src].get_u64();
2343        self.state[dst].set_u64(!a);
2344        ControlFlow::Continue(())
2345    }
2346
2347    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2348        let a = self.state[operands.src1].get_u32();
2349        let b = self.state[operands.src2].get_u32();
2350        self.state[operands.dst].set_u32(a.min(b));
2351        ControlFlow::Continue(())
2352    }
2353
2354    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2355        let a = self.state[operands.src1].get_i32();
2356        let b = self.state[operands.src2].get_i32();
2357        self.state[operands.dst].set_i32(a.min(b));
2358        ControlFlow::Continue(())
2359    }
2360
2361    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2362        let a = self.state[operands.src1].get_u32();
2363        let b = self.state[operands.src2].get_u32();
2364        self.state[operands.dst].set_u32(a.max(b));
2365        ControlFlow::Continue(())
2366    }
2367
2368    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2369        let a = self.state[operands.src1].get_i32();
2370        let b = self.state[operands.src2].get_i32();
2371        self.state[operands.dst].set_i32(a.max(b));
2372        ControlFlow::Continue(())
2373    }
2374
2375    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2376        let a = self.state[operands.src1].get_u64();
2377        let b = self.state[operands.src2].get_u64();
2378        self.state[operands.dst].set_u64(a.min(b));
2379        ControlFlow::Continue(())
2380    }
2381
2382    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2383        let a = self.state[operands.src1].get_i64();
2384        let b = self.state[operands.src2].get_i64();
2385        self.state[operands.dst].set_i64(a.min(b));
2386        ControlFlow::Continue(())
2387    }
2388
2389    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2390        let a = self.state[operands.src1].get_u64();
2391        let b = self.state[operands.src2].get_u64();
2392        self.state[operands.dst].set_u64(a.max(b));
2393        ControlFlow::Continue(())
2394    }
2395
2396    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2397        let a = self.state[operands.src1].get_i64();
2398        let b = self.state[operands.src2].get_i64();
2399        self.state[operands.dst].set_i64(a.max(b));
2400        ControlFlow::Continue(())
2401    }
2402
2403    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2404        let a = self.state[src].get_u32();
2405        self.state[dst].set_u32(a.trailing_zeros());
2406        ControlFlow::Continue(())
2407    }
2408
2409    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2410        let a = self.state[src].get_u64();
2411        self.state[dst].set_u64(a.trailing_zeros().into());
2412        ControlFlow::Continue(())
2413    }
2414
2415    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2416        let a = self.state[src].get_u32();
2417        self.state[dst].set_u32(a.leading_zeros());
2418        ControlFlow::Continue(())
2419    }
2420
2421    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2422        let a = self.state[src].get_u64();
2423        self.state[dst].set_u64(a.leading_zeros().into());
2424        ControlFlow::Continue(())
2425    }
2426
2427    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2428        let a = self.state[src].get_u32();
2429        self.state[dst].set_u32(a.count_ones());
2430        ControlFlow::Continue(())
2431    }
2432
2433    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2434        let a = self.state[src].get_u64();
2435        self.state[dst].set_u64(a.count_ones().into());
2436        ControlFlow::Continue(())
2437    }
2438
2439    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2440        let a = self.state[operands.src1].get_u32();
2441        let b = self.state[operands.src2].get_u32();
2442        self.state[operands.dst].set_u32(a.rotate_left(b));
2443        ControlFlow::Continue(())
2444    }
2445
2446    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2447        let a = self.state[operands.src1].get_u64();
2448        let b = self.state[operands.src2].get_u32();
2449        self.state[operands.dst].set_u64(a.rotate_left(b));
2450        ControlFlow::Continue(())
2451    }
2452
2453    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2454        let a = self.state[operands.src1].get_u32();
2455        let b = self.state[operands.src2].get_u32();
2456        self.state[operands.dst].set_u32(a.rotate_right(b));
2457        ControlFlow::Continue(())
2458    }
2459
2460    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2461        let a = self.state[operands.src1].get_u64();
2462        let b = self.state[operands.src2].get_u32();
2463        self.state[operands.dst].set_u64(a.rotate_right(b));
2464        ControlFlow::Continue(())
2465    }
2466
2467    fn xselect32(
2468        &mut self,
2469        dst: XReg,
2470        cond: XReg,
2471        if_nonzero: XReg,
2472        if_zero: XReg,
2473    ) -> ControlFlow<Done> {
2474        let result = if self.state[cond].get_u32() != 0 {
2475            self.state[if_nonzero].get_u32()
2476        } else {
2477            self.state[if_zero].get_u32()
2478        };
2479        self.state[dst].set_u32(result);
2480        ControlFlow::Continue(())
2481    }
2482
2483    fn xselect64(
2484        &mut self,
2485        dst: XReg,
2486        cond: XReg,
2487        if_nonzero: XReg,
2488        if_zero: XReg,
2489    ) -> ControlFlow<Done> {
2490        let result = if self.state[cond].get_u32() != 0 {
2491            self.state[if_nonzero].get_u64()
2492        } else {
2493            self.state[if_zero].get_u64()
2494        };
2495        self.state[dst].set_u64(result);
2496        ControlFlow::Continue(())
2497    }
2498
2499    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2500        let a = self.state[src].get_i32();
2501        self.state[dst].set_i32(a.wrapping_abs());
2502        ControlFlow::Continue(())
2503    }
2504
2505    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2506        let a = self.state[src].get_i64();
2507        self.state[dst].set_i64(a.wrapping_abs());
2508        ControlFlow::Continue(())
2509    }
2510
2511    // =========================================================================
2512    // o32 addressing modes
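    //
    // `AddrO32` pairs a base register with a signed 32-bit byte offset (the
    // `AddrO32 { addr, offset }` shape also used by `push_frame_save` above).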
2513
2514    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2515        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2516        self.state[dst].set_u32(result.into());
2517        ControlFlow::Continue(())
2518    }
2519
2520    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2521        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2522        self.state[dst].set_i32(result.into());
2523        ControlFlow::Continue(())
2524    }
2525
2526    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2527        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2528        self.state[dst].set_u32(u16::from_le(result).into());
2529        ControlFlow::Continue(())
2530    }
2531
2532    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2533        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2534        self.state[dst].set_i32(i16::from_le(result).into());
2535        ControlFlow::Continue(())
2536    }
2537
2538    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2539        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2540        self.state[dst].set_i32(i32::from_le(result));
2541        ControlFlow::Continue(())
2542    }
2543
2544    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2545        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2546        self.state[dst].set_i64(i64::from_le(result));
2547        ControlFlow::Continue(())
2548    }
2549
2550    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2551        let val = self.state[val].get_u32() as u8;
2552        unsafe {
2553            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2554        }
2555        ControlFlow::Continue(())
2556    }
2557
2558    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2559        let val = self.state[val].get_u32() as u16;
2560        unsafe {
2561            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2562        }
2563        ControlFlow::Continue(())
2564    }
2565
2566    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2567        let val = self.state[val].get_u32();
2568        unsafe {
2569            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2570        }
2571        ControlFlow::Continue(())
2572    }
2573
2574    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2575        let val = self.state[val].get_u64();
2576        unsafe {
2577            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2578        }
2579        ControlFlow::Continue(())
2580    }
2581
2582    // =========================================================================
2583    // g32 addressing modes
2584
2585    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2586        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2587        self.state[dst].set_u32(result.into());
2588        ControlFlow::Continue(())
2589    }
2590
2591    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2592        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2593        self.state[dst].set_i32(result.into());
2594        ControlFlow::Continue(())
2595    }
2596
2597    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2598        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2599        self.state[dst].set_u32(u16::from_le(result).into());
2600        ControlFlow::Continue(())
2601    }
2602
2603    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2604        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2605        self.state[dst].set_i32(i16::from_le(result).into());
2606        ControlFlow::Continue(())
2607    }
2608
2609    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2610        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2611        self.state[dst].set_i32(i32::from_le(result));
2612        ControlFlow::Continue(())
2613    }
2614
2615    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2616        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2617        self.state[dst].set_i64(i64::from_le(result));
2618        ControlFlow::Continue(())
2619    }
2620
2621    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2622        let val = self.state[val].get_u32() as u8;
2623        unsafe {
2624            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2625        }
2626        ControlFlow::Continue(())
2627    }
2628
2629    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2630        let val = self.state[val].get_u32() as u16;
2631        unsafe {
2632            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2633        }
2634        ControlFlow::Continue(())
2635    }
2636
2637    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2638        let val = self.state[val].get_u32();
2639        unsafe {
2640            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2641        }
2642        ControlFlow::Continue(())
2643    }
2644
2645    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2646        let val = self.state[val].get_u64();
2647        unsafe {
2648            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2649        }
2650        ControlFlow::Continue(())
2651    }
2652
2653    // =========================================================================
2654    // z addressing modes
2655
2656    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2657        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2658        self.state[dst].set_u32(result.into());
2659        ControlFlow::Continue(())
2660    }
2661
2662    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2663        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2664        self.state[dst].set_i32(result.into());
2665        ControlFlow::Continue(())
2666    }
2667
2668    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2669        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2670        self.state[dst].set_u32(u16::from_le(result).into());
2671        ControlFlow::Continue(())
2672    }
2673
2674    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2675        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2676        self.state[dst].set_i32(i16::from_le(result).into());
2677        ControlFlow::Continue(())
2678    }
2679
2680    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2681        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2682        self.state[dst].set_i32(i32::from_le(result));
2683        ControlFlow::Continue(())
2684    }
2685
2686    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2687        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2688        self.state[dst].set_i64(i64::from_le(result));
2689        ControlFlow::Continue(())
2690    }
2691
2692    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2693        let val = self.state[val].get_u32() as u8;
2694        unsafe {
2695            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2696        }
2697        ControlFlow::Continue(())
2698    }
2699
2700    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2701        let val = self.state[val].get_u32() as u16;
2702        unsafe {
2703            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2704        }
2705        ControlFlow::Continue(())
2706    }
2707
2708    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2709        let val = self.state[val].get_u32();
2710        unsafe {
2711            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2712        }
2713        ControlFlow::Continue(())
2714    }
2715
2716    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2717        let val = self.state[val].get_u64();
2718        unsafe {
2719            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2720        }
2721        ControlFlow::Continue(())
2722    }
2723
2724    // =========================================================================
2725    // g32bne addressing modes
2726
2727    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2728        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2729        self.state[dst].set_u32(result.into());
2730        ControlFlow::Continue(())
2731    }
2732
2733    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2734        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2735        self.state[dst].set_i32(result.into());
2736        ControlFlow::Continue(())
2737    }
2738
2739    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2740        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2741        self.state[dst].set_u32(u16::from_le(result).into());
2742        ControlFlow::Continue(())
2743    }
2744
2745    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2746        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2747        self.state[dst].set_i32(i16::from_le(result).into());
2748        ControlFlow::Continue(())
2749    }
2750
2751    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2752        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2753        self.state[dst].set_i32(i32::from_le(result));
2754        ControlFlow::Continue(())
2755    }
2756
2757    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2758        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2759        self.state[dst].set_i64(i64::from_le(result));
2760        ControlFlow::Continue(())
2761    }
2762
2763    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2764        let val = self.state[val].get_u32() as u8;
2765        unsafe {
2766            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2767        }
2768        ControlFlow::Continue(())
2769    }
2770
2771    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2772        let val = self.state[val].get_u32() as u16;
2773        unsafe {
2774            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2775        }
2776        ControlFlow::Continue(())
2777    }
2778
2779    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2780        let val = self.state[val].get_u32();
2781        unsafe {
2782            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2783        }
2784        ControlFlow::Continue(())
2785    }
2786
2787    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2788        let val = self.state[val].get_u64();
2789        unsafe {
2790            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2791        }
2792        ControlFlow::Continue(())
2793    }
2794}
2795
2796impl ExtendedOpVisitor for Interpreter<'_> {
2797    fn nop(&mut self) -> ControlFlow<Done> {
2798        ControlFlow::Continue(())
2799    }
2800
2801    fn trap(&mut self) -> ControlFlow<Done> {
2802        self.done_trap::<crate::Trap>()
2803    }
2804
2805    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2806        self.done_call_indirect_host(id)
2807    }
2808
2809    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2810        let src = self.state[src].get_u32();
2811        self.state[dst].set_u32(src.swap_bytes());
2812        ControlFlow::Continue(())
2813    }
2814
2815    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2816        let src = self.state[src].get_u64();
2817        self.state[dst].set_u64(src.swap_bytes());
2818        ControlFlow::Continue(())
2819    }
2820
2821    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
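        // Materialize an all-ones mask when `src` is nonzero, zero otherwise.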
2822        let a = self.state[src].get_u32();
2823        if a == 0 {
2824            self.state[dst].set_u32(0);
2825        } else {
2826            self.state[dst].set_i32(-1);
2827        }
2828        ControlFlow::Continue(())
2829    }
2830
2831    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2832        let a = self.state[src].get_u64();
2833        if a == 0 {
2834            self.state[dst].set_u64(0);
2835        } else {
2836            self.state[dst].set_i64(-1);
2837        }
2838        ControlFlow::Continue(())
2839    }
2840
2841    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2842        let a = self.state[operands.src1].get_u32();
2843        let b = self.state[operands.src2].get_u32();
2844        match a.checked_add(b) {
2845            Some(c) => {
2846                self.state[operands.dst].set_u32(c);
2847                ControlFlow::Continue(())
2848            }
2849            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2850        }
2851    }
2852
2853    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2854        let a = self.state[operands.src1].get_u64();
2855        let b = self.state[operands.src2].get_u64();
2856        match a.checked_add(b) {
2857            Some(c) => {
2858                self.state[operands.dst].set_u64(c);
2859                ControlFlow::Continue(())
2860            }
2861            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2862        }
2863    }
2864
2865    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
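        // Widen to 128 bits so the high half of the full product survives the
        // multiplication, then shift it back down into a 64-bit result.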
2866        let a = self.state[operands.src1].get_i64();
2867        let b = self.state[operands.src2].get_i64();
2868        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2869        self.state[operands.dst].set_i64(result);
2870        ControlFlow::Continue(())
2871    }
2872
2873    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2874        let a = self.state[operands.src1].get_u64();
2875        let b = self.state[operands.src2].get_u64();
2876        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2877        self.state[operands.dst].set_u64(result);
2878        ControlFlow::Continue(())
2879    }
2880
2881    // =========================================================================
2882    // o32 addressing modes for big-endian X-registers
2883
2884    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2885        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2886        self.state[dst].set_u32(u16::from_be(result).into());
2887        ControlFlow::Continue(())
2888    }
2889
2890    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2891        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2892        self.state[dst].set_i32(i16::from_be(result).into());
2893        ControlFlow::Continue(())
2894    }
2895
2896    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2897        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2898        self.state[dst].set_i32(i32::from_be(result));
2899        ControlFlow::Continue(())
2900    }
2901
2902    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2903        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2904        self.state[dst].set_i64(i64::from_be(result));
2905        ControlFlow::Continue(())
2906    }
2907
2908    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2909        let val = self.state[val].get_u32() as u16;
2910        unsafe {
2911            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2912        }
2913        ControlFlow::Continue(())
2914    }
2915
2916    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2917        let val = self.state[val].get_u32();
2918        unsafe {
2919            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2920        }
2921        ControlFlow::Continue(())
2922    }
2923
2924    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2925        let val = self.state[val].get_u64();
2926        unsafe {
2927            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2928        }
2929        ControlFlow::Continue(())
2930    }
2931
2932    // =========================================================================
2933    // o32 addressing modes for little-endian F-registers
2934
2935    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2936        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2937        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2938        ControlFlow::Continue(())
2939    }
2940
2941    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2942        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2943        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2944        ControlFlow::Continue(())
2945    }
2946
2947    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2948        let val = self.state[src].get_f32();
2949        unsafe {
2950            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2951        }
2952        ControlFlow::Continue(())
2953    }
2954
2955    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2956        let val = self.state[src].get_f64();
2957        unsafe {
2958            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2959        }
2960        ControlFlow::Continue(())
2961    }
2962
2963    // =========================================================================
2964    // o32 addressing modes for big-endian F-registers
2965
2966    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2967        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
2968        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
2969        ControlFlow::Continue(())
2970    }
2971
2972    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2973        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
2974        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
2975        ControlFlow::Continue(())
2976    }
2977
2978    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2979        let val = self.state[src].get_f32();
2980        unsafe {
2981            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
2982        }
2983        ControlFlow::Continue(())
2984    }
2985
2986    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2987        let val = self.state[src].get_f64();
2988        unsafe {
2989            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
2990        }
2991        ControlFlow::Continue(())
2992    }
2993
2994    // =========================================================================
2995    // z addressing modes for little-endian F-registers
2996
2997    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
2998        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
2999        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3000        ControlFlow::Continue(())
3001    }
3002
3003    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3004        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3005        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3006        ControlFlow::Continue(())
3007    }
3008
3009    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3010        let val = self.state[src].get_f32();
3011        unsafe {
3012            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3013        }
3014        ControlFlow::Continue(())
3015    }
3016
3017    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3018        let val = self.state[src].get_f64();
3019        unsafe {
3020            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3021        }
3022        ControlFlow::Continue(())
3023    }
3024
3025    // =========================================================================
3026    // g32 addressing modes for little-endian F-registers
3027
3028    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3029        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3030        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3031        ControlFlow::Continue(())
3032    }
3033
3034    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3035        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3036        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3037        ControlFlow::Continue(())
3038    }
3039
3040    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3041        let val = self.state[src].get_f32();
3042        unsafe {
3043            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3044        }
3045        ControlFlow::Continue(())
3046    }
3047
3048    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3049        let val = self.state[src].get_f64();
3050        unsafe {
3051            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3052        }
3053        ControlFlow::Continue(())
3054    }
3055
3056    // =========================================================================
3057    // o32 addressing modes for little-endian V-registers
3058
3059    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3060        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3061        self.state[dst].set_u128(u128::from_le(val));
3062        ControlFlow::Continue(())
3063    }
3064
3065    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3066        let val = self.state[src].get_u128();
3067        unsafe {
3068            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3069        }
3070        ControlFlow::Continue(())
3071    }
3072
3073    // =========================================================================
3074    // z addressing modes for little-endian V-registers
3075
3076    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3077        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3078        self.state[dst].set_u128(u128::from_le(val));
3079        ControlFlow::Continue(())
3080    }
3081
3082    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3083        let val = self.state[src].get_u128();
3084        unsafe {
3085            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3086        }
3087        ControlFlow::Continue(())
3088    }
3089
3090    // =========================================================================
3091    // g32 addressing modes for little-endian V-registers
3092
3093    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3094        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3095        self.state[dst].set_u128(u128::from_le(val));
3096        ControlFlow::Continue(())
3097    }
3098
3099    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3100        let val = self.state[src].get_u128();
3101        unsafe {
3102            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3103        }
3104        ControlFlow::Continue(())
3105    }
3106
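    // Copies of the interpreter's frame pointer and link register into a
    // general-purpose register; these are plain reads of `MachineState`
    // fields.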
3107    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3108        let fp = self.state.fp;
3109        self.state[dst].set_ptr(fp);
3110        ControlFlow::Continue(())
3111    }
3112
3113    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3114        let lr = self.state.lr;
3115        self.state[dst].set_ptr(lr);
3116        ControlFlow::Continue(())
3117    }
3118
3119    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3120        let val = self.state[src];
3121        self.state[dst] = val;
3122        ControlFlow::Continue(())
3123    }
3124
3125    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3126        let val = self.state[src];
3127        self.state[dst] = val;
3128        ControlFlow::Continue(())
3129    }
3130
3131    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3132        self.state[dst].set_f32(f32::from_bits(bits));
3133        ControlFlow::Continue(())
3134    }
3135
3136    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3137        self.state[dst].set_f64(f64::from_bits(bits));
3138        ControlFlow::Continue(())
3139    }
3140
3141    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3142        let val = self.state[src].get_f32();
3143        self.state[dst].set_u32(val.to_bits());
3144        ControlFlow::Continue(())
3145    }
3146
3147    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3148        let val = self.state[src].get_f64();
3149        self.state[dst].set_u64(val.to_bits());
3150        ControlFlow::Continue(())
3151    }
3152
3153    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3154        let val = self.state[src].get_u32();
3155        self.state[dst].set_f32(f32::from_bits(val));
3156        ControlFlow::Continue(())
3157    }
3158
3159    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3160        let val = self.state[src].get_u64();
3161        self.state[dst].set_f64(f64::from_bits(val));
3162        ControlFlow::Continue(())
3163    }
3164
3165    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3166        let a = self.state[src1].get_f32();
3167        let b = self.state[src2].get_f32();
3168        self.state[dst].set_u32(u32::from(a == b));
3169        ControlFlow::Continue(())
3170    }
3171
3172    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3173        let a = self.state[src1].get_f32();
3174        let b = self.state[src2].get_f32();
3175        self.state[dst].set_u32(u32::from(a != b));
3176        ControlFlow::Continue(())
3177    }
3178
3179    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3180        let a = self.state[src1].get_f32();
3181        let b = self.state[src2].get_f32();
3182        self.state[dst].set_u32(u32::from(a < b));
3183        ControlFlow::Continue(())
3184    }
3185
3186    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3187        let a = self.state[src1].get_f32();
3188        let b = self.state[src2].get_f32();
3189        self.state[dst].set_u32(u32::from(a <= b));
3190        ControlFlow::Continue(())
3191    }
3192
3193    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3194        let a = self.state[src1].get_f64();
3195        let b = self.state[src2].get_f64();
3196        self.state[dst].set_u32(u32::from(a == b));
3197        ControlFlow::Continue(())
3198    }
3199
3200    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3201        let a = self.state[src1].get_f64();
3202        let b = self.state[src2].get_f64();
3203        self.state[dst].set_u32(u32::from(a != b));
3204        ControlFlow::Continue(())
3205    }
3206
3207    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3208        let a = self.state[src1].get_f64();
3209        let b = self.state[src2].get_f64();
3210        self.state[dst].set_u32(u32::from(a < b));
3211        ControlFlow::Continue(())
3212    }
3213
3214    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3215        let a = self.state[src1].get_f64();
3216        let b = self.state[src2].get_f64();
3217        self.state[dst].set_u32(u32::from(a <= b));
3218        ControlFlow::Continue(())
3219    }
3220
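    // Conditional float select: the low 32 bits of `cond` are tested against
    // zero and either `if_nonzero` or `if_zero` is copied to `dst` whole, with
    // no arithmetic performed on the selected value.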
3221    fn fselect32(
3222        &mut self,
3223        dst: FReg,
3224        cond: XReg,
3225        if_nonzero: FReg,
3226        if_zero: FReg,
3227    ) -> ControlFlow<Done> {
3228        let result = if self.state[cond].get_u32() != 0 {
3229            self.state[if_nonzero].get_f32()
3230        } else {
3231            self.state[if_zero].get_f32()
3232        };
3233        self.state[dst].set_f32(result);
3234        ControlFlow::Continue(())
3235    }
3236
3237    fn fselect64(
3238        &mut self,
3239        dst: FReg,
3240        cond: XReg,
3241        if_nonzero: FReg,
3242        if_zero: FReg,
3243    ) -> ControlFlow<Done> {
3244        let result = if self.state[cond].get_u32() != 0 {
3245            self.state[if_nonzero].get_f64()
3246        } else {
3247            self.state[if_zero].get_f64()
3248        };
3249        self.state[dst].set_f64(result);
3250        ControlFlow::Continue(())
3251    }
3252
3253    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3254        let a = self.state[src].get_i32();
3255        self.state[dst].set_f32(a as f32);
3256        ControlFlow::Continue(())
3257    }
3258
3259    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3260        let a = self.state[src].get_u32();
3261        self.state[dst].set_f32(a as f32);
3262        ControlFlow::Continue(())
3263    }
3264
3265    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3266        let a = self.state[src].get_i64();
3267        self.state[dst].set_f32(a as f32);
3268        ControlFlow::Continue(())
3269    }
3270
3271    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3272        let a = self.state[src].get_u64();
3273        self.state[dst].set_f32(a as f32);
3274        ControlFlow::Continue(())
3275    }
3276
3277    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3278        let a = self.state[src].get_i32();
3279        self.state[dst].set_f64(a as f64);
3280        ControlFlow::Continue(())
3281    }
3282
3283    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3284        let a = self.state[src].get_u32();
3285        self.state[dst].set_f64(a as f64);
3286        ControlFlow::Continue(())
3287    }
3288
3289    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3290        let a = self.state[src].get_i64();
3291        self.state[dst].set_f64(a as f64);
3292        ControlFlow::Continue(())
3293    }
3294
3295    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3296        let a = self.state[src].get_u64();
3297        self.state[dst].set_f64(a as f64);
3298        ControlFlow::Continue(())
3299    }
3300
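    // Checked float-to-int conversions. The two constants passed to
    // `check_xnn_from_fnn` bracket the target type's range: every float
    // strictly between them truncates to a value representable in the
    // destination, e.g. (-2147483649.0, 2147483648.0) for `i32`.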
3301    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3302        let a = self.state[src].get_f32();
3303        self.check_xnn_from_fnn::<crate::X32FromF32S>(a.into(), -2147483649.0, 2147483648.0)?;
3304        self.state[dst].set_i32(a as i32);
3305        ControlFlow::Continue(())
3306    }
3307
3308    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3309        let a = self.state[src].get_f32();
3310        self.check_xnn_from_fnn::<crate::X32FromF32U>(a.into(), -1.0, 4294967296.0)?;
3311        self.state[dst].set_u32(a as u32);
3312        ControlFlow::Continue(())
3313    }
3314
3315    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3316        let a = self.state[src].get_f32();
3317        self.check_xnn_from_fnn::<crate::X64FromF32S>(
3318            a.into(),
3319            -9223372036854777856.0,
3320            9223372036854775808.0,
3321        )?;
3322        self.state[dst].set_i64(a as i64);
3323        ControlFlow::Continue(())
3324    }
3325
3326    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3327        let a = self.state[src].get_f32();
3328        self.check_xnn_from_fnn::<crate::X64FromF32U>(a.into(), -1.0, 18446744073709551616.0)?;
3329        self.state[dst].set_u64(a as u64);
3330        ControlFlow::Continue(())
3331    }
3332
3333    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3334        let a = self.state[src].get_f64();
3335        self.check_xnn_from_fnn::<crate::X32FromF64S>(a, -2147483649.0, 2147483648.0)?;
3336        self.state[dst].set_i32(a as i32);
3337        ControlFlow::Continue(())
3338    }
3339
3340    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3341        let a = self.state[src].get_f64();
3342        self.check_xnn_from_fnn::<crate::X32FromF64U>(a, -1.0, 4294967296.0)?;
3343        self.state[dst].set_u32(a as u32);
3344        ControlFlow::Continue(())
3345    }
3346
3347    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3348        let a = self.state[src].get_f64();
3349        self.check_xnn_from_fnn::<crate::X64FromF64S>(
3350            a,
3351            -9223372036854777856.0,
3352            9223372036854775808.0,
3353        )?;
3354        self.state[dst].set_i64(a as i64);
3355        ControlFlow::Continue(())
3356    }
3357
3358    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3359        let a = self.state[src].get_f64();
3360        self.check_xnn_from_fnn::<crate::X64FromF64U>(a, -1.0, 18446744073709551616.0)?;
3361        self.state[dst].set_u64(a as u64);
3362        ControlFlow::Continue(())
3363    }
3364
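    // Saturating conversions rely on Rust's `as` casts from float to integer,
    // which clamp out-of-range values to the target type's bounds and turn
    // NaN into 0, matching Wasm's saturating truncation semantics.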
3365    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3366        let a = self.state[src].get_f32();
3367        self.state[dst].set_i32(a as i32);
3368        ControlFlow::Continue(())
3369    }
3370
3371    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3372        let a = self.state[src].get_f32();
3373        self.state[dst].set_u32(a as u32);
3374        ControlFlow::Continue(())
3375    }
3376
3377    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3378        let a = self.state[src].get_f32();
3379        self.state[dst].set_i64(a as i64);
3380        ControlFlow::Continue(())
3381    }
3382
3383    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3384        let a = self.state[src].get_f32();
3385        self.state[dst].set_u64(a as u64);
3386        ControlFlow::Continue(())
3387    }
3388
3389    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3390        let a = self.state[src].get_f64();
3391        self.state[dst].set_i32(a as i32);
3392        ControlFlow::Continue(())
3393    }
3394
3395    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3396        let a = self.state[src].get_f64();
3397        self.state[dst].set_u32(a as u32);
3398        ControlFlow::Continue(())
3399    }
3400
3401    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3402        let a = self.state[src].get_f64();
3403        self.state[dst].set_i64(a as i64);
3404        ControlFlow::Continue(())
3405    }
3406
3407    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3408        let a = self.state[src].get_f64();
3409        self.state[dst].set_u64(a as u64);
3410        ControlFlow::Continue(())
3411    }
3412
3413    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3414        let a = self.state[src].get_f64();
3415        self.state[dst].set_f32(a as f32);
3416        ControlFlow::Continue(())
3417    }
3418
3419    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3420        let a = self.state[src].get_f32();
3421        self.state[dst].set_f64(a.into());
3422        ControlFlow::Continue(())
3423    }
3424
3425    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3426        let a = self.state[operands.src1].get_f32();
3427        let b = self.state[operands.src2].get_f32();
3428        self.state[operands.dst].set_f32(a.wasm_copysign(b));
3429        ControlFlow::Continue(())
3430    }
3431
3432    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3433        let a = self.state[operands.src1].get_f64();
3434        let b = self.state[operands.src2].get_f64();
3435        self.state[operands.dst].set_f64(a.wasm_copysign(b));
3436        ControlFlow::Continue(())
3437    }
3438
3439    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3440        let a = self.state[operands.src1].get_f32();
3441        let b = self.state[operands.src2].get_f32();
3442        self.state[operands.dst].set_f32(a + b);
3443        ControlFlow::Continue(())
3444    }
3445
3446    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3447        let a = self.state[operands.src1].get_f32();
3448        let b = self.state[operands.src2].get_f32();
3449        self.state[operands.dst].set_f32(a - b);
3450        ControlFlow::Continue(())
3451    }
3452
3453    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3454        let mut a = self.state[operands.src1].get_f32x4();
3455        let b = self.state[operands.src2].get_f32x4();
3456        for (a, b) in a.iter_mut().zip(b) {
3457            *a = *a - b;
3458        }
3459        self.state[operands.dst].set_f32x4(a);
3460        ControlFlow::Continue(())
3461    }
3462
3463    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3464        let a = self.state[operands.src1].get_f32();
3465        let b = self.state[operands.src2].get_f32();
3466        self.state[operands.dst].set_f32(a * b);
3467        ControlFlow::Continue(())
3468    }
3469
3470    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3471        let mut a = self.state[operands.src1].get_f32x4();
3472        let b = self.state[operands.src2].get_f32x4();
3473        for (a, b) in a.iter_mut().zip(b) {
3474            *a = *a * b;
3475        }
3476        self.state[operands.dst].set_f32x4(a);
3477        ControlFlow::Continue(())
3478    }
3479
3480    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3481        let a = self.state[operands.src1].get_f32();
3482        let b = self.state[operands.src2].get_f32();
3483        self.state[operands.dst].set_f32(a / b);
3484        ControlFlow::Continue(())
3485    }
3486
3487    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3488        let a = self.state[operands.src1].get_f32x4();
3489        let b = self.state[operands.src2].get_f32x4();
3490        let mut result = [0.0f32; 4];
3491
3492        for i in 0..4 {
3493            result[i] = a[i] / b[i];
3494        }
3495
3496        self.state[operands.dst].set_f32x4(result);
3497        ControlFlow::Continue(())
3498    }
3499
3500    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3501        let a = self.state[operands.src1].get_f64x2();
3502        let b = self.state[operands.src2].get_f64x2();
3503        let mut result = [0.0f64; 2];
3504
3505        for i in 0..2 {
3506            result[i] = a[i] / b[i];
3507        }
3508
3509        self.state[operands.dst].set_f64x2(result);
3510        ControlFlow::Continue(())
3511    }
3512
3513    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3514        let a = self.state[operands.src1].get_f32();
3515        let b = self.state[operands.src2].get_f32();
3516        self.state[operands.dst].set_f32(a.wasm_maximum(b));
3517        ControlFlow::Continue(())
3518    }
3519
3520    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3521        let a = self.state[operands.src1].get_f32();
3522        let b = self.state[operands.src2].get_f32();
3523        self.state[operands.dst].set_f32(a.wasm_minimum(b));
3524        ControlFlow::Continue(())
3525    }
3526
3527    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3528        let a = self.state[src].get_f32();
3529        self.state[dst].set_f32(a.wasm_trunc());
3530        ControlFlow::Continue(())
3531    }
3532
3533    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3534        let mut a = self.state[src].get_f32x4();
3535        for elem in a.iter_mut() {
3536            *elem = elem.wasm_trunc();
3537        }
3538        self.state[dst].set_f32x4(a);
3539        ControlFlow::Continue(())
3540    }
3541
3542    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3543        let mut a = self.state[src].get_f64x2();
3544        for elem in a.iter_mut() {
3545            *elem = elem.wasm_trunc();
3546        }
3547        self.state[dst].set_f64x2(a);
3548        ControlFlow::Continue(())
3549    }
3550
3551    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3552        let a = self.state[src].get_f32();
3553        self.state[dst].set_f32(a.wasm_floor());
3554        ControlFlow::Continue(())
3555    }
3556
3557    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3558        let mut a = self.state[src].get_f32x4();
3559        for elem in a.iter_mut() {
3560            *elem = elem.wasm_floor();
3561        }
3562        self.state[dst].set_f32x4(a);
3563        ControlFlow::Continue(())
3564    }
3565
3566    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3567        let mut a = self.state[src].get_f64x2();
3568        for elem in a.iter_mut() {
3569            *elem = elem.wasm_floor();
3570        }
3571        self.state[dst].set_f64x2(a);
3572        ControlFlow::Continue(())
3573    }
3574
3575    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3576        let a = self.state[src].get_f32();
3577        self.state[dst].set_f32(a.wasm_ceil());
3578        ControlFlow::Continue(())
3579    }
3580
3581    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3582        let mut a = self.state[src].get_f32x4();
3583        for elem in a.iter_mut() {
3584            *elem = elem.wasm_ceil();
3585        }
3586        self.state[dst].set_f32x4(a);

3588        ControlFlow::Continue(())
3589    }
3590
3591    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3592        let mut a = self.state[src].get_f64x2();
3593        for elem in a.iter_mut() {
3594            *elem = elem.wasm_ceil();
3595        }
3596        self.state[dst].set_f64x2(a);
3598        ControlFlow::Continue(())
3599    }
3600
3601    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3602        let a = self.state[src].get_f32();
3603        self.state[dst].set_f32(a.wasm_nearest());
3604        ControlFlow::Continue(())
3605    }
3606
3607    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3608        let mut a = self.state[src].get_f32x4();
3609        for elem in a.iter_mut() {
3610            *elem = elem.wasm_nearest();
3611        }
3612        self.state[dst].set_f32x4(a);
3613        ControlFlow::Continue(())
3614    }
3615
3616    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3617        let mut a = self.state[src].get_f64x2();
3618        for elem in a.iter_mut() {
3619            *elem = elem.wasm_nearest();
3620        }
3621        self.state[dst].set_f64x2(a);
3622        ControlFlow::Continue(())
3623    }
3624
3625    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3626        let a = self.state[src].get_f32();
3627        self.state[dst].set_f32(a.wasm_sqrt());
3628        ControlFlow::Continue(())
3629    }
3630
3631    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3632        let mut a = self.state[src].get_f32x4();
3633        for elem in a.iter_mut() {
3634            *elem = elem.wasm_sqrt();
3635        }
3636        self.state[dst].set_f32x4(a);
3637        ControlFlow::Continue(())
3638    }
3639
3640    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3641        let mut a = self.state[src].get_f64x2();
3642        for elem in a.iter_mut() {
3643            *elem = elem.wasm_sqrt();
3644        }
3645        self.state[dst].set_f64x2(a);
3646        ControlFlow::Continue(())
3647    }
3648
3649    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3650        let a = self.state[src].get_f32();
3651        self.state[dst].set_f32(-a);
3652        ControlFlow::Continue(())
3653    }
3654
3655    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3656        let mut a = self.state[src].get_f32x4();
3657        for elem in a.iter_mut() {
3658            *elem = -*elem;
3659        }
3660        self.state[dst].set_f32x4(a);
3661        ControlFlow::Continue(())
3662    }
3663
3664    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3665        let a = self.state[src].get_f32();
3666        self.state[dst].set_f32(a.wasm_abs());
3667        ControlFlow::Continue(())
3668    }
3669
3670    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3671        let a = self.state[operands.src1].get_f64();
3672        let b = self.state[operands.src2].get_f64();
3673        self.state[operands.dst].set_f64(a + b);
3674        ControlFlow::Continue(())
3675    }
3676
3677    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3678        let a = self.state[operands.src1].get_f64();
3679        let b = self.state[operands.src2].get_f64();
3680        self.state[operands.dst].set_f64(a - b);
3681        ControlFlow::Continue(())
3682    }
3683
3684    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3685        let a = self.state[operands.src1].get_f64();
3686        let b = self.state[operands.src2].get_f64();
3687        self.state[operands.dst].set_f64(a * b);
3688        ControlFlow::Continue(())
3689    }
3690
3691    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3692        let a = self.state[operands.src1].get_f64();
3693        let b = self.state[operands.src2].get_f64();
3694        self.state[operands.dst].set_f64(a / b);
3695        ControlFlow::Continue(())
3696    }
3697
3698    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3699        let a = self.state[operands.src1].get_f64();
3700        let b = self.state[operands.src2].get_f64();
3701        self.state[operands.dst].set_f64(a.wasm_maximum(b));
3702        ControlFlow::Continue(())
3703    }
3704
3705    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3706        let a = self.state[operands.src1].get_f64();
3707        let b = self.state[operands.src2].get_f64();
3708        self.state[operands.dst].set_f64(a.wasm_minimum(b));
3709        ControlFlow::Continue(())
3710    }
3711
3712    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3713        let a = self.state[src].get_f64();
3714        self.state[dst].set_f64(a.wasm_trunc());
3715        ControlFlow::Continue(())
3716    }
3717
3718    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3719        let a = self.state[src].get_f64();
3720        self.state[dst].set_f64(a.wasm_floor());
3721        ControlFlow::Continue(())
3722    }
3723
3724    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3725        let a = self.state[src].get_f64();
3726        self.state[dst].set_f64(a.wasm_ceil());
3727        ControlFlow::Continue(())
3728    }
3729
3730    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3731        let a = self.state[src].get_f64();
3732        self.state[dst].set_f64(a.wasm_nearest());
3733        ControlFlow::Continue(())
3734    }
3735
3736    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3737        let a = self.state[src].get_f64();
3738        self.state[dst].set_f64(a.wasm_sqrt());
3739        ControlFlow::Continue(())
3740    }
3741
3742    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3743        let a = self.state[src].get_f64();
3744        self.state[dst].set_f64(-a);
3745        ControlFlow::Continue(())
3746    }
3747
3748    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3749        let a = self.state[src].get_f64();
3750        self.state[dst].set_f64(a.wasm_abs());
3751        ControlFlow::Continue(())
3752    }
3753
3754    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3755        let mut a = self.state[operands.src1].get_i8x16();
3756        let b = self.state[operands.src2].get_i8x16();
3757        for (a, b) in a.iter_mut().zip(b) {
3758            *a = a.wrapping_add(b);
3759        }
3760        self.state[operands.dst].set_i8x16(a);
3761        ControlFlow::Continue(())
3762    }
3763
3764    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3765        let mut a = self.state[operands.src1].get_i16x8();
3766        let b = self.state[operands.src2].get_i16x8();
3767        for (a, b) in a.iter_mut().zip(b) {
3768            *a = a.wrapping_add(b);
3769        }
3770        self.state[operands.dst].set_i16x8(a);
3771        ControlFlow::Continue(())
3772    }
3773
3774    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3775        let mut a = self.state[operands.src1].get_i32x4();
3776        let b = self.state[operands.src2].get_i32x4();
3777        for (a, b) in a.iter_mut().zip(b) {
3778            *a = a.wrapping_add(b);
3779        }
3780        self.state[operands.dst].set_i32x4(a);
3781        ControlFlow::Continue(())
3782    }
3783
3784    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3785        let mut a = self.state[operands.src1].get_i64x2();
3786        let b = self.state[operands.src2].get_i64x2();
3787        for (a, b) in a.iter_mut().zip(b) {
3788            *a = a.wrapping_add(b);
3789        }
3790        self.state[operands.dst].set_i64x2(a);
3791        ControlFlow::Continue(())
3792    }
3793
3794    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3795        let mut a = self.state[operands.src1].get_f32x4();
3796        let b = self.state[operands.src2].get_f32x4();
3797        for (a, b) in a.iter_mut().zip(b) {
3798            *a += b;
3799        }
3800        self.state[operands.dst].set_f32x4(a);
3801        ControlFlow::Continue(())
3802    }
3803
3804    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3805        let mut a = self.state[operands.src1].get_f64x2();
3806        let b = self.state[operands.src2].get_f64x2();
3807        for (a, b) in a.iter_mut().zip(b) {
3808            *a += b;
3809        }
3810        self.state[operands.dst].set_f64x2(a);
3811        ControlFlow::Continue(())
3812    }
3813
3814    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3815        let mut a = self.state[operands.src1].get_i8x16();
3816        let b = self.state[operands.src2].get_i8x16();
3817        for (a, b) in a.iter_mut().zip(b) {
3818            *a = (*a).saturating_add(b);
3819        }
3820        self.state[operands.dst].set_i8x16(a);
3821        ControlFlow::Continue(())
3822    }
3823
3824    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3825        let mut a = self.state[operands.src1].get_u8x16();
3826        let b = self.state[operands.src2].get_u8x16();
3827        for (a, b) in a.iter_mut().zip(b) {
3828            *a = (*a).saturating_add(b);
3829        }
3830        self.state[operands.dst].set_u8x16(a);
3831        ControlFlow::Continue(())
3832    }
3833
3834    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3835        let mut a = self.state[operands.src1].get_i16x8();
3836        let b = self.state[operands.src2].get_i16x8();
3837        for (a, b) in a.iter_mut().zip(b) {
3838            *a = (*a).saturating_add(b);
3839        }
3840        self.state[operands.dst].set_i16x8(a);
3841        ControlFlow::Continue(())
3842    }
3843
3844    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3845        let mut a = self.state[operands.src1].get_u16x8();
3846        let b = self.state[operands.src2].get_u16x8();
3847        for (a, b) in a.iter_mut().zip(b) {
3848            *a = (*a).saturating_add(b);
3849        }
3850        self.state[operands.dst].set_u16x8(a);
3851        ControlFlow::Continue(())
3852    }
3853
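    // Pairwise horizontal adds: the low half of the result holds the sums of
    // adjacent lane pairs from `src1` and the high half holds the sums of
    // adjacent lane pairs from `src2`, with wrapping arithmetic per lane.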
3854    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3855        let a = self.state[operands.src1].get_i16x8();
3856        let b = self.state[operands.src2].get_i16x8();
3857        let mut result = [0i16; 8];
3858        let half = result.len() / 2;
3859        for i in 0..half {
3860            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3861            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3862        }
3863        self.state[operands.dst].set_i16x8(result);
3864        ControlFlow::Continue(())
3865    }
3866
3867    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3868        let a = self.state[operands.src1].get_i32x4();
3869        let b = self.state[operands.src2].get_i32x4();
3870        let mut result = [0i32; 4];
3871        result[0] = a[0].wrapping_add(a[1]);
3872        result[1] = a[2].wrapping_add(a[3]);
3873        result[2] = b[0].wrapping_add(b[1]);
3874        result[3] = b[2].wrapping_add(b[3]);
3875        self.state[operands.dst].set_i32x4(result);
3876        ControlFlow::Continue(())
3877    }
3878
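    // Vector shifts take the shift amount from an X-register; `wrapping_shl`
    // and `wrapping_shr` mask that amount to the lane width, so shifting an
    // i8x16 lane by 9 behaves like shifting by 1.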
3879    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3880        let a = self.state[operands.src1].get_i8x16();
3881        let b = self.state[operands.src2].get_u32();
3882        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3883        ControlFlow::Continue(())
3884    }
3885
3886    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3887        let a = self.state[operands.src1].get_i16x8();
3888        let b = self.state[operands.src2].get_u32();
3889        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3890        ControlFlow::Continue(())
3891    }
3892
3893    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3894        let a = self.state[operands.src1].get_i32x4();
3895        let b = self.state[operands.src2].get_u32();
3896        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3897        ControlFlow::Continue(())
3898    }
3899
3900    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3901        let a = self.state[operands.src1].get_i64x2();
3902        let b = self.state[operands.src2].get_u32();
3903        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3904        ControlFlow::Continue(())
3905    }
3906
3907    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3908        let a = self.state[operands.src1].get_i8x16();
3909        let b = self.state[operands.src2].get_u32();
3910        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3911        ControlFlow::Continue(())
3912    }
3913
3914    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3915        let a = self.state[operands.src1].get_i16x8();
3916        let b = self.state[operands.src2].get_u32();
3917        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3918        ControlFlow::Continue(())
3919    }
3920
3921    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3922        let a = self.state[operands.src1].get_i32x4();
3923        let b = self.state[operands.src2].get_u32();
3924        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3925        ControlFlow::Continue(())
3926    }
3927
3928    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3929        let a = self.state[operands.src1].get_i64x2();
3930        let b = self.state[operands.src2].get_u32();
3931        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
3932        ControlFlow::Continue(())
3933    }
3934
3935    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3936        let a = self.state[operands.src1].get_u8x16();
3937        let b = self.state[operands.src2].get_u32();
3938        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
3939        ControlFlow::Continue(())
3940    }
3941
3942    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3943        let a = self.state[operands.src1].get_u16x8();
3944        let b = self.state[operands.src2].get_u32();
3945        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
3946        ControlFlow::Continue(())
3947    }
3948
3949    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3950        let a = self.state[operands.src1].get_u32x4();
3951        let b = self.state[operands.src2].get_u32();
3952        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
3953        ControlFlow::Continue(())
3954    }
3955
3956    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3957        let a = self.state[operands.src1].get_u64x2();
3958        let b = self.state[operands.src2].get_u32();
3959        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
3960        ControlFlow::Continue(())
3961    }
3962
3963    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
3964        self.state[dst].set_u128(val);
3965        ControlFlow::Continue(())
3966    }
3967
3968    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3969        let val = self.state[src].get_u32() as u8;
3970        self.state[dst].set_u8x16([val; 16]);
3971        ControlFlow::Continue(())
3972    }
3973
3974    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3975        let val = self.state[src].get_u32() as u16;
3976        self.state[dst].set_u16x8([val; 8]);
3977        ControlFlow::Continue(())
3978    }
3979
3980    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3981        let val = self.state[src].get_u32();
3982        self.state[dst].set_u32x4([val; 4]);
3983        ControlFlow::Continue(())
3984    }
3985
3986    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3987        let val = self.state[src].get_u64();
3988        self.state[dst].set_u64x2([val; 2]);
3989        ControlFlow::Continue(())
3990    }
3991
3992    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
3993        let val = self.state[src].get_f32();
3994        self.state[dst].set_f32x4([val; 4]);
3995        ControlFlow::Continue(())
3996    }
3997
3998    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
3999        let val = self.state[src].get_f64();
4000        self.state[dst].set_f64x2([val; 2]);
4001        ControlFlow::Continue(())
4002    }
4003
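    // Extending loads: a half-width vector is read from memory and each lane
    // is widened to the next lane size (sign- or zero-extended as the suffix
    // indicates), so e.g. eight i8 values become an i16x8 register.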
4004    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4005        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
4006        self.state[dst].set_i16x8(val.map(|i| i.into()));
4007        ControlFlow::Continue(())
4008    }
4009
4010    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4011        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4012        self.state[dst].set_u16x8(val.map(|i| i.into()));
4013        ControlFlow::Continue(())
4014    }
4015
4016    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4017        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4018        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4019        ControlFlow::Continue(())
4020    }
4021
4022    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4023        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4024        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4025        ControlFlow::Continue(())
4026    }
4027
4028    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4029        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4030        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4031        ControlFlow::Continue(())
4032    }
4033
4034    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4035        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4036        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4037        ControlFlow::Continue(())
4038    }
4039
4040    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4041        let a = self.state[operands.src1].get_u128();
4042        let b = self.state[operands.src2].get_u128();
4043        self.state[operands.dst].set_u128(a & b);
4044        ControlFlow::Continue(())
4045    }
4046
4047    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4048        let a = self.state[operands.src1].get_u128();
4049        let b = self.state[operands.src2].get_u128();
4050        self.state[operands.dst].set_u128(a | b);
4051        ControlFlow::Continue(())
4052    }
4053
4054    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4055        let a = self.state[operands.src1].get_u128();
4056        let b = self.state[operands.src2].get_u128();
4057        self.state[operands.dst].set_u128(a ^ b);
4058        ControlFlow::Continue(())
4059    }
4060
4061    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4062        let a = self.state[src].get_u128();
4063        self.state[dst].set_u128(!a);
4064        ControlFlow::Continue(())
4065    }
4066
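    // Bitwise select: each result bit comes from `x` where the corresponding
    // bit of `c` is 1 and from `y` where it is 0, i.e. `(c & x) | (!c & y)`.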
4067    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4068        let c = self.state[c].get_u128();
4069        let x = self.state[x].get_u128();
4070        let y = self.state[y].get_u128();
4071        self.state[dst].set_u128((c & x) | (!c & y));
4072        ControlFlow::Continue(())
4073    }
4074
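    // Bitmask extraction: the sign (most-significant) bit of each lane is
    // packed into the low bits of `dst`, with lane 0 in bit 0; iterating the
    // lanes in reverse while shifting the accumulator left puts them in that
    // order.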
4075    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4076        let a = self.state[src].get_u8x16();
4077        let mut result = 0;
4078        for item in a.iter().rev() {
4079            result <<= 1;
4080            result |= (*item >> 7) as u32;
4081        }
4082        self.state[dst].set_u32(result);
4083        ControlFlow::Continue(())
4084    }
4085
4086    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4087        let a = self.state[src].get_u16x8();
4088        let mut result = 0;
4089        for item in a.iter().rev() {
4090            result <<= 1;
4091            result |= (*item >> 15) as u32;
4092        }
4093        self.state[dst].set_u32(result);
4094        ControlFlow::Continue(())
4095    }
4096
4097    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4098        let a = self.state[src].get_u32x4();
4099        let mut result = 0;
4100        for item in a.iter().rev() {
4101            result <<= 1;
4102            result |= *item >> 31;
4103        }
4104        self.state[dst].set_u32(result);
4105        ControlFlow::Continue(())
4106    }
4107
4108    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4109        let a = self.state[src].get_u64x2();
4110        let mut result = 0;
4111        for item in a.iter().rev() {
4112            result <<= 1;
4113            result |= (*item >> 63) as u32;
4114        }
4115        self.state[dst].set_u32(result);
4116        ControlFlow::Continue(())
4117    }
4118
4119    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4120        let a = self.state[src].get_u8x16();
4121        let result = a.iter().all(|a| *a != 0);
4122        self.state[dst].set_u32(u32::from(result));
4123        ControlFlow::Continue(())
4124    }
4125
4126    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4127        let a = self.state[src].get_u16x8();
4128        let result = a.iter().all(|a| *a != 0);
4129        self.state[dst].set_u32(u32::from(result));
4130        ControlFlow::Continue(())
4131    }
4132
4133    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4134        let a = self.state[src].get_u32x4();
4135        let result = a.iter().all(|a| *a != 0);
4136        self.state[dst].set_u32(u32::from(result));
4137        ControlFlow::Continue(())
4138    }
4139
4140    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4141        let a = self.state[src].get_u64x2();
4142        let result = a.iter().all(|a| *a != 0);
4143        self.state[dst].set_u32(u32::from(result));
4144        ControlFlow::Continue(())
4145    }
4146
4147    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4148        let a = self.state[src].get_u8x16();
4149        let result = a.iter().any(|a| *a != 0);
4150        self.state[dst].set_u32(u32::from(result));
4151        ControlFlow::Continue(())
4152    }
4153
4154    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4155        let a = self.state[src].get_u16x8();
4156        let result = a.iter().any(|a| *a != 0);
4157        self.state[dst].set_u32(u32::from(result));
4158        ControlFlow::Continue(())
4159    }
4160
4161    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4162        let a = self.state[src].get_u32x4();
4163        let result = a.iter().any(|a| *a != 0);
4164        self.state[dst].set_u32(u32::from(result));
4165        ControlFlow::Continue(())
4166    }
4167
4168    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4169        let a = self.state[src].get_u64x2();
4170        let result = a.iter().any(|a| *a != 0);
4171        self.state[dst].set_u32(u32::from(result));
4172        ControlFlow::Continue(())
4173    }
4174
4175    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4176        let a = self.state[src].get_i32x4();
4177        self.state[dst].set_f32x4(a.map(|i| i as f32));
4178        ControlFlow::Continue(())
4179    }
4180
4181    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4182        let a = self.state[src].get_u32x4();
4183        self.state[dst].set_f32x4(a.map(|i| i as f32));
4184        ControlFlow::Continue(())
4185    }
4186
4187    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4188        let a = self.state[src].get_i64x2();
4189        self.state[dst].set_f64x2(a.map(|i| i as f64));
4190        ControlFlow::Continue(())
4191    }
4192
4193    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4194        let a = self.state[src].get_u64x2();
4195        self.state[dst].set_f64x2(a.map(|i| i as f64));
4196        ControlFlow::Continue(())
4197    }
4198
4199    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4200        let a = self.state[src].get_f32x4();
4201        self.state[dst].set_i32x4(a.map(|f| f as i32));
4202        ControlFlow::Continue(())
4203    }
4204
4205    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4206        let a = self.state[src].get_f32x4();
4207        self.state[dst].set_u32x4(a.map(|f| f as u32));
4208        ControlFlow::Continue(())
4209    }
4210
4211    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4212        let a = self.state[src].get_f64x2();
4213        self.state[dst].set_i64x2(a.map(|f| f as i64));
4214        ControlFlow::Continue(())
4215    }
4216
4217    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4218        let a = self.state[src].get_f64x2();
4219        self.state[dst].set_u64x2(a.map(|f| f as u64));
4220        ControlFlow::Continue(())
4221    }
4222
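    // Widening conversions: `first_chunk` selects the low half of the lanes
    // and `last_chunk` the high half, and each selected lane is then sign- or
    // zero-extended to the next lane size.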
4223    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4224        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4225        self.state[dst].set_i16x8(a.map(|i| i.into()));
4226        ControlFlow::Continue(())
4227    }
4228
4229    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4230        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4231        self.state[dst].set_u16x8(a.map(|i| i.into()));
4232        ControlFlow::Continue(())
4233    }
4234
4235    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4236        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4237        self.state[dst].set_i32x4(a.map(|i| i.into()));
4238        ControlFlow::Continue(())
4239    }
4240
4241    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4242        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4243        self.state[dst].set_u32x4(a.map(|i| i.into()));
4244        ControlFlow::Continue(())
4245    }
4246
4247    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4248        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4249        self.state[dst].set_i64x2(a.map(|i| i.into()));
4250        ControlFlow::Continue(())
4251    }
4252
4253    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4254        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4255        self.state[dst].set_u64x2(a.map(|i| i.into()));
4256        ControlFlow::Continue(())
4257    }
4258
4259    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4260        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4261        self.state[dst].set_i16x8(a.map(|i| i.into()));
4262        ControlFlow::Continue(())
4263    }
4264
4265    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4266        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4267        self.state[dst].set_u16x8(a.map(|i| i.into()));
4268        ControlFlow::Continue(())
4269    }
4270
4271    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4272        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4273        self.state[dst].set_i32x4(a.map(|i| i.into()));
4274        ControlFlow::Continue(())
4275    }
4276
4277    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4278        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4279        self.state[dst].set_u32x4(a.map(|i| i.into()));
4280        ControlFlow::Continue(())
4281    }
4282
4283    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4284        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4285        self.state[dst].set_i64x2(a.map(|i| i.into()));
4286        ControlFlow::Continue(())
4287    }
4288
4289    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4290        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4291        self.state[dst].set_u64x2(a.map(|i| i.into()));
4292        ControlFlow::Continue(())
4293    }
4294
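    // Narrowing conversions: lanes from `src1` fill the low half of the
    // result and lanes from `src2` fill the high half, with each lane
    // saturated to the destination type's range (the `unwrap_or` arm clamps
    // out-of-range values to the destination's MIN or MAX).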
4295    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4296        let a = self.state[operands.src1].get_i16x8();
4297        let b = self.state[operands.src2].get_i16x8();
4298        let mut result = [0; 16];
4299        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4300            *d = (*i)
4301                .try_into()
4302                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4303        }
4304        self.state[operands.dst].set_i8x16(result);
4305        ControlFlow::Continue(())
4306    }
4307
4308    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4309        let a = self.state[operands.src1].get_i16x8();
4310        let b = self.state[operands.src2].get_i16x8();
4311        let mut result = [0; 16];
4312        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4313            *d = (*i)
4314                .try_into()
4315                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4316        }
4317        self.state[operands.dst].set_u8x16(result);
4318        ControlFlow::Continue(())
4319    }
4320
4321    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4322        let a = self.state[operands.src1].get_i32x4();
4323        let b = self.state[operands.src2].get_i32x4();
4324        let mut result = [0; 8];
4325        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4326            *d = (*i)
4327                .try_into()
4328                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4329        }
4330        self.state[operands.dst].set_i16x8(result);
4331        ControlFlow::Continue(())
4332    }
4333
4334    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4335        let a = self.state[operands.src1].get_i32x4();
4336        let b = self.state[operands.src2].get_i32x4();
4337        let mut result = [0; 8];
4338        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4339            *d = (*i)
4340                .try_into()
4341                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4342        }
4343        self.state[operands.dst].set_u16x8(result);
4344        ControlFlow::Continue(())
4345    }
4346
4347    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4348        let a = self.state[operands.src1].get_i64x2();
4349        let b = self.state[operands.src2].get_i64x2();
4350        let mut result = [0; 4];
4351        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4352            *d = (*i)
4353                .try_into()
4354                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4355        }
4356        self.state[operands.dst].set_i32x4(result);
4357        ControlFlow::Continue(())
4358    }
4359
4360    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4361        let a = self.state[operands.src1].get_i64x2();
4362        let b = self.state[operands.src2].get_i64x2();
4363        let mut result = [0; 4];
4364        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4365            *d = (*i)
4366                .try_into()
4367                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4368        }
4369        self.state[operands.dst].set_u32x4(result);
4370        ControlFlow::Continue(())
4371    }
4372
4373    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4374        let a = self.state[operands.src1].get_u64x2();
4375        let b = self.state[operands.src2].get_u64x2();
4376        let mut result = [0; 4];
4377        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4378            *d = (*i).try_into().unwrap_or(u32::MAX);
4379        }
4380        self.state[operands.dst].set_u32x4(result);
4381        ControlFlow::Continue(())
4382    }
4383
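    // vfpromotelow converts the two low f32 lanes to f64; vfdemote goes the
    // other way, converting both f64 lanes to f32 and zeroing the upper two
    // lanes of the destination.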
4384    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4385        let a = self.state[src].get_f32x4();
4386        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4387        ControlFlow::Continue(())
4388    }
4389
4390    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4391        let a = self.state[src].get_f64x2();
4392        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4393        ControlFlow::Continue(())
4394    }
4395
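    // Lane-wise integer subtraction uses wrapping arithmetic; the *_sat
    // variants further below saturate at the lane type's bounds instead. The
    // same read-both-vectors, combine-lane-by-lane, write-back pattern repeats
    // for the multiply handlers that follow.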
4396    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4397        let mut a = self.state[operands.src1].get_i8x16();
4398        let b = self.state[operands.src2].get_i8x16();
4399        for (a, b) in a.iter_mut().zip(b) {
4400            *a = a.wrapping_sub(b);
4401        }
4402        self.state[operands.dst].set_i8x16(a);
4403        ControlFlow::Continue(())
4404    }
4405
4406    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4407        let mut a = self.state[operands.src1].get_i16x8();
4408        let b = self.state[operands.src2].get_i16x8();
4409        for (a, b) in a.iter_mut().zip(b) {
4410            *a = a.wrapping_sub(b);
4411        }
4412        self.state[operands.dst].set_i16x8(a);
4413        ControlFlow::Continue(())
4414    }
4415
4416    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4417        let mut a = self.state[operands.src1].get_i32x4();
4418        let b = self.state[operands.src2].get_i32x4();
4419        for (a, b) in a.iter_mut().zip(b) {
4420            *a = a.wrapping_sub(b);
4421        }
4422        self.state[operands.dst].set_i32x4(a);
4423        ControlFlow::Continue(())
4424    }
4425
4426    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4427        let mut a = self.state[operands.src1].get_i64x2();
4428        let b = self.state[operands.src2].get_i64x2();
4429        for (a, b) in a.iter_mut().zip(b) {
4430            *a = a.wrapping_sub(b);
4431        }
4432        self.state[operands.dst].set_i64x2(a);
4433        ControlFlow::Continue(())
4434    }
4435
4436    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4437        let mut a = self.state[operands.src1].get_i8x16();
4438        let b = self.state[operands.src2].get_i8x16();
4439        for (a, b) in a.iter_mut().zip(b) {
4440            *a = a.saturating_sub(b);
4441        }
4442        self.state[operands.dst].set_i8x16(a);
4443        ControlFlow::Continue(())
4444    }
4445
4446    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4447        let mut a = self.state[operands.src1].get_u8x16();
4448        let b = self.state[operands.src2].get_u8x16();
4449        for (a, b) in a.iter_mut().zip(b) {
4450            *a = a.saturating_sub(b);
4451        }
4452        self.state[operands.dst].set_u8x16(a);
4453        ControlFlow::Continue(())
4454    }
4455
4456    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4457        let mut a = self.state[operands.src1].get_i16x8();
4458        let b = self.state[operands.src2].get_i16x8();
4459        for (a, b) in a.iter_mut().zip(b) {
4460            *a = a.saturating_sub(b);
4461        }
4462        self.state[operands.dst].set_i16x8(a);
4463        ControlFlow::Continue(())
4464    }
4465
4466    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4467        let mut a = self.state[operands.src1].get_u16x8();
4468        let b = self.state[operands.src2].get_u16x8();
4469        for (a, b) in a.iter_mut().zip(b) {
4470            *a = a.saturating_sub(b);
4471        }
4472        self.state[operands.dst].set_u16x8(a);
4473        ControlFlow::Continue(())
4474    }
4475
4476    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4477        let mut a = self.state[operands.src1].get_f64x2();
4478        let b = self.state[operands.src2].get_f64x2();
4479        for (a, b) in a.iter_mut().zip(b) {
4480            *a = *a - b;
4481        }
4482        self.state[operands.dst].set_f64x2(a);
4483        ControlFlow::Continue(())
4484    }
4485
4486    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4487        let mut a = self.state[operands.src1].get_i8x16();
4488        let b = self.state[operands.src2].get_i8x16();
4489        for (a, b) in a.iter_mut().zip(b) {
4490            *a = a.wrapping_mul(b);
4491        }
4492        self.state[operands.dst].set_i8x16(a);
4493        ControlFlow::Continue(())
4494    }
4495
4496    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4497        let mut a = self.state[operands.src1].get_i16x8();
4498        let b = self.state[operands.src2].get_i16x8();
4499        for (a, b) in a.iter_mut().zip(b) {
4500            *a = a.wrapping_mul(b);
4501        }
4502        self.state[operands.dst].set_i16x8(a);
4503        ControlFlow::Continue(())
4504    }
4505
4506    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4507        let mut a = self.state[operands.src1].get_i32x4();
4508        let b = self.state[operands.src2].get_i32x4();
4509        for (a, b) in a.iter_mut().zip(b) {
4510            *a = a.wrapping_mul(b);
4511        }
4512        self.state[operands.dst].set_i32x4(a);
4513        ControlFlow::Continue(())
4514    }
4515
4516    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4517        let mut a = self.state[operands.src1].get_i64x2();
4518        let b = self.state[operands.src2].get_i64x2();
4519        for (a, b) in a.iter_mut().zip(b) {
4520            *a = a.wrapping_mul(b);
4521        }
4522        self.state[operands.dst].set_i64x2(a);
4523        ControlFlow::Continue(())
4524    }
4525
4526    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4527        let mut a = self.state[operands.src1].get_f64x2();
4528        let b = self.state[operands.src2].get_f64x2();
4529        for (a, b) in a.iter_mut().zip(b) {
4530            *a = *a * b;
4531        }
4532        self.state[operands.dst].set_f64x2(a);
4533        ControlFlow::Continue(())
4534    }
4535
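    // Saturating, rounding Q15 multiplication: each product is computed in
    // 32-bit precision, `+ (1 << 14)` rounds to nearest before the arithmetic
    // shift by 15, and the clamp handles the single overflow case
    // (i16::MIN * i16::MIN rounds to 32768, which clamps to i16::MAX).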
4536    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4537        let mut a = self.state[operands.src1].get_i16x8();
4538        let b = self.state[operands.src2].get_i16x8();
4539        const MIN: i32 = i16::MIN as i32;
4540        const MAX: i32 = i16::MAX as i32;
4541        for (a, b) in a.iter_mut().zip(b) {
4542            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4543            *a = r.clamp(MIN, MAX) as i16;
4544        }
4545        self.state[operands.dst].set_i16x8(a);
4546        ControlFlow::Continue(())
4547    }
4548
4549    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4550        let a = self.state[src].get_u8x16();
4551        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4552        ControlFlow::Continue(())
4553    }
4554
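    // Lane extraction moves a single vector lane into an x-register,
    // zero-extended to the register width. The unchecked indexing presumably
    // relies on the `lane` immediate having been validated by whatever
    // produced the bytecode; an out-of-range lane here would be undefined
    // behavior.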
4555    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4556        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4557        self.state[dst].set_u32(u32::from(a));
4558        ControlFlow::Continue(())
4559    }
4560
4561    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4562        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4563        self.state[dst].set_u32(u32::from(a));
4564        ControlFlow::Continue(())
4565    }
4566
4567    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4568        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4569        self.state[dst].set_u32(a);
4570        ControlFlow::Continue(())
4571    }
4572
4573    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4574        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4575        self.state[dst].set_u64(a);
4576        ControlFlow::Continue(())
4577    }
4578
4579    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4580        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4581        self.state[dst].set_f32(a);
4582        ControlFlow::Continue(())
4583    }
4584
4585    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4586        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4587        self.state[dst].set_f64(a);
4588        ControlFlow::Continue(())
4589    }
4590
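    // Lane insertion is the inverse of extraction: copy the source vector,
    // overwrite one lane with the scalar operand (truncated to the lane
    // width), and write the whole vector to the destination register.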
4591    fn vinsertx8(
4592        &mut self,
4593        operands: BinaryOperands<VReg, VReg, XReg>,
4594        lane: u8,
4595    ) -> ControlFlow<Done> {
4596        let mut a = self.state[operands.src1].get_u8x16();
4597        let b = self.state[operands.src2].get_u32() as u8;
4598        unsafe {
4599            *a.get_unchecked_mut(usize::from(lane)) = b;
4600        }
4601        self.state[operands.dst].set_u8x16(a);
4602        ControlFlow::Continue(())
4603    }
4604
4605    fn vinsertx16(
4606        &mut self,
4607        operands: BinaryOperands<VReg, VReg, XReg>,
4608        lane: u8,
4609    ) -> ControlFlow<Done> {
4610        let mut a = self.state[operands.src1].get_u16x8();
4611        let b = self.state[operands.src2].get_u32() as u16;
4612        unsafe {
4613            *a.get_unchecked_mut(usize::from(lane)) = b;
4614        }
4615        self.state[operands.dst].set_u16x8(a);
4616        ControlFlow::Continue(())
4617    }
4618
4619    fn vinsertx32(
4620        &mut self,
4621        operands: BinaryOperands<VReg, VReg, XReg>,
4622        lane: u8,
4623    ) -> ControlFlow<Done> {
4624        let mut a = self.state[operands.src1].get_u32x4();
4625        let b = self.state[operands.src2].get_u32();
4626        unsafe {
4627            *a.get_unchecked_mut(usize::from(lane)) = b;
4628        }
4629        self.state[operands.dst].set_u32x4(a);
4630        ControlFlow::Continue(())
4631    }
4632
4633    fn vinsertx64(
4634        &mut self,
4635        operands: BinaryOperands<VReg, VReg, XReg>,
4636        lane: u8,
4637    ) -> ControlFlow<Done> {
4638        let mut a = self.state[operands.src1].get_u64x2();
4639        let b = self.state[operands.src2].get_u64();
4640        unsafe {
4641            *a.get_unchecked_mut(usize::from(lane)) = b;
4642        }
4643        self.state[operands.dst].set_u64x2(a);
4644        ControlFlow::Continue(())
4645    }
4646
4647    fn vinsertf32(
4648        &mut self,
4649        operands: BinaryOperands<VReg, VReg, FReg>,
4650        lane: u8,
4651    ) -> ControlFlow<Done> {
4652        let mut a = self.state[operands.src1].get_f32x4();
4653        let b = self.state[operands.src2].get_f32();
4654        unsafe {
4655            *a.get_unchecked_mut(usize::from(lane)) = b;
4656        }
4657        self.state[operands.dst].set_f32x4(a);
4658        ControlFlow::Continue(())
4659    }
4660
4661    fn vinsertf64(
4662        &mut self,
4663        operands: BinaryOperands<VReg, VReg, FReg>,
4664        lane: u8,
4665    ) -> ControlFlow<Done> {
4666        let mut a = self.state[operands.src1].get_f64x2();
4667        let b = self.state[operands.src2].get_f64();
4668        unsafe {
4669            *a.get_unchecked_mut(usize::from(lane)) = b;
4670        }
4671        self.state[operands.dst].set_f64x2(a);
4672        ControlFlow::Continue(())
4673    }
4674
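    // The integer comparison handlers produce a per-lane mask: all ones when
    // the predicate holds for that lane, all zeros otherwise. Signed (vslt*)
    // and unsigned (vult*) variants differ only in how the lanes are read.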
4675    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4676        let a = self.state[operands.src1].get_u8x16();
4677        let b = self.state[operands.src2].get_u8x16();
4678        let mut c = [0; 16];
4679        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4680            *c = if a == b { u8::MAX } else { 0 };
4681        }
4682        self.state[operands.dst].set_u8x16(c);
4683        ControlFlow::Continue(())
4684    }
4685
4686    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4687        let a = self.state[operands.src1].get_u8x16();
4688        let b = self.state[operands.src2].get_u8x16();
4689        let mut c = [0; 16];
4690        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4691            *c = if a != b { u8::MAX } else { 0 };
4692        }
4693        self.state[operands.dst].set_u8x16(c);
4694        ControlFlow::Continue(())
4695    }
4696
4697    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4698        let a = self.state[operands.src1].get_i8x16();
4699        let b = self.state[operands.src2].get_i8x16();
4700        let mut c = [0; 16];
4701        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4702            *c = if a < b { u8::MAX } else { 0 };
4703        }
4704        self.state[operands.dst].set_u8x16(c);
4705        ControlFlow::Continue(())
4706    }
4707
4708    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4709        let a = self.state[operands.src1].get_i8x16();
4710        let b = self.state[operands.src2].get_i8x16();
4711        let mut c = [0; 16];
4712        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4713            *c = if a <= b { u8::MAX } else { 0 };
4714        }
4715        self.state[operands.dst].set_u8x16(c);
4716        ControlFlow::Continue(())
4717    }
4718
4719    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4720        let a = self.state[operands.src1].get_u8x16();
4721        let b = self.state[operands.src2].get_u8x16();
4722        let mut c = [0; 16];
4723        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4724            *c = if a < b { u8::MAX } else { 0 };
4725        }
4726        self.state[operands.dst].set_u8x16(c);
4727        ControlFlow::Continue(())
4728    }
4729
4730    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4731        let a = self.state[operands.src1].get_u8x16();
4732        let b = self.state[operands.src2].get_u8x16();
4733        let mut c = [0; 16];
4734        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4735            *c = if a <= b { u8::MAX } else { 0 };
4736        }
4737        self.state[operands.dst].set_u8x16(c);
4738        ControlFlow::Continue(())
4739    }
4740
4741    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4742        let a = self.state[operands.src1].get_u16x8();
4743        let b = self.state[operands.src2].get_u16x8();
4744        let mut c = [0; 8];
4745        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4746            *c = if a == b { u16::MAX } else { 0 };
4747        }
4748        self.state[operands.dst].set_u16x8(c);
4749        ControlFlow::Continue(())
4750    }
4751
4752    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4753        let a = self.state[operands.src1].get_u16x8();
4754        let b = self.state[operands.src2].get_u16x8();
4755        let mut c = [0; 8];
4756        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4757            *c = if a != b { u16::MAX } else { 0 };
4758        }
4759        self.state[operands.dst].set_u16x8(c);
4760        ControlFlow::Continue(())
4761    }
4762
4763    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4764        let a = self.state[operands.src1].get_i16x8();
4765        let b = self.state[operands.src2].get_i16x8();
4766        let mut c = [0; 8];
4767        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4768            *c = if a < b { u16::MAX } else { 0 };
4769        }
4770        self.state[operands.dst].set_u16x8(c);
4771        ControlFlow::Continue(())
4772    }
4773
4774    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4775        let a = self.state[operands.src1].get_i16x8();
4776        let b = self.state[operands.src2].get_i16x8();
4777        let mut c = [0; 8];
4778        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4779            *c = if a <= b { u16::MAX } else { 0 };
4780        }
4781        self.state[operands.dst].set_u16x8(c);
4782        ControlFlow::Continue(())
4783    }
4784
4785    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4786        let a = self.state[operands.src1].get_u16x8();
4787        let b = self.state[operands.src2].get_u16x8();
4788        let mut c = [0; 8];
4789        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4790            *c = if a < b { u16::MAX } else { 0 };
4791        }
4792        self.state[operands.dst].set_u16x8(c);
4793        ControlFlow::Continue(())
4794    }
4795
4796    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4797        let a = self.state[operands.src1].get_u16x8();
4798        let b = self.state[operands.src2].get_u16x8();
4799        let mut c = [0; 8];
4800        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4801            *c = if a <= b { u16::MAX } else { 0 };
4802        }
4803        self.state[operands.dst].set_u16x8(c);
4804        ControlFlow::Continue(())
4805    }
4806
4807    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4808        let a = self.state[operands.src1].get_u32x4();
4809        let b = self.state[operands.src2].get_u32x4();
4810        let mut c = [0; 4];
4811        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4812            *c = if a == b { u32::MAX } else { 0 };
4813        }
4814        self.state[operands.dst].set_u32x4(c);
4815        ControlFlow::Continue(())
4816    }
4817
4818    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4819        let a = self.state[operands.src1].get_u32x4();
4820        let b = self.state[operands.src2].get_u32x4();
4821        let mut c = [0; 4];
4822        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4823            *c = if a != b { u32::MAX } else { 0 };
4824        }
4825        self.state[operands.dst].set_u32x4(c);
4826        ControlFlow::Continue(())
4827    }
4828
4829    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4830        let a = self.state[operands.src1].get_i32x4();
4831        let b = self.state[operands.src2].get_i32x4();
4832        let mut c = [0; 4];
4833        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4834            *c = if a < b { u32::MAX } else { 0 };
4835        }
4836        self.state[operands.dst].set_u32x4(c);
4837        ControlFlow::Continue(())
4838    }
4839
4840    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4841        let a = self.state[operands.src1].get_i32x4();
4842        let b = self.state[operands.src2].get_i32x4();
4843        let mut c = [0; 4];
4844        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4845            *c = if a <= b { u32::MAX } else { 0 };
4846        }
4847        self.state[operands.dst].set_u32x4(c);
4848        ControlFlow::Continue(())
4849    }
4850
4851    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4852        let a = self.state[operands.src1].get_u32x4();
4853        let b = self.state[operands.src2].get_u32x4();
4854        let mut c = [0; 4];
4855        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4856            *c = if a < b { u32::MAX } else { 0 };
4857        }
4858        self.state[operands.dst].set_u32x4(c);
4859        ControlFlow::Continue(())
4860    }
4861
4862    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4863        let a = self.state[operands.src1].get_u32x4();
4864        let b = self.state[operands.src2].get_u32x4();
4865        let mut c = [0; 4];
4866        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4867            *c = if a <= b { u32::MAX } else { 0 };
4868        }
4869        self.state[operands.dst].set_u32x4(c);
4870        ControlFlow::Continue(())
4871    }
4872
4873    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4874        let a = self.state[operands.src1].get_u64x2();
4875        let b = self.state[operands.src2].get_u64x2();
4876        let mut c = [0; 2];
4877        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4878            *c = if a == b { u64::MAX } else { 0 };
4879        }
4880        self.state[operands.dst].set_u64x2(c);
4881        ControlFlow::Continue(())
4882    }
4883
4884    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4885        let a = self.state[operands.src1].get_u64x2();
4886        let b = self.state[operands.src2].get_u64x2();
4887        let mut c = [0; 2];
4888        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4889            *c = if a != b { u64::MAX } else { 0 };
4890        }
4891        self.state[operands.dst].set_u64x2(c);
4892        ControlFlow::Continue(())
4893    }
4894
4895    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4896        let a = self.state[operands.src1].get_i64x2();
4897        let b = self.state[operands.src2].get_i64x2();
4898        let mut c = [0; 2];
4899        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4900            *c = if a < b { u64::MAX } else { 0 };
4901        }
4902        self.state[operands.dst].set_u64x2(c);
4903        ControlFlow::Continue(())
4904    }
4905
4906    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4907        let a = self.state[operands.src1].get_i64x2();
4908        let b = self.state[operands.src2].get_i64x2();
4909        let mut c = [0; 2];
4910        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4911            *c = if a <= b { u64::MAX } else { 0 };
4912        }
4913        self.state[operands.dst].set_u64x2(c);
4914        ControlFlow::Continue(())
4915    }
4916
4917    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4918        let a = self.state[operands.src1].get_u64x2();
4919        let b = self.state[operands.src2].get_u64x2();
4920        let mut c = [0; 2];
4921        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4922            *c = if a < b { u64::MAX } else { 0 };
4923        }
4924        self.state[operands.dst].set_u64x2(c);
4925        ControlFlow::Continue(())
4926    }
4927
4928    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4929        let a = self.state[operands.src1].get_u64x2();
4930        let b = self.state[operands.src2].get_u64x2();
4931        let mut c = [0; 2];
4932        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4933            *c = if a <= b { u64::MAX } else { 0 };
4934        }
4935        self.state[operands.dst].set_u64x2(c);
4936        ControlFlow::Continue(())
4937    }
4938
4939    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4940        let a = self.state[src].get_i8x16();
4941        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
4942        ControlFlow::Continue(())
4943    }
4944
4945    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4946        let a = self.state[src].get_i16x8();
4947        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
4948        ControlFlow::Continue(())
4949    }
4950
4951    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4952        let a = self.state[src].get_i32x4();
4953        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
4954        ControlFlow::Continue(())
4955    }
4956
4957    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4958        let a = self.state[src].get_i64x2();
4959        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
4960        ControlFlow::Continue(())
4961    }
4962
4963    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4964        let a = self.state[src].get_f64x2();
4965        self.state[dst].set_f64x2(a.map(|i| -i));
4966        ControlFlow::Continue(())
4967    }
4968
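    // Lane-wise min/max come in signed and unsigned flavors; the vabs*
    // handlers below use wrapping_abs, so the minimum value of a lane type
    // (e.g. i8::MIN) is its own absolute value.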
4969    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4970        let mut a = self.state[operands.src1].get_i8x16();
4971        let b = self.state[operands.src2].get_i8x16();
4972        for (a, b) in a.iter_mut().zip(&b) {
4973            *a = (*a).min(*b);
4974        }
4975        self.state[operands.dst].set_i8x16(a);
4976        ControlFlow::Continue(())
4977    }
4978
4979    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4980        let mut a = self.state[operands.src1].get_u8x16();
4981        let b = self.state[operands.src2].get_u8x16();
4982        for (a, b) in a.iter_mut().zip(&b) {
4983            *a = (*a).min(*b);
4984        }
4985        self.state[operands.dst].set_u8x16(a);
4986        ControlFlow::Continue(())
4987    }
4988
4989    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4990        let mut a = self.state[operands.src1].get_i16x8();
4991        let b = self.state[operands.src2].get_i16x8();
4992        for (a, b) in a.iter_mut().zip(&b) {
4993            *a = (*a).min(*b);
4994        }
4995        self.state[operands.dst].set_i16x8(a);
4996        ControlFlow::Continue(())
4997    }
4998
4999    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5000        let mut a = self.state[operands.src1].get_u16x8();
5001        let b = self.state[operands.src2].get_u16x8();
5002        for (a, b) in a.iter_mut().zip(&b) {
5003            *a = (*a).min(*b);
5004        }
5005        self.state[operands.dst].set_u16x8(a);
5006        ControlFlow::Continue(())
5007    }
5008
5009    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5010        let mut a = self.state[operands.src1].get_i32x4();
5011        let b = self.state[operands.src2].get_i32x4();
5012        for (a, b) in a.iter_mut().zip(&b) {
5013            *a = (*a).min(*b);
5014        }
5015        self.state[operands.dst].set_i32x4(a);
5016        ControlFlow::Continue(())
5017    }
5018
5019    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5020        let mut a = self.state[operands.src1].get_u32x4();
5021        let b = self.state[operands.src2].get_u32x4();
5022        for (a, b) in a.iter_mut().zip(&b) {
5023            *a = (*a).min(*b);
5024        }
5025        self.state[operands.dst].set_u32x4(a);
5026        ControlFlow::Continue(())
5027    }
5028
5029    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5030        let mut a = self.state[operands.src1].get_i8x16();
5031        let b = self.state[operands.src2].get_i8x16();
5032        for (a, b) in a.iter_mut().zip(&b) {
5033            *a = (*a).max(*b);
5034        }
5035        self.state[operands.dst].set_i8x16(a);
5036        ControlFlow::Continue(())
5037    }
5038
5039    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5040        let mut a = self.state[operands.src1].get_u8x16();
5041        let b = self.state[operands.src2].get_u8x16();
5042        for (a, b) in a.iter_mut().zip(&b) {
5043            *a = (*a).max(*b);
5044        }
5045        self.state[operands.dst].set_u8x16(a);
5046        ControlFlow::Continue(())
5047    }
5048
5049    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5050        let mut a = self.state[operands.src1].get_i16x8();
5051        let b = self.state[operands.src2].get_i16x8();
5052        for (a, b) in a.iter_mut().zip(&b) {
5053            *a = (*a).max(*b);
5054        }
5055        self.state[operands.dst].set_i16x8(a);
5056        ControlFlow::Continue(())
5057    }
5058
5059    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5060        let mut a = self.state[operands.src1].get_u16x8();
5061        let b = self.state[operands.src2].get_u16x8();
5062        for (a, b) in a.iter_mut().zip(&b) {
5063            *a = (*a).max(*b);
5064        }
5065        self.state[operands.dst].set_u16x8(a);
5066        ControlFlow::Continue(())
5067    }
5068
5069    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5070        let mut a = self.state[operands.src1].get_i32x4();
5071        let b = self.state[operands.src2].get_i32x4();
5072        for (a, b) in a.iter_mut().zip(&b) {
5073            *a = (*a).max(*b);
5074        }
5075        self.state[operands.dst].set_i32x4(a);
5076        ControlFlow::Continue(())
5077    }
5078
5079    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5080        let mut a = self.state[operands.src1].get_u32x4();
5081        let b = self.state[operands.src2].get_u32x4();
5082        for (a, b) in a.iter_mut().zip(&b) {
5083            *a = (*a).max(*b);
5084        }
5085        self.state[operands.dst].set_u32x4(a);
5086        ControlFlow::Continue(())
5087    }
5088
5089    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5090        let a = self.state[src].get_i8x16();
5091        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5092        ControlFlow::Continue(())
5093    }
5094
5095    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5096        let a = self.state[src].get_i16x8();
5097        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5098        ControlFlow::Continue(())
5099    }
5100
5101    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5102        let a = self.state[src].get_i32x4();
5103        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5104        ControlFlow::Continue(())
5105    }
5106
5107    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5108        let a = self.state[src].get_i64x2();
5109        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5110        ControlFlow::Continue(())
5111    }
5112
5113    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5114        let a = self.state[src].get_f32x4();
5115        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5116        ControlFlow::Continue(())
5117    }
5118
5119    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5120        let a = self.state[src].get_f64x2();
5121        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5122        ControlFlow::Continue(())
5123    }
5124
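    // vmaximum*/vminimum* defer to the WasmFloat::wasm_maximum/wasm_minimum
    // helpers imported above, which (as the names suggest) follow Wasm's
    // float min/max semantics (NaN propagation, -0.0 ordered below +0.0)
    // rather than Rust's f32::max/f32::min.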
5125    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5126        let mut a = self.state[operands.src1].get_f32x4();
5127        let b = self.state[operands.src2].get_f32x4();
5128        for (a, b) in a.iter_mut().zip(&b) {
5129            *a = a.wasm_maximum(*b);
5130        }
5131        self.state[operands.dst].set_f32x4(a);
5132        ControlFlow::Continue(())
5133    }
5134
5135    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5136        let mut a = self.state[operands.src1].get_f64x2();
5137        let b = self.state[operands.src2].get_f64x2();
5138        for (a, b) in a.iter_mut().zip(&b) {
5139            *a = a.wasm_maximum(*b);
5140        }
5141        self.state[operands.dst].set_f64x2(a);
5142        ControlFlow::Continue(())
5143    }
5144
5145    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5146        let mut a = self.state[operands.src1].get_f32x4();
5147        let b = self.state[operands.src2].get_f32x4();
5148        for (a, b) in a.iter_mut().zip(&b) {
5149            *a = a.wasm_minimum(*b);
5150        }
5151        self.state[operands.dst].set_f32x4(a);
5152        ControlFlow::Continue(())
5153    }
5154
5155    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5156        let mut a = self.state[operands.src1].get_f64x2();
5157        let b = self.state[operands.src2].get_f64x2();
5158        for (a, b) in a.iter_mut().zip(&b) {
5159            *a = a.wasm_minimum(*b);
5160        }
5161        self.state[operands.dst].set_f64x2(a);
5162        ControlFlow::Continue(())
5163    }
5164
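    // vshuffle treats the 128-bit immediate as 16 little-endian lane indices
    // into the concatenation of the two sources: bytes 0-15 select from `a`,
    // 16-31 from `b`. An index of 32 or more would panic on out-of-bounds
    // access, so the mask is presumably guaranteed in-range by the bytecode
    // producer.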
5165    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5166        let a = self.state[src1].get_u8x16();
5167        let b = self.state[src2].get_u8x16();
5168        let result = mask.to_le_bytes().map(|m| {
5169            if m < 16 {
5170                a[m as usize]
5171            } else {
5172                b[m as usize - 16]
5173            }
5174        });
5175        self.state[dst].set_u8x16(result);
5176        ControlFlow::Continue(())
5177    }
5178
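    // vswizzlei8x16 selects lanes of src1 using src2 as indices. Because the
    // index lanes are read as i8 and converted with `as usize`, any negative
    // or out-of-range index falls into the `else` arm and yields 0.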
5179    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5180        let src1 = self.state[operands.src1].get_i8x16();
5181        let src2 = self.state[operands.src2].get_i8x16();
5182        let mut dst = [0i8; 16];
5183        for (i, &idx) in src2.iter().enumerate() {
5184            if (idx as usize) < 16 {
5185                dst[i] = src1[idx as usize];
5186            } else {
5187                dst[i] = 0;
5188            }
5189        }
5190        self.state[operands.dst].set_i8x16(dst);
5191        ControlFlow::Continue(())
5192    }
5193
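    // Rounding average: (a + b + 1) / 2, computed in u32 so neither the sum
    // nor the rounding +1 can overflow the narrow lane type.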
5194    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5195        let mut a = self.state[operands.src1].get_u8x16();
5196        let b = self.state[operands.src2].get_u8x16();
5197        for (a, b) in a.iter_mut().zip(&b) {
5198            // use wider precision to avoid overflow
5199            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5200        }
5201        self.state[operands.dst].set_u8x16(a);
5202        ControlFlow::Continue(())
5203    }
5204
5205    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5206        let mut a = self.state[operands.src1].get_u16x8();
5207        let b = self.state[operands.src2].get_u16x8();
5208        for (a, b) in a.iter_mut().zip(&b) {
5209            // use wider precision to avoid overflow
5210            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5211        }
5212        self.state[operands.dst].set_u16x8(a);
5213        ControlFlow::Continue(())
5214    }
5215
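    // Float comparisons follow IEEE semantics: a lane containing NaN compares
    // unequal and not-less-than, so it produces 0 for eq/lt/lteq and all ones
    // for neq.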
5216    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5217        let a = self.state[operands.src1].get_f32x4();
5218        let b = self.state[operands.src2].get_f32x4();
5219        let mut c = [0; 4];
5220        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5221            *c = if a == b { u32::MAX } else { 0 };
5222        }
5223        self.state[operands.dst].set_u32x4(c);
5224        ControlFlow::Continue(())
5225    }
5226
5227    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5228        let a = self.state[operands.src1].get_f32x4();
5229        let b = self.state[operands.src2].get_f32x4();
5230        let mut c = [0; 4];
5231        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5232            *c = if a != b { u32::MAX } else { 0 };
5233        }
5234        self.state[operands.dst].set_u32x4(c);
5235        ControlFlow::Continue(())
5236    }
5237
5238    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5239        let a = self.state[operands.src1].get_f32x4();
5240        let b = self.state[operands.src2].get_f32x4();
5241        let mut c = [0; 4];
5242        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5243            *c = if a < b { u32::MAX } else { 0 };
5244        }
5245        self.state[operands.dst].set_u32x4(c);
5246        ControlFlow::Continue(())
5247    }
5248
5249    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5250        let a = self.state[operands.src1].get_f32x4();
5251        let b = self.state[operands.src2].get_f32x4();
5252        let mut c = [0; 4];
5253        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5254            *c = if a <= b { u32::MAX } else { 0 };
5255        }
5256        self.state[operands.dst].set_u32x4(c);
5257        ControlFlow::Continue(())
5258    }
5259
5260    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5261        let a = self.state[operands.src1].get_f64x2();
5262        let b = self.state[operands.src2].get_f64x2();
5263        let mut c = [0; 2];
5264        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5265            *c = if a == b { u64::MAX } else { 0 };
5266        }
5267        self.state[operands.dst].set_u64x2(c);
5268        ControlFlow::Continue(())
5269    }
5270
5271    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5272        let a = self.state[operands.src1].get_f64x2();
5273        let b = self.state[operands.src2].get_f64x2();
5274        let mut c = [0; 2];
5275        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5276            *c = if a != b { u64::MAX } else { 0 };
5277        }
5278        self.state[operands.dst].set_u64x2(c);
5279        ControlFlow::Continue(())
5280    }
5281
5282    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5283        let a = self.state[operands.src1].get_f64x2();
5284        let b = self.state[operands.src2].get_f64x2();
5285        let mut c = [0; 2];
5286        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5287            *c = if a < b { u64::MAX } else { 0 };
5288        }
5289        self.state[operands.dst].set_u64x2(c);
5290        ControlFlow::Continue(())
5291    }
5292
5293    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5294        let a = self.state[operands.src1].get_f64x2();
5295        let b = self.state[operands.src2].get_f64x2();
5296        let mut c = [0; 2];
5297        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5298            *c = if a <= b { u64::MAX } else { 0 };
5299        }
5300        self.state[operands.dst].set_u64x2(c);
5301        ControlFlow::Continue(())
5302    }
5303
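    // Lane-wise multiply-add: each destination lane becomes a * b + c, using
    // the WasmFloat::wasm_mul_add helper for the per-lane operation.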
5304    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5305        let mut a = self.state[a].get_f32x4();
5306        let b = self.state[b].get_f32x4();
5307        let c = self.state[c].get_f32x4();
5308        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5309            *a = a.wasm_mul_add(b, c);
5310        }
5311        self.state[dst].set_f32x4(a);
5312        ControlFlow::Continue(())
5313    }
5314
5315    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5316        let mut a = self.state[a].get_f64x2();
5317        let b = self.state[b].get_f64x2();
5318        let c = self.state[c].get_f64x2();
5319        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5320            *a = a.wasm_mul_add(b, c);
5321        }
5322        self.state[dst].set_f64x2(a);
5323        ControlFlow::Continue(())
5324    }
5325
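    // vselect is a whole-register select, not a lane-wise blend: the scalar
    // condition in an x-register picks one of the two source vectors in its
    // entirety.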
5326    fn vselect(
5327        &mut self,
5328        dst: VReg,
5329        cond: XReg,
5330        if_nonzero: VReg,
5331        if_zero: VReg,
5332    ) -> ControlFlow<Done> {
5333        let result = if self.state[cond].get_u32() != 0 {
5334            self.state[if_nonzero]
5335        } else {
5336            self.state[if_zero]
5337        };
5338        self.state[dst] = result;
5339        ControlFlow::Continue(())
5340    }
5341
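    // 128-bit integers are passed as a low/high pair of x-registers and
    // reassembled through the get_i128/set_i128 helpers used here; addition
    // and subtraction wrap on overflow, matching the 64-bit handlers.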
5342    fn xadd128(
5343        &mut self,
5344        dst_lo: XReg,
5345        dst_hi: XReg,
5346        lhs_lo: XReg,
5347        lhs_hi: XReg,
5348        rhs_lo: XReg,
5349        rhs_hi: XReg,
5350    ) -> ControlFlow<Done> {
5351        let lhs = self.get_i128(lhs_lo, lhs_hi);
5352        let rhs = self.get_i128(rhs_lo, rhs_hi);
5353        let result = lhs.wrapping_add(rhs);
5354        self.set_i128(dst_lo, dst_hi, result);
5355        ControlFlow::Continue(())
5356    }
5357
5358    fn xsub128(
5359        &mut self,
5360        dst_lo: XReg,
5361        dst_hi: XReg,
5362        lhs_lo: XReg,
5363        lhs_hi: XReg,
5364        rhs_lo: XReg,
5365        rhs_hi: XReg,
5366    ) -> ControlFlow<Done> {
5367        let lhs = self.get_i128(lhs_lo, lhs_hi);
5368        let rhs = self.get_i128(rhs_lo, rhs_hi);
5369        let result = lhs.wrapping_sub(rhs);
5370        self.set_i128(dst_lo, dst_hi, result);
5371        ControlFlow::Continue(())
5372    }
5373
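    // Full 64x64 -> 128-bit multiply. The operands are widened to 128 bits
    // first, so the product always fits and the wrapping_mul can never
    // actually wrap; the signed and unsigned variants differ only in how the
    // inputs are extended.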
5374    fn xwidemul64_s(
5375        &mut self,
5376        dst_lo: XReg,
5377        dst_hi: XReg,
5378        lhs: XReg,
5379        rhs: XReg,
5380    ) -> ControlFlow<Done> {
5381        let lhs = self.state[lhs].get_i64();
5382        let rhs = self.state[rhs].get_i64();
5383        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5384        self.set_i128(dst_lo, dst_hi, result);
5385        ControlFlow::Continue(())
5386    }
5387
5388    fn xwidemul64_u(
5389        &mut self,
5390        dst_lo: XReg,
5391        dst_hi: XReg,
5392        lhs: XReg,
5393        rhs: XReg,
5394    ) -> ControlFlow<Done> {
5395        let lhs = self.state[lhs].get_u64();
5396        let rhs = self.state[rhs].get_u64();
5397        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5398        self.set_i128(dst_lo, dst_hi, result as i128);
5399        ControlFlow::Continue(())
5400    }
5401}