pulley_interpreter/interp.rs

//! Interpretation of pulley bytecode.

use crate::decode::*;
use crate::encode::Encode;
use crate::imms::*;
use crate::profile::{ExecutingPc, ExecutingPcRef};
use crate::regs::*;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::fmt;
use core::mem;
use core::ops::ControlFlow;
use core::ops::{Index, IndexMut};
use core::ptr::NonNull;
use pulley_macros::interp_disable_if_cfg;
use wasmtime_math::WasmFloat;

mod debug;
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
mod match_loop;
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
mod tail_loop;

const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB

/// A virtual machine for interpreting Pulley bytecode.
pub struct Vm {
    state: MachineState,
    executing_pc: ExecutingPc,
}

impl Default for Vm {
    fn default() -> Self {
        Vm::new()
    }
}

impl Vm {
    /// Create a new virtual machine with the default stack size.
    pub fn new() -> Self {
        Self::with_stack(DEFAULT_STACK_SIZE)
    }

    /// Create a new virtual machine with the given stack.
    pub fn with_stack(stack_size: usize) -> Self {
        Self {
            state: MachineState::with_stack(stack_size),
            executing_pc: ExecutingPc::default(),
        }
    }

    /// Get a shared reference to this VM's machine state.
    pub fn state(&self) -> &MachineState {
        &self.state
    }

    /// Get an exclusive reference to this VM's machine state.
    pub fn state_mut(&mut self) -> &mut MachineState {
        &mut self.state
    }

    /// Call a bytecode function.
    ///
    /// The given `func` must point to the beginning of a valid Pulley bytecode
    /// function.
    ///
    /// The given `args` must match the number and type of arguments that
    /// function expects.
    ///
    /// The given `rets` must match the function's actual return types.
    ///
    /// Returns either the resulting values, or the PC at which a trap was
    /// raised.
    pub unsafe fn call<'a, T>(
        &'a mut self,
        func: NonNull<u8>,
        args: &[Val],
        rets: T,
    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
    where
        T: IntoIterator<Item = RegType> + 'a,
    {
        unsafe {
            let lr = self.call_start(args);

            match self.call_run(func) {
                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
                DoneReason::CallIndirectHost { id, resume } => {
                    DoneReason::CallIndirectHost { id, resume }
                }
            }
        }
    }
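
    // An illustrative sketch (hypothetical `func`; not from the original
    // source) of driving `call` from the host side:
    //
    //     let mut vm = Vm::new();
    //     let results = unsafe {
    //         match vm.call(func, &[Val::from(1_u64)], [RegType::XReg]) {
    //             DoneReason::ReturnToHost(vals) => vals.collect::<Vec<_>>(),
    //             DoneReason::Trap { pc, .. } => panic!("trapped at {pc:?}"),
    //             DoneReason::CallIndirectHost { .. } => todo!("host call"),
    //         }
    //     };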

    /// Performs the initial part of [`Vm::call`] in setting up the `args`
    /// provided in registers according to Pulley's ABI.
    ///
    /// # Return
    ///
    /// Returns the old `lr` register value. The current `lr` value is replaced
    /// with a sentinel that triggers a return to the host when returned-to.
    ///
    /// # Unsafety
    ///
    /// All the same unsafety as `call` and additionally, you must
    /// invoke `call_run` and then `call_end` after calling `call_start`.
    /// If you don't want to wrangle these invocations, use `call` instead
    /// of `call_{start,run,end}`.
    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        for arg in args {
            match arg {
                Val::XReg(val) => match x_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                Val::FReg(val) => match f_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                #[cfg(not(pulley_disable_interp_simd))]
                Val::VReg(val) => match v_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
            }
        }

        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
    }

    /// Performs the internal part of [`Vm::call`] where bytecode is actually
    /// executed.
    ///
    /// # Unsafety
    ///
    /// In addition to all the invariants documented for `call`, you
    /// may only invoke `call_run` after invoking `call_start` to
    /// initialize this call's arguments.
    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
        self.state.debug_assert_done_reason_none();
        let interpreter = Interpreter {
            state: &mut self.state,
            pc: unsafe { UnsafeBytecodeStream::new(pc) },
            executing_pc: self.executing_pc.as_ref(),
        };
        let done = interpreter.run();
        self.state.done_decode(done)
    }

    /// Performs the tail end of [`Vm::call`] by returning the values as
    /// determined by `rets` according to Pulley's ABI.
    ///
    /// The `old_ret` value should have been provided from `call_start`
    /// previously.
    ///
    /// # Unsafety
    ///
    /// In addition to the invariants documented for `call`, this may
    /// only be called after `call_run`.
    pub unsafe fn call_end<'a>(
        &'a mut self,
        old_ret: *mut u8,
        rets: impl IntoIterator<Item = RegType> + 'a,
    ) -> impl Iterator<Item = Val> + 'a {
        self.state.lr = old_ret;
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        rets.into_iter().map(move |ty| match ty {
            RegType::XReg => match x_rets.next() {
                Some(reg) => Val::XReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            RegType::FReg => match f_rets.next() {
                Some(reg) => Val::FReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(not(pulley_disable_interp_simd))]
            RegType::VReg => match v_rets.next() {
                Some(reg) => Val::VReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(pulley_disable_interp_simd)]
            RegType::VReg => panic!("simd support disabled at compile time"),
        })
    }
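
    // A hedged sketch of how the three methods above compose, mirroring what
    // `call` does internally (`func`, `args`, and `rets` are placeholders):
    //
    //     let lr = vm.call_start(&args);
    //     match vm.call_run(func) {
    //         DoneReason::ReturnToHost(()) => {
    //             let vals: Vec<_> = vm.call_end(lr, rets).collect();
    //         }
    //         DoneReason::Trap { .. } | DoneReason::CallIndirectHost { .. } => {
    //             // handle the trap or service the host call, then resume
    //         }
    //     }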

    /// Returns the current `fp` register value.
    pub fn fp(&self) -> *mut u8 {
        self.state.fp
    }

    /// Returns the current `lr` register value.
    pub fn lr(&self) -> *mut u8 {
        self.state.lr
    }

    /// Sets the current `fp` register value.
    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
        self.state.fp = fp;
    }

    /// Sets the current `lr` register value.
    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
        self.state.lr = lr;
    }

    /// Gets a handle to the currently executing program counter for this
    /// interpreter which can be read from other threads.
    //
    // Note that despite this field still existing with `not(feature =
    // "profile")` it's hidden from the public API in that scenario as it has no
    // methods anyway.
    #[cfg(feature = "profile")]
    pub fn executing_pc(&self) -> &ExecutingPc {
        &self.executing_pc
    }
}

impl Drop for Vm {
    fn drop(&mut self) {
        self.executing_pc.set_done();
    }
}

/// The type of a register in the Pulley machine state.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    VReg,
}

/// A value that can be stored in a register.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}

impl fmt::LowerHex for Val {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
            #[cfg(not(pulley_disable_interp_simd))]
            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}

impl From<XRegVal> for Val {
    fn from(value: XRegVal) -> Self {
        Val::XReg(value)
    }
}

impl From<u64> for Val {
    fn from(value: u64) -> Self {
        XRegVal::new_u64(value).into()
    }
}

impl From<u32> for Val {
    fn from(value: u32) -> Self {
        XRegVal::new_u32(value).into()
    }
}

impl From<i64> for Val {
    fn from(value: i64) -> Self {
        XRegVal::new_i64(value).into()
    }
}

impl From<i32> for Val {
    fn from(value: i32) -> Self {
        XRegVal::new_i32(value).into()
    }
}

impl<T> From<*mut T> for Val {
    fn from(value: *mut T) -> Self {
        XRegVal::new_ptr(value).into()
    }
}

impl From<FRegVal> for Val {
    fn from(value: FRegVal) -> Self {
        Val::FReg(value)
    }
}

impl From<f64> for Val {
    fn from(value: f64) -> Self {
        FRegVal::new_f64(value).into()
    }
}

impl From<f32> for Val {
    fn from(value: f32) -> Self {
        FRegVal::new_f32(value).into()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl From<VRegVal> for Val {
    fn from(value: VRegVal) -> Self {
        Val::VReg(value)
    }
}

/// An `x` register value: integers.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);

impl PartialEq for XRegVal {
    fn eq(&self, other: &Self) -> bool {
        self.get_u64() == other.get_u64()
    }
}

impl Eq for XRegVal {}

impl fmt::Debug for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("XRegVal")
            .field("as_u64", &self.get_u64())
            .finish()
    }
}

impl fmt::LowerHex for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_u64(), f)
    }
}

/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent, meaning that the
///    state of the register becomes platform-dependent.
/// 2. Sign or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32 bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change it.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them, so in theory this doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself since it's
/// interpreted, meaning that severing data dependencies with previous
/// operations is hypothesized to not be too important. If this is ever a
/// problem though it could increase the likelihood we go for route (2) above
/// instead (or maybe even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}

impl Default for XRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl XRegVal {
    pub fn new_i32(x: i32) -> Self {
        let mut val = XRegVal::default();
        val.set_i32(x);
        val
    }

    pub fn new_u32(x: u32) -> Self {
        let mut val = XRegVal::default();
        val.set_u32(x);
        val
    }

    pub fn new_i64(x: i64) -> Self {
        let mut val = XRegVal::default();
        val.set_i64(x);
        val
    }

    pub fn new_u64(x: u64) -> Self {
        let mut val = XRegVal::default();
        val.set_u64(x);
        val
    }

    pub fn new_ptr<T>(ptr: *mut T) -> Self {
        let mut val = XRegVal::default();
        val.set_ptr(ptr);
        val
    }

    pub fn get_i32(&self) -> i32 {
        let x = unsafe { self.0.i32 };
        i32::from_le(x)
    }

    pub fn get_u32(&self) -> u32 {
        let x = unsafe { self.0.u32 };
        u32::from_le(x)
    }

    pub fn get_i64(&self) -> i64 {
        let x = unsafe { self.0.i64 };
        i64::from_le(x)
    }

    pub fn get_u64(&self) -> u64 {
        let x = unsafe { self.0.u64 };
        u64::from_le(x)
    }

    pub fn get_ptr<T>(&self) -> *mut T {
        let ptr = unsafe { self.0.ptr };
        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
    }

    pub fn set_i32(&mut self, x: i32) {
        self.0.i32 = x.to_le();
    }

    pub fn set_u32(&mut self, x: u32) {
        self.0.u32 = x.to_le();
    }

    pub fn set_i64(&mut self, x: i64) {
        self.0.i64 = x.to_le();
    }

    pub fn set_u64(&mut self, x: u64) {
        self.0.u64 = x.to_le();
    }

    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
        self.0.ptr = ptr.expose_provenance().to_le();
    }
}
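
// A minimal sketch test (not part of the original source) exercising the
// little-endian storage contract documented on `XRegUnion`: a 32-bit store
// into a zeroed register reads back identically through both the 32-bit and
// 64-bit views, regardless of host endianness.
#[test]
fn xregval_little_endian_round_trip() {
    let mut val = XRegVal::default();
    val.set_u32(0x1234_5678);
    assert_eq!(val.get_u32(), 0x1234_5678);
    // `default()` zeroed the union and a 32-bit store only touches the low
    // four bytes, so the 64-bit view sees zero-extended contents.
    assert_eq!(val.get_u64(), 0x1234_5678);
}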

/// An `f` register value: floats.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("FRegVal")
            .field("as_f32", &self.get_f32())
            .field("as_f64", &self.get_f64())
            .finish()
    }
}

impl fmt::LowerHex for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
    }
}

// NB: like `XRegUnion` values here are always little-endian, see the
// documentation above for more details.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}

impl Default for FRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl FRegVal {
    pub fn new_f32(f: f32) -> Self {
        let mut val = Self::default();
        val.set_f32(f);
        val
    }

    pub fn new_f64(f: f64) -> Self {
        let mut val = Self::default();
        val.set_f64(f);
        val
    }

    pub fn get_f32(&self) -> f32 {
        let val = unsafe { self.0.f32 };
        f32::from_le_bytes(val.to_ne_bytes())
    }

    pub fn get_f64(&self) -> f64 {
        let val = unsafe { self.0.f64 };
        f64::from_le_bytes(val.to_ne_bytes())
    }

    pub fn set_f32(&mut self, val: f32) {
        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
    }

    pub fn set_f64(&mut self, val: f64) {
        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
    }
}
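
// Sketch test (not part of the original source): `FRegVal` round-trips both
// float widths through its little-endian bit storage.
#[test]
fn fregval_round_trip() {
    let mut val = FRegVal::default();
    val.set_f32(1.5);
    assert_eq!(val.get_f32(), 1.5);
    val.set_f64(-2.25);
    assert_eq!(val.get_f64(), -2.25);
}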

/// A `v` register value: vectors.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::Debug for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("VRegVal")
            .field("as_u128", &unsafe { self.0.u128 })
            .finish()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::LowerHex for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
    }
}

/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required, though, so that bitcasts between different
/// shapes of vectors work. This union cannot be stored in big-endian form.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()` so they need to go
    // through the raw bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}

#[cfg(not(pulley_disable_interp_simd))]
impl Default for VRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
#[cfg(not(pulley_disable_interp_simd))]
impl VRegVal {
    pub fn new_u128(i: u128) -> Self {
        let mut val = Self::default();
        val.set_u128(i);
        val
    }

    pub fn get_u128(&self) -> u128 {
        let val = unsafe { self.0.u128 };
        u128::from_le(val)
    }

    pub fn set_u128(&mut self, val: u128) {
        self.0.u128 = val.to_le();
    }

    fn get_i8x16(&self) -> [i8; 16] {
        let val = unsafe { self.0.i8x16 };
        val.map(|e| i8::from_le(e))
    }

    fn set_i8x16(&mut self, val: [i8; 16]) {
        self.0.i8x16 = val.map(|e| e.to_le());
    }

    fn get_u8x16(&self) -> [u8; 16] {
        let val = unsafe { self.0.u8x16 };
        val.map(|e| u8::from_le(e))
    }

    fn set_u8x16(&mut self, val: [u8; 16]) {
        self.0.u8x16 = val.map(|e| e.to_le());
    }

    fn get_i16x8(&self) -> [i16; 8] {
        let val = unsafe { self.0.i16x8 };
        val.map(|e| i16::from_le(e))
    }

    fn set_i16x8(&mut self, val: [i16; 8]) {
        self.0.i16x8 = val.map(|e| e.to_le());
    }

    fn get_u16x8(&self) -> [u16; 8] {
        let val = unsafe { self.0.u16x8 };
        val.map(|e| u16::from_le(e))
    }

    fn set_u16x8(&mut self, val: [u16; 8]) {
        self.0.u16x8 = val.map(|e| e.to_le());
    }

    fn get_i32x4(&self) -> [i32; 4] {
        let val = unsafe { self.0.i32x4 };
        val.map(|e| i32::from_le(e))
    }

    fn set_i32x4(&mut self, val: [i32; 4]) {
        self.0.i32x4 = val.map(|e| e.to_le());
    }

    fn get_u32x4(&self) -> [u32; 4] {
        let val = unsafe { self.0.u32x4 };
        val.map(|e| u32::from_le(e))
    }

    fn set_u32x4(&mut self, val: [u32; 4]) {
        self.0.u32x4 = val.map(|e| e.to_le());
    }

    fn get_i64x2(&self) -> [i64; 2] {
        let val = unsafe { self.0.i64x2 };
        val.map(|e| i64::from_le(e))
    }

    fn set_i64x2(&mut self, val: [i64; 2]) {
        self.0.i64x2 = val.map(|e| e.to_le());
    }

    fn get_u64x2(&self) -> [u64; 2] {
        let val = unsafe { self.0.u64x2 };
        val.map(|e| u64::from_le(e))
    }

    fn set_u64x2(&mut self, val: [u64; 2]) {
        self.0.u64x2 = val.map(|e| e.to_le());
    }

    fn get_f64x2(&self) -> [f64; 2] {
        let val = unsafe { self.0.f64x2 };
        val.map(|e| f64::from_bits(u64::from_le(e)))
    }

    fn set_f64x2(&mut self, val: [f64; 2]) {
        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
    }

    fn get_f32x4(&self) -> [f32; 4] {
        let val = unsafe { self.0.f32x4 };
        val.map(|e| f32::from_bits(u32::from_le(e)))
    }

    fn set_f32x4(&mut self, val: [f32; 4]) {
        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
    }
}
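
// Sketch test (not part of the original source): lane accessors agree with
// the little-endian `u128` view, with lane 0 in the least-significant bits.
#[cfg(not(pulley_disable_interp_simd))]
#[test]
fn vregval_lanes_match_u128() {
    let mut val = VRegVal::default();
    val.set_u32x4([1, 2, 3, 4]);
    assert_eq!(val.get_u32x4(), [1, 2, 3, 4]);
    // Lane 0 occupies the low 32 bits of the 128-bit view.
    assert_eq!(val.get_u128() as u32, 1);
}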

/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    fp: *mut u8,
    lr: *mut u8,
    stack: Stack,
    done_reason: Option<DoneReason<()>>,
}

unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}

/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
struct Stack {
    storage: Vec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}

impl Stack {
    /// Creates a new stack which will have a byte size of at least `size`.
    ///
    /// The allocated stack might be slightly larger due to the rounding
    /// necessary to reach a multiple of 16 bytes.
    fn new(size: usize) -> Stack {
        Stack {
            // Round up `size` to the nearest multiple of 16. Note that the
            // stack is also allocated here but not initialized, and that's
            // intentional as pulley bytecode should always initialize the stack
            // before use.
            storage: Vec::with_capacity((size + 15) / 16),
        }
    }

    /// Returns a pointer to the top of the stack (the highest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the top.
    fn top(&mut self) -> *mut u8 {
        let len = self.len();
        unsafe { self.base().add(len) }
    }

    /// Returns a pointer to the base of the stack (the lowest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the base.
    fn base(&mut self) -> *mut u8 {
        self.storage.as_mut_ptr().cast::<u8>()
    }

    /// Returns the length, in bytes, of this stack allocation.
    fn len(&self) -> usize {
        self.storage.capacity() * mem::size_of::<Align16>()
    }
}
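
// Sketch test (not part of the original source): the stack size rounds up to
// a multiple of 16 bytes and `top` sits exactly `len` bytes above `base`.
// (`len` uses `Vec::capacity`, which may exceed the request, so only a lower
// bound is asserted.)
#[test]
fn stack_is_16_byte_granular() {
    let mut stack = Stack::new(40);
    assert!(stack.len() >= 48);
    assert_eq!(stack.len() % 16, 0);
    let base = stack.base() as usize;
    let top = stack.top() as usize;
    assert_eq!(top - base, stack.len());
}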

impl fmt::Debug for MachineState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MachineState {
            x_regs,
            f_regs,
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs,
            stack: _,
            done_reason: _,
            fp: _,
            lr: _,
        } = self;

        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);

        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut f = f.debug_map();
                for (i, r) in self.0.iter().enumerate() {
                    f.entry(&(self.1)(i as u8), r);
                }
                f.finish()
            }
        }

        let mut f = f.debug_struct("MachineState");

        f.field(
            "x_regs",
            &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
        )
        .field(
            "f_regs",
            &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
        );
        #[cfg(not(pulley_disable_interp_simd))]
        f.field(
            "v_regs",
            &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
        );
        f.finish_non_exhaustive()
    }
}

macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);
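
// With the `Index`/`IndexMut` impls generated above, registers are read and
// written by indexing a `Vm` or `MachineState` directly, e.g. (illustrative):
//
//     vm[XReg::x0] = XRegVal::new_u64(42);
//     let bits = vm[XReg::x0].get_u64();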

/// Sentinel return address that signals the end of the call stack.
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;

impl MachineState {
    fn with_stack(stack_size: usize) -> Self {
        let mut state = Self {
            x_regs: [Default::default(); XReg::RANGE.end as usize],
            f_regs: Default::default(),
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs: Default::default(),
            stack: Stack::new(stack_size),
            done_reason: None,
            fp: HOST_RETURN_ADDR,
            lr: HOST_RETURN_ADDR,
        };

        let sp = state.stack.top();
        state[XReg::sp] = XRegVal::new_ptr(sp);

        state
    }
}
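
// Sketch test (not part of the original source): a freshly-created machine
// state points `sp` at the top of its stack, since the Pulley stack grows
// downward from there.
#[test]
fn fresh_state_sp_at_stack_top() {
    let mut state = MachineState::with_stack(64);
    let top = state.stack.top();
    assert_eq!(state[XReg::sp].get_ptr::<u8>(), top);
}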

/// Inner private module to prevent creation of the `Done` structure outside of
/// this module.
mod done {
    use super::{Encode, Interpreter, MachineState};
    use core::ops::ControlFlow;
    use core::ptr::NonNull;

    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        _priv: (),
    }

    /// Reason that the pulley interpreter has ceased execution.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }

    /// Stored within `DoneReason::Trap`.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
    }

    impl MachineState {
        pub(super) fn debug_assert_done_reason_none(&mut self) {
            debug_assert!(self.done_reason.is_none());
        }

        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
            self.done_reason.take().unwrap()
        }
    }

    impl Interpreter<'_> {
        /// Finishes execution by recording `DoneReason::Trap`.
        ///
        /// This method takes an `I` generic parameter indicating which
        /// instruction is executing this function and generating a trap. That's
        /// used to go backwards from the current `pc` which is just beyond the
        /// instruction to point to the instruction itself in the trap metadata
        /// returned from the interpreter.
        #[cold]
        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
            self.done_trap_kind::<I>(None)
        }

        /// Same as `done_trap` but with an explicit `TrapKind`.
        #[cold]
        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
            let pc = self.current_pc::<I>();
            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
        #[cold]
        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::CallIndirectHost {
                id,
                resume: self.pc.as_ptr(),
            });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::ReturnToHost`.
        #[cold]
        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
            ControlFlow::Break(Done { _priv: () })
        }
    }
}

use done::Done;
pub use done::{DoneReason, TrapKind};

struct Interpreter<'a> {
    state: &'a mut MachineState,
    pc: UnsafeBytecodeStream,
    executing_pc: ExecutingPcRef<'a>,
}

impl Interpreter<'_> {
    /// Performs a relative jump of `offset` bytes from the current instruction.
    ///
    /// This will jump from the start of the current instruction, identified by
    /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
    /// function actually points to the instruction after this one so `I` is
    /// necessary to go back to ourselves after which we then go `offset` away.
    #[inline]
    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let offset = isize::try_from(i32::from(offset)).unwrap();
        let my_pc = self.current_pc::<I>();
        self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) };
        ControlFlow::Continue(())
    }
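
    // Worked example (illustrative numbers) of the jump math above: if a
    // branch instruction with `I::WIDTH == 6` starts at pc 0x100, then
    // `self.pc` is already 0x106 when its handler runs. `current_pc` rewinds
    // by `I::WIDTH` back to 0x100, so the branch target is 0x100 + offset.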

    /// Returns the PC of the current instruction where `I` is the static type
    /// representing the current instruction.
    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
    }

    /// `sp -= size_of::<T>(); *sp = val;`
    ///
    /// Note that `I` is the instruction which is pushing data to use if a trap
    /// is generated.
    #[must_use]
    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
        self.set_sp::<I>(new_sp.cast())?;
        unsafe {
            new_sp.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }

    /// `ret = *sp; sp += size_of::<T>()`
    fn pop<T>(&mut self) -> T {
        let sp = self.state[XReg::sp].get_ptr::<T>();
        let val = unsafe { sp.read_unaligned() };
        self.set_sp_unchecked(sp.wrapping_add(1));
        val
    }

    /// Sets the stack pointer to the `sp` provided.
    ///
    /// Returns a trap if this would result in stack overflow, or if `sp` is
    /// beneath the base pointer of `self.state.stack`.
    ///
    /// The `I` parameter here is the instruction that is setting the stack
    /// pointer and is used to calculate this instruction's own `pc` if this
    /// instruction traps.
    #[must_use]
    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
        let sp_raw = sp as usize;
        let base_raw = self.state.stack.base() as usize;
        if sp_raw < base_raw {
            return self.done_trap::<I>();
        }
        self.set_sp_unchecked(sp);
        ControlFlow::Continue(())
    }

    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
    /// only be used with stack increment operations such as `pop`.
    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
        if cfg!(debug_assertions) {
            let sp_raw = sp as usize;
            let base = self.state.stack.base() as usize;
            let end = base + self.state.stack.len();
            assert!(base <= sp_raw && sp_raw <= end);
        }
        self.state[XReg::sp].set_ptr(sp);
    }

    /// Loads a value of `T` using native-endian byte ordering from the `addr`
    /// specified.
    ///
    /// The `I` type parameter is the instruction issuing this load which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was loaded successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
        unsafe { addr.load_ne::<T, I>(self) }
    }

    /// Stores a `val` to the `addr` specified.
    ///
    /// The `I` type parameter is the instruction issuing this store which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was stored successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(
        &mut self,
        addr: impl AddressingMode,
        val: T,
    ) -> ControlFlow<Done> {
        unsafe { addr.store_ne::<T, I>(self, val) }
    }

    fn check_xnn_from_fnn<I: Encode>(&mut self, val: f64, lo: f64, hi: f64) -> ControlFlow<Done> {
        // NaN is never equal to itself, so this tests for a NaN input.
        if val != val {
            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
        }
        let val = val.wasm_trunc();
        if val <= lo || val >= hi {
            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
        }
        ControlFlow::Continue(())
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
        let lo = self.state[lo].get_u64();
        let hi = self.state[hi].get_i64();
        i128::from(lo) | (i128::from(hi) << 64)
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
        self.state[lo].set_u64(val as u64);
        self.state[hi].set_u64((val >> 64) as u64);
    }

    fn record_executing_pc_for_profiling(&mut self) {
        // Note that this is a no-op if `feature = "profile"` is disabled.
        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
    }
}

/// Helper trait to encompass the various addressing modes of Pulley.
trait AddressingMode: Sized {
    /// Calculates the native host address `*mut T` corresponding to this
    /// addressing mode.
    ///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}

impl AddressingMode for AddrO32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}

impl AddressingMode for AddrZ {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
        let host_addr = i.state[self.addr].get_ptr::<T>();
        if host_addr.is_null() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}

impl AddressingMode for AddrG32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}
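
// Worked example (illustrative numbers) of the `AddrG32` bounds check above:
// with bound = 0x1_0000, offset = 8, and a 4-byte `T`, any wasm_addr greater
// than 0x1_0000 - 8 - 4 = 0xfff4 traps, so the 4-byte access at
// `base + wasm_addr + 8` always ends at or before `base + bound`.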

impl AddressingMode for AddrG32Bne {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
        let bound = unsafe {
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);
        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
        assert_eq!(i.pop::<i32>(), 4_i32);
        assert_eq!(i.pop::<i32>(), 3_i32);
        assert_eq!(i.pop::<i32>(), 2_i32);
        assert_eq!(i.pop::<i32>(), 1_i32);
    }
}

macro_rules! br_if_imm {
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}
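
// For reference, the first `br_if_imm!` invocation below expands to roughly:
//
//     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
//         -> ControlFlow<Done>
//     {
//         let a = self.state[a].get_i32();
//         if a == b.into() {
//             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
//         } else {
//             ControlFlow::Continue(())
//         }
//     }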

impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }

    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }

    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.pc_rel_jump::<crate::Call>(offset)
    }

    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.state[XReg::x0] = self.state[arg1];
        self.pc_rel_jump::<crate::Call1>(offset)
    }

    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1) = (self.state[arg1], self.state[arg2]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.pc_rel_jump::<crate::Call2>(offset)
    }

    fn call3(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.pc_rel_jump::<crate::Call3>(offset)
    }

    fn call4(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        arg4: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2, x3) = (
            self.state[arg1],
            self.state[arg2],
            self.state[arg3],
            self.state[arg4],
        );
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.state[XReg::x3] = x3;
        self.pc_rel_jump::<crate::Call4>(offset)
    }

    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is only valid
        // bytecode is interpreted, so the jump destination is part of the validity
        // of the bytecode itself.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }

    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond != 0 {
            self.pc_rel_jump::<crate::BrIf>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond == 0 {
            self.pc_rel_jump::<crate::BrIfNot>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    br_if_imm! {
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }

    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }

    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1659        self.state[dst].set_i64(0);
1660        ControlFlow::Continue(())
1661    }
1662
1663    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1664        self.state[dst].set_i64(1);
1665        ControlFlow::Continue(())
1666    }
1667
1668    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1669        self.state[dst].set_i64(i64::from(imm));
1670        ControlFlow::Continue(())
1671    }
1672
1673    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1674        self.state[dst].set_i64(i64::from(imm));
1675        ControlFlow::Continue(())
1676    }
1677
1678    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1679        self.state[dst].set_i64(imm);
1680        ControlFlow::Continue(())
1681    }
1682
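        // Integer arithmetic deliberately uses the `wrapping_*` operations:
        // overflow wraps around rather than panicking, matching Wasm semantics.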
1683    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1684        let a = self.state[operands.src1].get_u32();
1685        let b = self.state[operands.src2].get_u32();
1686        self.state[operands.dst].set_u32(a.wrapping_add(b));
1687        ControlFlow::Continue(())
1688    }
1689
1690    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1691        self.xadd32_u32(dst, src1, src2.into())
1692    }
1693
1694    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1695        let a = self.state[src1].get_u32();
1696        self.state[dst].set_u32(a.wrapping_add(src2.into()));
1697        ControlFlow::Continue(())
1698    }
1699
1700    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1701        let a = self.state[operands.src1].get_u64();
1702        let b = self.state[operands.src2].get_u64();
1703        self.state[operands.dst].set_u64(a.wrapping_add(b));
1704        ControlFlow::Continue(())
1705    }
1706
1707    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1708        self.xadd64_u32(dst, src1, src2.into())
1709    }
1710
1711    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1712        let a = self.state[src1].get_u64();
1713        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1714        ControlFlow::Continue(())
1715    }
1716
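        // Integer multiply-add: `dst = src1 * src2 + src3`, with both steps wrapping.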
1717    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1718        let a = self.state[src1].get_u32();
1719        let b = self.state[src2].get_u32();
1720        let c = self.state[src3].get_u32();
1721        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1722        ControlFlow::Continue(())
1723    }
1724
1725    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1726        let a = self.state[src1].get_u64();
1727        let b = self.state[src2].get_u64();
1728        let c = self.state[src3].get_u64();
1729        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1730        ControlFlow::Continue(())
1731    }
1732
1733    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1734        let a = self.state[operands.src1].get_u32();
1735        let b = self.state[operands.src2].get_u32();
1736        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1737        ControlFlow::Continue(())
1738    }
1739
1740    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1741        self.xsub32_u32(dst, src1, src2.into())
1742    }
1743
1744    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1745        let a = self.state[src1].get_u32();
1746        self.state[dst].set_u32(a.wrapping_sub(src2.into()));
1747        ControlFlow::Continue(())
1748    }
1749
1750    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1751        let a = self.state[operands.src1].get_u64();
1752        let b = self.state[operands.src2].get_u64();
1753        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1754        ControlFlow::Continue(())
1755    }
1756
1757    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1758        self.xsub64_u32(dst, src1, src2.into())
1759    }
1760
1761    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1762        let a = self.state[src1].get_u64();
1763        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1764        ControlFlow::Continue(())
1765    }
1766
1767    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1768        let a = self.state[operands.src1].get_u32();
1769        let b = self.state[operands.src2].get_u32();
1770        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1771        ControlFlow::Continue(())
1772    }
1773
1774    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1775        self.xmul32_s32(dst, src1, src2.into())
1776    }
1777
1778    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1779        let a = self.state[src1].get_i32();
1780        self.state[dst].set_i32(a.wrapping_mul(src2));
1781        ControlFlow::Continue(())
1782    }
1783
1784    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1785        let a = self.state[operands.src1].get_u64();
1786        let b = self.state[operands.src2].get_u64();
1787        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1788        ControlFlow::Continue(())
1789    }
1790
1791    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1792        self.xmul64_s32(dst, src1, src2.into())
1793    }
1794
1795    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1796        let a = self.state[src1].get_i64();
1797        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1798        ControlFlow::Continue(())
1799    }
1800
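        // Shifts use `wrapping_shl`/`wrapping_shr`, which mask the shift amount
        // to the bit width of the value being shifted, so counts >= the width
        // wrap around rather than being undefined.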
1801    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1802        let a = self.state[operands.src1].get_u32();
1803        let b = self.state[operands.src2].get_u32();
1804        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1805        ControlFlow::Continue(())
1806    }
1807
1808    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1809        let a = self.state[operands.src1].get_u32();
1810        let b = self.state[operands.src2].get_u32();
1811        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1812        ControlFlow::Continue(())
1813    }
1814
1815    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1816        let a = self.state[operands.src1].get_i32();
1817        let b = self.state[operands.src2].get_u32();
1818        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1819        ControlFlow::Continue(())
1820    }
1821
1822    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1823        let a = self.state[operands.src1].get_u64();
1824        let b = self.state[operands.src2].get_u32();
1825        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1826        ControlFlow::Continue(())
1827    }
1828
1829    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1830        let a = self.state[operands.src1].get_u64();
1831        let b = self.state[operands.src2].get_u32();
1832        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1833        ControlFlow::Continue(())
1834    }
1835
1836    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1837        let a = self.state[operands.src1].get_i64();
1838        let b = self.state[operands.src2].get_u32();
1839        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1840        ControlFlow::Continue(())
1841    }
1842
1843    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1844        let a = self.state[operands.src1].get_u32();
1845        let b = u32::from(u8::from(operands.src2));
1846        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1847        ControlFlow::Continue(())
1848    }
1849
1850    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1851        let a = self.state[operands.src1].get_u32();
1852        let b = u32::from(u8::from(operands.src2));
1853        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1854        ControlFlow::Continue(())
1855    }
1856
1857    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1858        let a = self.state[operands.src1].get_i32();
1859        let b = u32::from(u8::from(operands.src2));
1860        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1861        ControlFlow::Continue(())
1862    }
1863
1864    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1865        let a = self.state[operands.src1].get_u64();
1866        let b = u32::from(u8::from(operands.src2));
1867        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1868        ControlFlow::Continue(())
1869    }
1870
1871    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1872        let a = self.state[operands.src1].get_u64();
1873        let b = u32::from(u8::from(operands.src2));
1874        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1875        ControlFlow::Continue(())
1876    }
1877
1878    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1879        let a = self.state[operands.src1].get_i64();
1880        let b = u32::from(u8::from(operands.src2));
1881        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1882        ControlFlow::Continue(())
1883    }
1884
1885    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1886        let a = self.state[src].get_i32();
1887        self.state[dst].set_i32(a.wrapping_neg());
1888        ControlFlow::Continue(())
1889    }
1890
1891    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1892        let a = self.state[src].get_i64();
1893        self.state[dst].set_i64(a.wrapping_neg());
1894        ControlFlow::Continue(())
1895    }
1896
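        // Comparisons write their boolean result to the destination register as
        // 0 or 1 in the low 32 bits (`u32::from(bool)`).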
1897    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1898        let a = self.state[operands.src1].get_u64();
1899        let b = self.state[operands.src2].get_u64();
1900        self.state[operands.dst].set_u32(u32::from(a == b));
1901        ControlFlow::Continue(())
1902    }
1903
1904    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1905        let a = self.state[operands.src1].get_u64();
1906        let b = self.state[operands.src2].get_u64();
1907        self.state[operands.dst].set_u32(u32::from(a != b));
1908        ControlFlow::Continue(())
1909    }
1910
1911    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1912        let a = self.state[operands.src1].get_i64();
1913        let b = self.state[operands.src2].get_i64();
1914        self.state[operands.dst].set_u32(u32::from(a < b));
1915        ControlFlow::Continue(())
1916    }
1917
1918    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1919        let a = self.state[operands.src1].get_i64();
1920        let b = self.state[operands.src2].get_i64();
1921        self.state[operands.dst].set_u32(u32::from(a <= b));
1922        ControlFlow::Continue(())
1923    }
1924
1925    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1926        let a = self.state[operands.src1].get_u64();
1927        let b = self.state[operands.src2].get_u64();
1928        self.state[operands.dst].set_u32(u32::from(a < b));
1929        ControlFlow::Continue(())
1930    }
1931
1932    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1933        let a = self.state[operands.src1].get_u64();
1934        let b = self.state[operands.src2].get_u64();
1935        self.state[operands.dst].set_u32(u32::from(a <= b));
1936        ControlFlow::Continue(())
1937    }
1938
1939    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1940        let a = self.state[operands.src1].get_u32();
1941        let b = self.state[operands.src2].get_u32();
1942        self.state[operands.dst].set_u32(u32::from(a == b));
1943        ControlFlow::Continue(())
1944    }
1945
1946    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1947        let a = self.state[operands.src1].get_u32();
1948        let b = self.state[operands.src2].get_u32();
1949        self.state[operands.dst].set_u32(u32::from(a != b));
1950        ControlFlow::Continue(())
1951    }
1952
1953    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1954        let a = self.state[operands.src1].get_i32();
1955        let b = self.state[operands.src2].get_i32();
1956        self.state[operands.dst].set_u32(u32::from(a < b));
1957        ControlFlow::Continue(())
1958    }
1959
1960    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1961        let a = self.state[operands.src1].get_i32();
1962        let b = self.state[operands.src2].get_i32();
1963        self.state[operands.dst].set_u32(u32::from(a <= b));
1964        ControlFlow::Continue(())
1965    }
1966
1967    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1968        let a = self.state[operands.src1].get_u32();
1969        let b = self.state[operands.src2].get_u32();
1970        self.state[operands.dst].set_u32(u32::from(a < b));
1971        ControlFlow::Continue(())
1972    }
1973
1974    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1975        let a = self.state[operands.src1].get_u32();
1976        let b = self.state[operands.src2].get_u32();
1977        self.state[operands.dst].set_u32(u32::from(a <= b));
1978        ControlFlow::Continue(())
1979    }
1980
1981    fn push_frame(&mut self) -> ControlFlow<Done> {
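            // Standard prologue: push `lr` then `fp`, and point `fp` at the
            // slot holding the saved `fp` (the base of the new frame).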
1982        self.push::<crate::PushFrame, _>(self.state.lr)?;
1983        self.push::<crate::PushFrame, _>(self.state.fp)?;
1984        self.state.fp = self.state[XReg::sp].get_ptr();
1985        ControlFlow::Continue(())
1986    }
1987
1988    #[inline]
1989    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
1990        // Decrement the stack pointer by `amt` bytes, plus space for two more
1991        // pointers to hold `fp` and `lr`.
1992        let ptr_size = size_of::<usize>();
1993        let full_amt = usize::from(amt) + 2 * ptr_size;
1994        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
1995        self.set_sp::<crate::PushFrameSave>(new_sp)?;
1996
1997        unsafe {
1998            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
1999            // that order, at the top of the allocated area.
2000            self.store_ne::<_, crate::PushFrameSave>(
2001                AddrO32 {
2002                    addr: XReg::sp,
2003                    offset: (full_amt - 1 * ptr_size) as i32,
2004                },
2005                self.state.lr,
2006            )?;
2007            self.store_ne::<_, crate::PushFrameSave>(
2008                AddrO32 {
2009                    addr: XReg::sp,
2010                    offset: (full_amt - 2 * ptr_size) as i32,
2011                },
2012                self.state.fp,
2013            )?;
2014
2015            // Set `fp` to the top of our frame, where `fp` is stored.
2016            let mut offset = amt as i32;
2017            self.state.fp = self.state[XReg::sp]
2018                .get_ptr::<u8>()
2019                .byte_offset(offset as isize);
2020
2021            // Next save any registers in `regs` to the stack.
2022            for reg in regs {
2023                offset -= 8;
2024                self.store_ne::<_, crate::PushFrameSave>(
2025                    AddrO32 {
2026                        addr: XReg::sp,
2027                        offset,
2028                    },
2029                    self.state[reg].get_u64(),
2030                )?;
2031            }
2032        }
2033        ControlFlow::Continue(())
2034    }
2035
2036    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2037        // Restore all registers in `regs`, followed by the normal `pop_frame`
2038        // opcode below to restore fp/lr.
2039        unsafe {
2040            let mut offset = i32::from(amt);
2041            for reg in regs {
2042                offset -= 8;
2043                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2044                    addr: XReg::sp,
2045                    offset,
2046                })?;
2047                self.state[reg].set_u64(val);
2048            }
2049        }
2050        self.pop_frame()
2051    }
2052
2053    fn pop_frame(&mut self) -> ControlFlow<Done> {
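            // Standard epilogue: `fp` points at the saved `fp` with the saved
            // `lr` just above it, so resetting `sp` to `fp` and popping twice
            // restores both registers.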
2054        self.set_sp_unchecked(self.state.fp);
2055        let fp = self.pop();
2056        let lr = self.pop();
2057        self.state.fp = fp;
2058        self.state.lr = lr;
2059        ControlFlow::Continue(())
2060    }
2061
2062    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
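            // The branch table itself is a sequence of `amt` 4-byte
            // `PcRelOffset` entries immediately following this instruction.
            // The index is clamped to the last entry, so out-of-range indices
            // land on the final target (valid bytecode always has at least one
            // entry).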
2063        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2064        // SAFETY: part of the contract of the interpreter is only dealing with
2065        // valid bytecode, so this offset should be safe.
2066        self.pc = unsafe { self.pc.offset(idx * 4) };
2067
2068        // Decode the `PcRelOffset` through a temporary copy so `self.pc` is not
2069        // advanced, as the jump below is relative to `self.pc` itself.
2070        let mut tmp = self.pc;
2071        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2072        let offset = isize::try_from(i32::from(rel)).unwrap();
2073        self.pc = unsafe { self.pc.offset(offset) };
2074        ControlFlow::Continue(())
2075    }
2076
2077    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2078        let amt = usize::try_from(amt).unwrap();
2079        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2080        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2081        ControlFlow::Continue(())
2082    }
2083
2084    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2085        let amt = usize::try_from(amt).unwrap();
2086        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2087        self.set_sp_unchecked(new_sp);
2088        ControlFlow::Continue(())
2089    }
2090
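        // Zero/sign extensions: truncate the register's 64-bit value to the
        // narrower width, then widen back with the matching extension.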
2091    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2092        let src = self.state[src].get_u64() as u8;
2093        self.state[dst].set_u64(src.into());
2094        ControlFlow::Continue(())
2095    }
2096
2097    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2098        let src = self.state[src].get_u64() as u16;
2099        self.state[dst].set_u64(src.into());
2100        ControlFlow::Continue(())
2101    }
2102
2103    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2104        let src = self.state[src].get_u64() as u32;
2105        self.state[dst].set_u64(src.into());
2106        ControlFlow::Continue(())
2107    }
2108
2109    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2110        let src = self.state[src].get_i64() as i8;
2111        self.state[dst].set_i64(src.into());
2112        ControlFlow::Continue(())
2113    }
2114
2115    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2116        let src = self.state[src].get_i64() as i16;
2117        self.state[dst].set_i64(src.into());
2118        ControlFlow::Continue(())
2119    }
2120
2121    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2122        let src = self.state[src].get_i64() as i32;
2123        self.state[dst].set_i64(src.into());
2124        ControlFlow::Continue(())
2125    }
2126
2127    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2128        let a = self.state[operands.src1].get_i32();
2129        let b = self.state[operands.src2].get_i32();
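            // `checked_div` yields `None` both for division by zero and for the
            // overflowing `i32::MIN / -1`, so inspect `b` to report the right
            // trap kind.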
2130        match a.checked_div(b) {
2131            Some(result) => {
2132                self.state[operands.dst].set_i32(result);
2133                ControlFlow::Continue(())
2134            }
2135            None => {
2136                let kind = if b == 0 {
2137                    TrapKind::DivideByZero
2138                } else {
2139                    TrapKind::IntegerOverflow
2140                };
2141                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2142            }
2143        }
2144    }
2145
2146    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2147        let a = self.state[operands.src1].get_i64();
2148        let b = self.state[operands.src2].get_i64();
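            // Same dual-trap logic as `xdiv32_s` above, for 64-bit operands.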
2149        match a.checked_div(b) {
2150            Some(result) => {
2151                self.state[operands.dst].set_i64(result);
2152                ControlFlow::Continue(())
2153            }
2154            None => {
2155                let kind = if b == 0 {
2156                    TrapKind::DivideByZero
2157                } else {
2158                    TrapKind::IntegerOverflow
2159                };
2160                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2161            }
2162        }
2163    }
2164
2165    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2166        let a = self.state[operands.src1].get_u32();
2167        let b = self.state[operands.src2].get_u32();
2168        match a.checked_div(b) {
2169            Some(result) => {
2170                self.state[operands.dst].set_u32(result);
2171                ControlFlow::Continue(())
2172            }
2173            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2174        }
2175    }
2176
2177    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2178        let a = self.state[operands.src1].get_u64();
2179        let b = self.state[operands.src2].get_u64();
2180        match a.checked_div(b) {
2181            Some(result) => {
2182                self.state[operands.dst].set_u64(result);
2183                ControlFlow::Continue(())
2184            }
2185            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2186        }
2187    }
2188
2189    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2190        let a = self.state[operands.src1].get_i32();
2191        let b = self.state[operands.src2].get_i32();
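            // `checked_rem` returns `None` for `i32::MIN % -1` because the
            // underlying division overflows, but the remainder is well-defined
            // as 0, so special-case it; a remaining `None` can only mean
            // `b == 0`.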
2192        let result = if a == i32::MIN && b == -1 {
2193            Some(0)
2194        } else {
2195            a.checked_rem(b)
2196        };
2197        match result {
2198            Some(result) => {
2199                self.state[operands.dst].set_i32(result);
2200                ControlFlow::Continue(())
2201            }
2202            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2203        }
2204    }
2205
2206    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2207        let a = self.state[operands.src1].get_i64();
2208        let b = self.state[operands.src2].get_i64();
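            // Same `i64::MIN % -1` special case as in `xrem32_s` above.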
2209        let result = if a == i64::MIN && b == -1 {
2210            Some(0)
2211        } else {
2212            a.checked_rem(b)
2213        };
2214        match result {
2215            Some(result) => {
2216                self.state[operands.dst].set_i64(result);
2217                ControlFlow::Continue(())
2218            }
2219            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2220        }
2221    }
2222
2223    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2224        let a = self.state[operands.src1].get_u32();
2225        let b = self.state[operands.src2].get_u32();
2226        match a.checked_rem(b) {
2227            Some(result) => {
2228                self.state[operands.dst].set_u32(result);
2229                ControlFlow::Continue(())
2230            }
2231            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2232        }
2233    }
2234
2235    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2236        let a = self.state[operands.src1].get_u64();
2237        let b = self.state[operands.src2].get_u64();
2238        match a.checked_rem(b) {
2239            Some(result) => {
2240                self.state[operands.dst].set_u64(result);
2241                ControlFlow::Continue(())
2242            }
2243            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2244        }
2245    }
2246
2247    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2248        let a = self.state[operands.src1].get_u32();
2249        let b = self.state[operands.src2].get_u32();
2250        self.state[operands.dst].set_u32(a & b);
2251        ControlFlow::Continue(())
2252    }
2253
2254    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2255        self.xband32_s32(dst, src1, src2.into())
2256    }
2257
2258    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2259        let a = self.state[src1].get_i32();
2260        self.state[dst].set_i32(a & src2);
2261        ControlFlow::Continue(())
2262    }
2263
2264    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2265        let a = self.state[operands.src1].get_u64();
2266        let b = self.state[operands.src2].get_u64();
2267        self.state[operands.dst].set_u64(a & b);
2268        ControlFlow::Continue(())
2269    }
2270
2271    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2272        self.xband64_s32(dst, src1, src2.into())
2273    }
2274
2275    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2276        let a = self.state[src1].get_i64();
2277        self.state[dst].set_i64(a & i64::from(src2));
2278        ControlFlow::Continue(())
2279    }
2280
2281    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2282        let a = self.state[operands.src1].get_u32();
2283        let b = self.state[operands.src2].get_u32();
2284        self.state[operands.dst].set_u32(a | b);
2285        ControlFlow::Continue(())
2286    }
2287
2288    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2289        self.xbor32_s32(dst, src1, src2.into())
2290    }
2291
2292    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2293        let a = self.state[src1].get_i32();
2294        self.state[dst].set_i32(a | src2);
2295        ControlFlow::Continue(())
2296    }
2297
2298    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2299        let a = self.state[operands.src1].get_u64();
2300        let b = self.state[operands.src2].get_u64();
2301        self.state[operands.dst].set_u64(a | b);
2302        ControlFlow::Continue(())
2303    }
2304
2305    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2306        self.xbor64_s32(dst, src1, src2.into())
2307    }
2308
2309    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2310        let a = self.state[src1].get_i64();
2311        self.state[dst].set_i64(a | i64::from(src2));
2312        ControlFlow::Continue(())
2313    }
2314
2315    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2316        let a = self.state[operands.src1].get_u32();
2317        let b = self.state[operands.src2].get_u32();
2318        self.state[operands.dst].set_u32(a ^ b);
2319        ControlFlow::Continue(())
2320    }
2321
2322    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2323        self.xbxor32_s32(dst, src1, src2.into())
2324    }
2325
2326    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2327        let a = self.state[src1].get_i32();
2328        self.state[dst].set_i32(a ^ src2);
2329        ControlFlow::Continue(())
2330    }
2331
2332    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2333        let a = self.state[operands.src1].get_u64();
2334        let b = self.state[operands.src2].get_u64();
2335        self.state[operands.dst].set_u64(a ^ b);
2336        ControlFlow::Continue(())
2337    }
2338
2339    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2340        self.xbxor64_s32(dst, src1, src2.into())
2341    }
2342
2343    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2344        let a = self.state[src1].get_i64();
2345        self.state[dst].set_i64(a ^ i64::from(src2));
2346        ControlFlow::Continue(())
2347    }
2348
2349    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2350        let a = self.state[src].get_u32();
2351        self.state[dst].set_u32(!a);
2352        ControlFlow::Continue(())
2353    }
2354
2355    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2356        let a = self.state[src].get_u64();
2357        self.state[dst].set_u64(!a);
2358        ControlFlow::Continue(())
2359    }
2360
2361    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2362        let a = self.state[operands.src1].get_u32();
2363        let b = self.state[operands.src2].get_u32();
2364        self.state[operands.dst].set_u32(a.min(b));
2365        ControlFlow::Continue(())
2366    }
2367
2368    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2369        let a = self.state[operands.src1].get_i32();
2370        let b = self.state[operands.src2].get_i32();
2371        self.state[operands.dst].set_i32(a.min(b));
2372        ControlFlow::Continue(())
2373    }
2374
2375    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2376        let a = self.state[operands.src1].get_u32();
2377        let b = self.state[operands.src2].get_u32();
2378        self.state[operands.dst].set_u32(a.max(b));
2379        ControlFlow::Continue(())
2380    }
2381
2382    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2383        let a = self.state[operands.src1].get_i32();
2384        let b = self.state[operands.src2].get_i32();
2385        self.state[operands.dst].set_i32(a.max(b));
2386        ControlFlow::Continue(())
2387    }
2388
2389    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2390        let a = self.state[operands.src1].get_u64();
2391        let b = self.state[operands.src2].get_u64();
2392        self.state[operands.dst].set_u64(a.min(b));
2393        ControlFlow::Continue(())
2394    }
2395
2396    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2397        let a = self.state[operands.src1].get_i64();
2398        let b = self.state[operands.src2].get_i64();
2399        self.state[operands.dst].set_i64(a.min(b));
2400        ControlFlow::Continue(())
2401    }
2402
2403    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2404        let a = self.state[operands.src1].get_u64();
2405        let b = self.state[operands.src2].get_u64();
2406        self.state[operands.dst].set_u64(a.max(b));
2407        ControlFlow::Continue(())
2408    }
2409
2410    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2411        let a = self.state[operands.src1].get_i64();
2412        let b = self.state[operands.src2].get_i64();
2413        self.state[operands.dst].set_i64(a.max(b));
2414        ControlFlow::Continue(())
2415    }
2416
2417    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2418        let a = self.state[src].get_u32();
2419        self.state[dst].set_u32(a.trailing_zeros());
2420        ControlFlow::Continue(())
2421    }
2422
2423    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2424        let a = self.state[src].get_u64();
2425        self.state[dst].set_u64(a.trailing_zeros().into());
2426        ControlFlow::Continue(())
2427    }
2428
2429    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2430        let a = self.state[src].get_u32();
2431        self.state[dst].set_u32(a.leading_zeros());
2432        ControlFlow::Continue(())
2433    }
2434
2435    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2436        let a = self.state[src].get_u64();
2437        self.state[dst].set_u64(a.leading_zeros().into());
2438        ControlFlow::Continue(())
2439    }
2440
2441    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2442        let a = self.state[src].get_u32();
2443        self.state[dst].set_u32(a.count_ones());
2444        ControlFlow::Continue(())
2445    }
2446
2447    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2448        let a = self.state[src].get_u64();
2449        self.state[dst].set_u64(a.count_ones().into());
2450        ControlFlow::Continue(())
2451    }
2452
2453    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2454        let a = self.state[operands.src1].get_u32();
2455        let b = self.state[operands.src2].get_u32();
2456        self.state[operands.dst].set_u32(a.rotate_left(b));
2457        ControlFlow::Continue(())
2458    }
2459
2460    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2461        let a = self.state[operands.src1].get_u64();
2462        let b = self.state[operands.src2].get_u32();
2463        self.state[operands.dst].set_u64(a.rotate_left(b));
2464        ControlFlow::Continue(())
2465    }
2466
2467    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2468        let a = self.state[operands.src1].get_u32();
2469        let b = self.state[operands.src2].get_u32();
2470        self.state[operands.dst].set_u32(a.rotate_right(b));
2471        ControlFlow::Continue(())
2472    }
2473
2474    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2475        let a = self.state[operands.src1].get_u64();
2476        let b = self.state[operands.src2].get_u32();
2477        self.state[operands.dst].set_u64(a.rotate_right(b));
2478        ControlFlow::Continue(())
2479    }
2480
2481    fn xselect32(
2482        &mut self,
2483        dst: XReg,
2484        cond: XReg,
2485        if_nonzero: XReg,
2486        if_zero: XReg,
2487    ) -> ControlFlow<Done> {
2488        let result = if self.state[cond].get_u32() != 0 {
2489            self.state[if_nonzero].get_u32()
2490        } else {
2491            self.state[if_zero].get_u32()
2492        };
2493        self.state[dst].set_u32(result);
2494        ControlFlow::Continue(())
2495    }
2496
2497    fn xselect64(
2498        &mut self,
2499        dst: XReg,
2500        cond: XReg,
2501        if_nonzero: XReg,
2502        if_zero: XReg,
2503    ) -> ControlFlow<Done> {
2504        let result = if self.state[cond].get_u32() != 0 {
2505            self.state[if_nonzero].get_u64()
2506        } else {
2507            self.state[if_zero].get_u64()
2508        };
2509        self.state[dst].set_u64(result);
2510        ControlFlow::Continue(())
2511    }
2512
2513    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2514        let a = self.state[src].get_i32();
2515        self.state[dst].set_i32(a.wrapping_abs());
2516        ControlFlow::Continue(())
2517    }
2518
2519    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2520        let a = self.state[src].get_i64();
2521        self.state[dst].set_i64(a.wrapping_abs());
2522        ControlFlow::Continue(())
2523    }
2524
2525    // =========================================================================
2526    // o32 addressing modes
2527
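        // `o32` addressing: the effective address is a base register plus a
        // signed 32-bit offset (`AddrO32 { addr, offset }`). Accesses go
        // through `load_ne`/`store_ne` in native endianness, with the `le`/`be`
        // opcode variants byte-swapping the value as needed.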
2528    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2529        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2530        self.state[dst].set_u32(result.into());
2531        ControlFlow::Continue(())
2532    }
2533
2534    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2535        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2536        self.state[dst].set_i32(result.into());
2537        ControlFlow::Continue(())
2538    }
2539
2540    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2541        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2542        self.state[dst].set_u32(u16::from_le(result).into());
2543        ControlFlow::Continue(())
2544    }
2545
2546    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2547        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2548        self.state[dst].set_i32(i16::from_le(result).into());
2549        ControlFlow::Continue(())
2550    }
2551
2552    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2553        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2554        self.state[dst].set_i32(i32::from_le(result));
2555        ControlFlow::Continue(())
2556    }
2557
2558    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2559        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2560        self.state[dst].set_i64(i64::from_le(result));
2561        ControlFlow::Continue(())
2562    }
2563
2564    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2565        let val = self.state[val].get_u32() as u8;
2566        unsafe {
2567            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2568        }
2569        ControlFlow::Continue(())
2570    }
2571
2572    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2573        let val = self.state[val].get_u32() as u16;
2574        unsafe {
2575            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2576        }
2577        ControlFlow::Continue(())
2578    }
2579
2580    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2581        let val = self.state[val].get_u32();
2582        unsafe {
2583            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2584        }
2585        ControlFlow::Continue(())
2586    }
2587
2588    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2589        let val = self.state[val].get_u64();
2590        unsafe {
2591            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2592        }
2593        ControlFlow::Continue(())
2594    }
2595
2596    // =========================================================================
2597    // g32 addressing modes
2598
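        // `g32` addressing: a 32-bit guest address combined with a host heap
        // base and bounds-checked against a heap bound; the effective-address
        // computation (and the out-of-bounds behavior) lives with `AddrG32`.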
2599    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2600        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2601        self.state[dst].set_u32(result.into());
2602        ControlFlow::Continue(())
2603    }
2604
2605    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2606        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2607        self.state[dst].set_i32(result.into());
2608        ControlFlow::Continue(())
2609    }
2610
2611    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2612        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2613        self.state[dst].set_u32(u16::from_le(result).into());
2614        ControlFlow::Continue(())
2615    }
2616
2617    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2618        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2619        self.state[dst].set_i32(i16::from_le(result).into());
2620        ControlFlow::Continue(())
2621    }
2622
2623    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2624        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2625        self.state[dst].set_i32(i32::from_le(result));
2626        ControlFlow::Continue(())
2627    }
2628
2629    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2630        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2631        self.state[dst].set_i64(i64::from_le(result));
2632        ControlFlow::Continue(())
2633    }
2634
2635    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2636        let val = self.state[val].get_u32() as u8;
2637        unsafe {
2638            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2639        }
2640        ControlFlow::Continue(())
2641    }
2642
2643    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2644        let val = self.state[val].get_u32() as u16;
2645        unsafe {
2646            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2647        }
2648        ControlFlow::Continue(())
2649    }
2650
2651    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2652        let val = self.state[val].get_u32();
2653        unsafe {
2654            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2655        }
2656        ControlFlow::Continue(())
2657    }
2658
2659    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2660        let val = self.state[val].get_u64();
2661        unsafe {
2662            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2663        }
2664        ControlFlow::Continue(())
2665    }
2666
2667    // =========================================================================
2668    // z addressing modes
2669
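        // `z` addressing: the same operation shapes as the `o32` block above,
        // but using the `AddrZ` mode, whose effective-address computation is
        // defined alongside the type itself.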
2670    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2671        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2672        self.state[dst].set_u32(result.into());
2673        ControlFlow::Continue(())
2674    }
2675
2676    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2677        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2678        self.state[dst].set_i32(result.into());
2679        ControlFlow::Continue(())
2680    }
2681
2682    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2683        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2684        self.state[dst].set_u32(u16::from_le(result).into());
2685        ControlFlow::Continue(())
2686    }
2687
2688    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2689        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2690        self.state[dst].set_i32(i16::from_le(result).into());
2691        ControlFlow::Continue(())
2692    }
2693
2694    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2695        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2696        self.state[dst].set_i32(i32::from_le(result));
2697        ControlFlow::Continue(())
2698    }
2699
2700    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2701        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2702        self.state[dst].set_i64(i64::from_le(result));
2703        ControlFlow::Continue(())
2704    }
2705
2706    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2707        let val = self.state[val].get_u32() as u8;
2708        unsafe {
2709            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2710        }
2711        ControlFlow::Continue(())
2712    }
2713
2714    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2715        let val = self.state[val].get_u32() as u16;
2716        unsafe {
2717            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2718        }
2719        ControlFlow::Continue(())
2720    }
2721
2722    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2723        let val = self.state[val].get_u32();
2724        unsafe {
2725            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2726        }
2727        ControlFlow::Continue(())
2728    }
2729
2730    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2731        let val = self.state[val].get_u64();
2732        unsafe {
2733            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2734        }
2735        ControlFlow::Continue(())
2736    }
2737
2738    // =========================================================================
2739    // g32bne addressing modes
2740
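        // `g32bne` addressing: like `g32`, except (as the name suggests) the
        // heap bound is not held in a register but is loaded from memory with a
        // native-endian load; see `AddrG32Bne` for the exact computation.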
2741    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2742        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2743        self.state[dst].set_u32(result.into());
2744        ControlFlow::Continue(())
2745    }
2746
2747    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2748        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2749        self.state[dst].set_i32(result.into());
2750        ControlFlow::Continue(())
2751    }
2752
2753    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2754        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2755        self.state[dst].set_u32(u16::from_le(result).into());
2756        ControlFlow::Continue(())
2757    }
2758
2759    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2760        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2761        self.state[dst].set_i32(i16::from_le(result).into());
2762        ControlFlow::Continue(())
2763    }
2764
2765    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2766        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2767        self.state[dst].set_i32(i32::from_le(result));
2768        ControlFlow::Continue(())
2769    }
2770
2771    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2772        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2773        self.state[dst].set_i64(i64::from_le(result));
2774        ControlFlow::Continue(())
2775    }
2776
2777    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2778        let val = self.state[val].get_u32() as u8;
2779        unsafe {
2780            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2781        }
2782        ControlFlow::Continue(())
2783    }
2784
2785    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2786        let val = self.state[val].get_u32() as u16;
2787        unsafe {
2788            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2789        }
2790        ControlFlow::Continue(())
2791    }
2792
2793    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2794        let val = self.state[val].get_u32();
2795        unsafe {
2796            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2797        }
2798        ControlFlow::Continue(())
2799    }
2800
2801    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2802        let val = self.state[val].get_u64();
2803        unsafe {
2804            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2805        }
2806        ControlFlow::Continue(())
2807    }
2808}
2809
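    // Handlers for Pulley's extended opcode space. These are the less common
    // opcodes; their implementations follow the same conventions as the core
    // handlers above.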
2810impl ExtendedOpVisitor for Interpreter<'_> {
2811    fn nop(&mut self) -> ControlFlow<Done> {
2812        ControlFlow::Continue(())
2813    }
2814
2815    fn trap(&mut self) -> ControlFlow<Done> {
2816        self.done_trap::<crate::Trap>()
2817    }
2818
2819    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2820        self.done_call_indirect_host(id)
2821    }
2822
2823    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2824        let src = self.state[src].get_u32();
2825        self.state[dst].set_u32(src.swap_bytes());
2826        ControlFlow::Continue(())
2827    }
2828
2829    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2830        let src = self.state[src].get_u64();
2831        self.state[dst].set_u64(src.swap_bytes());
2832        ControlFlow::Continue(())
2833    }
2834
2835    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
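            // Materialize a mask from a boolean-ish value: 0 stays 0, and any
            // nonzero value becomes all ones (-1). `xbmask64` below is the
            // 64-bit analogue.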
2836        let a = self.state[src].get_u32();
2837        if a == 0 {
2838            self.state[dst].set_u32(0);
2839        } else {
2840            self.state[dst].set_i32(-1);
2841        }
2842        ControlFlow::Continue(())
2843    }
2844
2845    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2846        let a = self.state[src].get_u64();
2847        if a == 0 {
2848            self.state[dst].set_u64(0);
2849        } else {
2850            self.state[dst].set_i64(-1);
2851        }
2852        ControlFlow::Continue(())
2853    }
2854
2855    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2856        let a = self.state[operands.src1].get_u32();
2857        let b = self.state[operands.src2].get_u32();
2858        match a.checked_add(b) {
2859            Some(c) => {
2860                self.state[operands.dst].set_u32(c);
2861                ControlFlow::Continue(())
2862            }
2863            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2864        }
2865    }
2866
2867    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2868        let a = self.state[operands.src1].get_u64();
2869        let b = self.state[operands.src2].get_u64();
2870        match a.checked_add(b) {
2871            Some(c) => {
2872                self.state[operands.dst].set_u64(c);
2873                ControlFlow::Continue(())
2874            }
2875            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2876        }
2877    }
2878
2879    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2880        let a = self.state[operands.src1].get_i64();
2881        let b = self.state[operands.src2].get_i64();
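            // Widen to 128 bits so the full product is exact, then keep the
            // high half.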
2882        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2883        self.state[operands.dst].set_i64(result);
2884        ControlFlow::Continue(())
2885    }
2886
2887    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2888        let a = self.state[operands.src1].get_u64();
2889        let b = self.state[operands.src2].get_u64();
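            // Unsigned variant of the widening multiply above.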
2890        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2891        self.state[operands.dst].set_u64(result);
2892        ControlFlow::Continue(())
2893    }
2894
2895    // =========================================================================
2896    // o32 addressing modes, big-endian accesses to X-registers

    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
        self.state[dst].set_u32(u16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
        self.state[dst].set_i32(i16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
        self.state[dst].set_i32(i32::from_be(result));
        ControlFlow::Continue(())
    }

    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
        self.state[dst].set_i64(i64::from_be(result));
        ControlFlow::Continue(())
    }

    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for little-endian F-registers

    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for big-endian F-registers

    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian F-registers
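    //
    // The `z` and `g32` variants below have the same bodies as their `o32`
    // counterparts; the addressing modes differ only in how the effective
    // address is computed from the decoded `AddrZ`/`AddrG32` operands.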

    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian F-registers

    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

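    // `fp` and `lr` live in dedicated `MachineState` fields rather than the
    // general X register file, so dedicated instructions are needed to copy
    // them into a general-purpose register.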
    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
        let fp = self.state.fp;
        self.state[dst].set_ptr(fp);
        ControlFlow::Continue(())
    }

    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
        let lr = self.state.lr;
        self.state[dst].set_ptr(lr);
        ControlFlow::Continue(())
    }

    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
        self.state[dst].set_f64(f64::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_u32(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_u64(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_f32(f32::from_bits(val));
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_f64(f64::from_bits(val));
        ControlFlow::Continue(())
    }

    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn fselect32(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f32()
        } else {
            self.state[if_zero].get_f32()
        };
        self.state[dst].set_f32(result);
        ControlFlow::Continue(())
    }

    fn fselect64(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f64()
        } else {
            self.state[if_zero].get_f64()
        };
        self.state[dst].set_f64(result);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

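    // Checked float-to-int conversions. The bounds handed to
    // `check_xnn_from_fnn` are exclusive and sit just outside the target
    // integer range (e.g. -2147483649.0 and 2147483648.0 for i32), so any
    // value strictly between them truncates to an in-range integer, while
    // out-of-range values and NaN trap.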
    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_fnn::<crate::X32FromF32S>(a.into(), -2147483649.0, 2147483648.0)?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_fnn::<crate::X32FromF32U>(a.into(), -1.0, 4294967296.0)?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_fnn::<crate::X64FromF32S>(
            a.into(),
            -9223372036854777856.0,
            9223372036854775808.0,
        )?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_fnn::<crate::X64FromF32U>(a.into(), -1.0, 18446744073709551616.0)?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_fnn::<crate::X32FromF64S>(a, -2147483649.0, 2147483648.0)?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_fnn::<crate::X32FromF64U>(a, -1.0, 4294967296.0)?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_fnn::<crate::X64FromF64S>(
            a,
            -9223372036854777856.0,
            9223372036854775808.0,
        )?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_fnn::<crate::X64FromF64U>(a, -1.0, 18446744073709551616.0)?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

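    // Saturating conversions can rely on Rust's `as` casts from float to
    // integer, which saturate at the target type's bounds and convert NaN to
    // zero, matching Wasm's `trunc_sat` semantics.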
    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f64(a.into());
        ControlFlow::Continue(())
    }

    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a + b);
        ControlFlow::Continue(())
    }

    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a - b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a -= b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a * b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a *= b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a / b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a /= b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a /= b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

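    // `wasm_maximum`/`wasm_minimum` follow Wasm's `max`/`min` semantics:
    // NaN is propagated and -0.0 is considered less than +0.0, unlike
    // Rust's `f32::max`/`f32::min`.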
    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_floor());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(-a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = -*elem;
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_abs());
        ControlFlow::Continue(())
    }

    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a + b);
        ControlFlow::Continue(())
    }

    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a - b);
        ControlFlow::Continue(())
    }

    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a * b);
        ControlFlow::Continue(())
    }

    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a / b);
        ControlFlow::Continue(())
    }

    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_floor());
        ControlFlow::Continue(())
    }

    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(-a);
        ControlFlow::Continue(())
    }

    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_abs());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

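    // Pairwise additions: the low half of the result holds the sums of
    // adjacent lane pairs of `src1` and the high half holds the sums of
    // adjacent lane pairs of `src2`.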
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0i16; 8];
        let half = result.len() / 2;
        for i in 0..half {
            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0i32; 4];
        result[0] = a[0].wrapping_add(a[1]);
        result[1] = a[2].wrapping_add(a[3]);
        result[2] = b[0].wrapping_add(b[1]);
        result[3] = b[2].wrapping_add(b[3]);
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }

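    // Vector shifts take their shift amount from an X register;
    // `wrapping_shl`/`wrapping_shr` mask that amount to the lane width
    // (e.g. `& 7` for 8-bit lanes), which matches Wasm's
    // shift-count-modulo-lane-bits semantics.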
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
        self.state[dst].set_u128(val);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u8;
        self.state[dst].set_u8x16([val; 16]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u16;
        self.state[dst].set_u16x8([val; 8]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_u32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_u64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_f32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_f64x2([val; 2]);
        ControlFlow::Continue(())
    }

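    // Widening loads: each reads a 64-bit half-vector from memory and
    // sign- or zero-extends every lane, the memory-operand counterpart of
    // the `vwiden*` instructions further below.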
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
        self.state[dst].set_i16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
        self.state[dst].set_u16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a & b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a | b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a ^ b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u128();
        self.state[dst].set_u128(!a);
        ControlFlow::Continue(())
    }

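    // Bitwise select: for each of the 128 bits the result takes the bit
    // from `x` where the mask `c` is 1 and the bit from `y` where it is 0.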
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
        let c = self.state[c].get_u128();
        let x = self.state[x].get_u128();
        let y = self.state[y].get_u128();
        self.state[dst].set_u128((c & x) | (!c & y));
        ControlFlow::Continue(())
    }

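    // The bitmask ops gather each lane's sign bit into the low bits of an
    // X register; iterating lanes in reverse while shifting left leaves
    // lane i's sign bit in bit i of the result.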
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 7) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 15) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= *item >> 31;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 63) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

4293    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4294    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4295        let a = self.state[src].get_f32x4();
4296        self.state[dst].set_i32x4(a.map(|f| f as i32));
4297        ControlFlow::Continue(())
4298    }
4299
4300    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4301    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4302        let a = self.state[src].get_f32x4();
4303        self.state[dst].set_u32x4(a.map(|f| f as u32));
4304        ControlFlow::Continue(())
4305    }
4306
4307    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4308    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4309        let a = self.state[src].get_f64x2();
4310        self.state[dst].set_i64x2(a.map(|f| f as i64));
4311        ControlFlow::Continue(())
4312    }
4313
4314    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4315    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4316        let a = self.state[src].get_f64x2();
4317        self.state[dst].set_u64x2(a.map(|f| f as u64));
4318        ControlFlow::Continue(())
4319    }
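
        // The lane-wise float/int conversions above rely on Rust `as` casts:
        // int-to-float rounds to nearest, and float-to-int (since Rust 1.45)
        // saturates at the destination type's bounds and maps NaN to 0, which
        // is the behavior Wasm's `trunc_sat` conversions require. For example,
        // `f32::NAN as i32 == 0` and `f32::INFINITY as u32 == u32::MAX`.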
4320
4321    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4322    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4323        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4324        self.state[dst].set_i16x8(a.map(|i| i.into()));
4325        ControlFlow::Continue(())
4326    }
4327
4328    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4329    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4330        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4331        self.state[dst].set_u16x8(a.map(|i| i.into()));
4332        ControlFlow::Continue(())
4333    }
4334
4335    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4336    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4337        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4338        self.state[dst].set_i32x4(a.map(|i| i.into()));
4339        ControlFlow::Continue(())
4340    }
4341
4342    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4343    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4344        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4345        self.state[dst].set_u32x4(a.map(|i| i.into()));
4346        ControlFlow::Continue(())
4347    }
4348
4349    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4350    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4351        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4352        self.state[dst].set_i64x2(a.map(|i| i.into()));
4353        ControlFlow::Continue(())
4354    }
4355
4356    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4357    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4358        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4359        self.state[dst].set_u64x2(a.map(|i| i.into()));
4360        ControlFlow::Continue(())
4361    }
4362
4363    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4364    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4365        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4366        self.state[dst].set_i16x8(a.map(|i| i.into()));
4367        ControlFlow::Continue(())
4368    }
4369
4370    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4371    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4372        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4373        self.state[dst].set_u16x8(a.map(|i| i.into()));
4374        ControlFlow::Continue(())
4375    }
4376
4377    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4378    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4379        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4380        self.state[dst].set_i32x4(a.map(|i| i.into()));
4381        ControlFlow::Continue(())
4382    }
4383
4384    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4385    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4386        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4387        self.state[dst].set_u32x4(a.map(|i| i.into()));
4388        ControlFlow::Continue(())
4389    }
4390
4391    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4392    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4393        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4394        self.state[dst].set_i64x2(a.map(|i| i.into()));
4395        ControlFlow::Continue(())
4396    }
4397
4398    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4399    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4400        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4401        self.state[dst].set_u64x2(a.map(|i| i.into()));
4402        ControlFlow::Continue(())
4403    }
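
        // The widen family above extends one half of the source vector to
        // lanes of twice the width: `vwidenlow*` takes the first half via
        // `first_chunk` and `vwidenhigh*` the second via `last_chunk`. E.g.
        // (hypothetical lane) the byte 0xff widens to -1 under `_s` and to
        // 255 under `_u`.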
4404
4405    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4406    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4407        let a = self.state[operands.src1].get_i16x8();
4408        let b = self.state[operands.src2].get_i16x8();
4409        let mut result = [0; 16];
4410        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4411            *d = (*i)
4412                .try_into()
4413                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4414        }
4415        self.state[operands.dst].set_i8x16(result);
4416        ControlFlow::Continue(())
4417    }
4418
4419    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4420    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4421        let a = self.state[operands.src1].get_i16x8();
4422        let b = self.state[operands.src2].get_i16x8();
4423        let mut result = [0; 16];
4424        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4425            *d = (*i)
4426                .try_into()
4427                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4428        }
4429        self.state[operands.dst].set_u8x16(result);
4430        ControlFlow::Continue(())
4431    }
4432
4433    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4434    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4435        let a = self.state[operands.src1].get_i32x4();
4436        let b = self.state[operands.src2].get_i32x4();
4437        let mut result = [0; 8];
4438        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4439            *d = (*i)
4440                .try_into()
4441                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4442        }
4443        self.state[operands.dst].set_i16x8(result);
4444        ControlFlow::Continue(())
4445    }
4446
4447    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4448    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4449        let a = self.state[operands.src1].get_i32x4();
4450        let b = self.state[operands.src2].get_i32x4();
4451        let mut result = [0; 8];
4452        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4453            *d = (*i)
4454                .try_into()
4455                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4456        }
4457        self.state[operands.dst].set_u16x8(result);
4458        ControlFlow::Continue(())
4459    }
4460
4461    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4462    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4463        let a = self.state[operands.src1].get_i64x2();
4464        let b = self.state[operands.src2].get_i64x2();
4465        let mut result = [0; 4];
4466        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4467            *d = (*i)
4468                .try_into()
4469                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4470        }
4471        self.state[operands.dst].set_i32x4(result);
4472        ControlFlow::Continue(())
4473    }
4474
4475    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4476    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4477        let a = self.state[operands.src1].get_i64x2();
4478        let b = self.state[operands.src2].get_i64x2();
4479        let mut result = [0; 4];
4480        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4481            *d = (*i)
4482                .try_into()
4483                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4484        }
4485        self.state[operands.dst].set_u32x4(result);
4486        ControlFlow::Continue(())
4487    }
4488
4489    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4490    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4491        let a = self.state[operands.src1].get_u64x2();
4492        let b = self.state[operands.src2].get_u64x2();
4493        let mut result = [0; 4];
4494        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4495            *d = (*i).try_into().unwrap_or(u32::MAX);
4496        }
4497        self.state[operands.dst].set_u32x4(result);
4498        ControlFlow::Continue(())
4499    }
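
        // A worked example of the saturating narrows above (hypothetical
        // lanes): `vnarrow16x8_s` maps i16 lanes [300, -200, 5, ..] to i8
        // [127, -128, 5, ..], while `vnarrow16x8_u` maps the same input to
        // [255, 0, 5, ..], because `try_into` fails exactly when a lane is
        // out of range for the destination type.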
4500
4501    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4502    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4503        let a = self.state[src].get_f32x4();
4504        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4505        ControlFlow::Continue(())
4506    }
4507
4508    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4509    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4510        let a = self.state[src].get_f64x2();
4511        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4512        ControlFlow::Continue(())
4513    }
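
        // `vfpromotelow` and `vfdemote` convert between f32x4 and f64x2:
        // promotion reads only the two low f32 lanes, and demotion writes two
        // f32 results and zeroes the upper lanes, mirroring Wasm's
        // `f64x2.promote_low_f32x4` and `f32x4.demote_f64x2_zero`.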
4514
4515    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4516    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4517        let mut a = self.state[operands.src1].get_i8x16();
4518        let b = self.state[operands.src2].get_i8x16();
4519        for (a, b) in a.iter_mut().zip(b) {
4520            *a = a.wrapping_sub(b);
4521        }
4522        self.state[operands.dst].set_i8x16(a);
4523        ControlFlow::Continue(())
4524    }
4525
4526    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4527    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4528        let mut a = self.state[operands.src1].get_i16x8();
4529        let b = self.state[operands.src2].get_i16x8();
4530        for (a, b) in a.iter_mut().zip(b) {
4531            *a = a.wrapping_sub(b);
4532        }
4533        self.state[operands.dst].set_i16x8(a);
4534        ControlFlow::Continue(())
4535    }
4536
4537    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4538    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4539        let mut a = self.state[operands.src1].get_i32x4();
4540        let b = self.state[operands.src2].get_i32x4();
4541        for (a, b) in a.iter_mut().zip(b) {
4542            *a = a.wrapping_sub(b);
4543        }
4544        self.state[operands.dst].set_i32x4(a);
4545        ControlFlow::Continue(())
4546    }
4547
4548    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4549    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4550        let mut a = self.state[operands.src1].get_i64x2();
4551        let b = self.state[operands.src2].get_i64x2();
4552        for (a, b) in a.iter_mut().zip(b) {
4553            *a = a.wrapping_sub(b);
4554        }
4555        self.state[operands.dst].set_i64x2(a);
4556        ControlFlow::Continue(())
4557    }
4558
4559    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4560    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4561        let mut a = self.state[operands.src1].get_i8x16();
4562        let b = self.state[operands.src2].get_i8x16();
4563        for (a, b) in a.iter_mut().zip(b) {
4564            *a = a.saturating_sub(b);
4565        }
4566        self.state[operands.dst].set_i8x16(a);
4567        ControlFlow::Continue(())
4568    }
4569
4570    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4571    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4572        let mut a = self.state[operands.src1].get_u8x16();
4573        let b = self.state[operands.src2].get_u8x16();
4574        for (a, b) in a.iter_mut().zip(b) {
4575            *a = a.saturating_sub(b);
4576        }
4577        self.state[operands.dst].set_u8x16(a);
4578        ControlFlow::Continue(())
4579    }
4580
4581    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4582    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4583        let mut a = self.state[operands.src1].get_i16x8();
4584        let b = self.state[operands.src2].get_i16x8();
4585        for (a, b) in a.iter_mut().zip(b) {
4586            *a = a.saturating_sub(b);
4587        }
4588        self.state[operands.dst].set_i16x8(a);
4589        ControlFlow::Continue(())
4590    }
4591
4592    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4593    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4594        let mut a = self.state[operands.src1].get_u16x8();
4595        let b = self.state[operands.src2].get_u16x8();
4596        for (a, b) in a.iter_mut().zip(b) {
4597            *a = a.saturating_sub(b);
4598        }
4599        self.state[operands.dst].set_u16x8(a);
4600        ControlFlow::Continue(())
4601    }
4602
4603    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4604    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4605        let mut a = self.state[operands.src1].get_f64x2();
4606        let b = self.state[operands.src2].get_f64x2();
4607        for (a, b) in a.iter_mut().zip(b) {
4608            *a = *a - b;
4609        }
4610        self.state[operands.dst].set_f64x2(a);
4611        ControlFlow::Continue(())
4612    }
4613
4614    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4615    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4616        let mut a = self.state[operands.src1].get_i8x16();
4617        let b = self.state[operands.src2].get_i8x16();
4618        for (a, b) in a.iter_mut().zip(b) {
4619            *a = a.wrapping_mul(b);
4620        }
4621        self.state[operands.dst].set_i8x16(a);
4622        ControlFlow::Continue(())
4623    }
4624
4625    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4626    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4627        let mut a = self.state[operands.src1].get_i16x8();
4628        let b = self.state[operands.src2].get_i16x8();
4629        for (a, b) in a.iter_mut().zip(b) {
4630            *a = a.wrapping_mul(b);
4631        }
4632        self.state[operands.dst].set_i16x8(a);
4633        ControlFlow::Continue(())
4634    }
4635
4636    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4637    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4638        let mut a = self.state[operands.src1].get_i32x4();
4639        let b = self.state[operands.src2].get_i32x4();
4640        for (a, b) in a.iter_mut().zip(b) {
4641            *a = a.wrapping_mul(b);
4642        }
4643        self.state[operands.dst].set_i32x4(a);
4644        ControlFlow::Continue(())
4645    }
4646
4647    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4648    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4649        let mut a = self.state[operands.src1].get_i64x2();
4650        let b = self.state[operands.src2].get_i64x2();
4651        for (a, b) in a.iter_mut().zip(b) {
4652            *a = a.wrapping_mul(b);
4653        }
4654        self.state[operands.dst].set_i64x2(a);
4655        ControlFlow::Continue(())
4656    }
4657
4658    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4659    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4660        let mut a = self.state[operands.src1].get_f64x2();
4661        let b = self.state[operands.src2].get_f64x2();
4662        for (a, b) in a.iter_mut().zip(b) {
4663            *a = *a * b;
4664        }
4665        self.state[operands.dst].set_f64x2(a);
4666        ControlFlow::Continue(())
4667    }
4668
4669    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4670    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4671        let mut a = self.state[operands.src1].get_i16x8();
4672        let b = self.state[operands.src2].get_i16x8();
4673        const MIN: i32 = i16::MIN as i32;
4674        const MAX: i32 = i16::MAX as i32;
4675        for (a, b) in a.iter_mut().zip(b) {
4676            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4677            *a = r.clamp(MIN, MAX) as i16;
4678        }
4679        self.state[operands.dst].set_i16x8(a);
4680        ControlFlow::Continue(())
4681    }
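
        // `vqmulrsi16x8` above implements Wasm's `i16x8.q15mulr_sat_s`, a Q15
        // fixed-point multiply where the `1 << 14` term rounds to nearest
        // before the shift. For example, 0x4000 (0.5 in Q15) times 0x2000
        // (0.25) gives (0x4000 * 0x2000 + 0x4000) >> 15 = 0x1000, i.e. 0.125.
        // The clamp only fires for i16::MIN * i16::MIN, which would otherwise
        // produce 0x8000, one past i16::MAX.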
4682
4683    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4684    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4685        let a = self.state[src].get_u8x16();
4686        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4687        ControlFlow::Continue(())
4688    }
4689
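        // The lane extract/insert helpers below index with `get_unchecked`;
        // this assumes the bytecode producer only emits in-range `lane`
        // immediates for each vector shape, which is what makes the unsafe
        // accesses sound.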
4690    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4691    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4692        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4693        self.state[dst].set_u32(u32::from(a));
4694        ControlFlow::Continue(())
4695    }
4696
4697    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4698    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4699        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4700        self.state[dst].set_u32(u32::from(a));
4701        ControlFlow::Continue(())
4702    }
4703
4704    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4705    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4706        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4707        self.state[dst].set_u32(a);
4708        ControlFlow::Continue(())
4709    }
4710
4711    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4712    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4713        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4714        self.state[dst].set_u64(a);
4715        ControlFlow::Continue(())
4716    }
4717
4718    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4719    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4720        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4721        self.state[dst].set_f32(a);
4722        ControlFlow::Continue(())
4723    }
4724
4725    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4726    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4727        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4728        self.state[dst].set_f64(a);
4729        ControlFlow::Continue(())
4730    }
4731
4732    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4733    fn vinsertx8(
4734        &mut self,
4735        operands: BinaryOperands<VReg, VReg, XReg>,
4736        lane: u8,
4737    ) -> ControlFlow<Done> {
4738        let mut a = self.state[operands.src1].get_u8x16();
4739        let b = self.state[operands.src2].get_u32() as u8;
4740        unsafe {
4741            *a.get_unchecked_mut(usize::from(lane)) = b;
4742        }
4743        self.state[operands.dst].set_u8x16(a);
4744        ControlFlow::Continue(())
4745    }
4746
4747    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4748    fn vinsertx16(
4749        &mut self,
4750        operands: BinaryOperands<VReg, VReg, XReg>,
4751        lane: u8,
4752    ) -> ControlFlow<Done> {
4753        let mut a = self.state[operands.src1].get_u16x8();
4754        let b = self.state[operands.src2].get_u32() as u16;
4755        unsafe {
4756            *a.get_unchecked_mut(usize::from(lane)) = b;
4757        }
4758        self.state[operands.dst].set_u16x8(a);
4759        ControlFlow::Continue(())
4760    }
4761
4762    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4763    fn vinsertx32(
4764        &mut self,
4765        operands: BinaryOperands<VReg, VReg, XReg>,
4766        lane: u8,
4767    ) -> ControlFlow<Done> {
4768        let mut a = self.state[operands.src1].get_u32x4();
4769        let b = self.state[operands.src2].get_u32();
4770        unsafe {
4771            *a.get_unchecked_mut(usize::from(lane)) = b;
4772        }
4773        self.state[operands.dst].set_u32x4(a);
4774        ControlFlow::Continue(())
4775    }
4776
4777    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4778    fn vinsertx64(
4779        &mut self,
4780        operands: BinaryOperands<VReg, VReg, XReg>,
4781        lane: u8,
4782    ) -> ControlFlow<Done> {
4783        let mut a = self.state[operands.src1].get_u64x2();
4784        let b = self.state[operands.src2].get_u64();
4785        unsafe {
4786            *a.get_unchecked_mut(usize::from(lane)) = b;
4787        }
4788        self.state[operands.dst].set_u64x2(a);
4789        ControlFlow::Continue(())
4790    }
4791
4792    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4793    fn vinsertf32(
4794        &mut self,
4795        operands: BinaryOperands<VReg, VReg, FReg>,
4796        lane: u8,
4797    ) -> ControlFlow<Done> {
4798        let mut a = self.state[operands.src1].get_f32x4();
4799        let b = self.state[operands.src2].get_f32();
4800        unsafe {
4801            *a.get_unchecked_mut(usize::from(lane)) = b;
4802        }
4803        self.state[operands.dst].set_f32x4(a);
4804        ControlFlow::Continue(())
4805    }
4806
4807    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4808    fn vinsertf64(
4809        &mut self,
4810        operands: BinaryOperands<VReg, VReg, FReg>,
4811        lane: u8,
4812    ) -> ControlFlow<Done> {
4813        let mut a = self.state[operands.src1].get_f64x2();
4814        let b = self.state[operands.src2].get_f64();
4815        unsafe {
4816            *a.get_unchecked_mut(usize::from(lane)) = b;
4817        }
4818        self.state[operands.dst].set_f64x2(a);
4819        ControlFlow::Continue(())
4820    }
4821
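        // The integer comparisons below use Wasm's lane-mask convention: a
        // result lane is all ones where the predicate holds and all zeros
        // where it does not. E.g. (hypothetical lanes) `veq32x4` on
        // [1, 2, 3, 4] and [1, 9, 3, 9] produces [u32::MAX, 0, u32::MAX, 0].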
4822    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4823    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4824        let a = self.state[operands.src1].get_u8x16();
4825        let b = self.state[operands.src2].get_u8x16();
4826        let mut c = [0; 16];
4827        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4828            *c = if a == b { u8::MAX } else { 0 };
4829        }
4830        self.state[operands.dst].set_u8x16(c);
4831        ControlFlow::Continue(())
4832    }
4833
4834    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4835    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4836        let a = self.state[operands.src1].get_u8x16();
4837        let b = self.state[operands.src2].get_u8x16();
4838        let mut c = [0; 16];
4839        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4840            *c = if a != b { u8::MAX } else { 0 };
4841        }
4842        self.state[operands.dst].set_u8x16(c);
4843        ControlFlow::Continue(())
4844    }
4845
4846    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4847    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4848        let a = self.state[operands.src1].get_i8x16();
4849        let b = self.state[operands.src2].get_i8x16();
4850        let mut c = [0; 16];
4851        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4852            *c = if a < b { u8::MAX } else { 0 };
4853        }
4854        self.state[operands.dst].set_u8x16(c);
4855        ControlFlow::Continue(())
4856    }
4857
4858    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4859    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4860        let a = self.state[operands.src1].get_i8x16();
4861        let b = self.state[operands.src2].get_i8x16();
4862        let mut c = [0; 16];
4863        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4864            *c = if a <= b { u8::MAX } else { 0 };
4865        }
4866        self.state[operands.dst].set_u8x16(c);
4867        ControlFlow::Continue(())
4868    }
4869
4870    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4871    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4872        let a = self.state[operands.src1].get_u8x16();
4873        let b = self.state[operands.src2].get_u8x16();
4874        let mut c = [0; 16];
4875        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4876            *c = if a < b { u8::MAX } else { 0 };
4877        }
4878        self.state[operands.dst].set_u8x16(c);
4879        ControlFlow::Continue(())
4880    }
4881
4882    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4883    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4884        let a = self.state[operands.src1].get_u8x16();
4885        let b = self.state[operands.src2].get_u8x16();
4886        let mut c = [0; 16];
4887        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4888            *c = if a <= b { u8::MAX } else { 0 };
4889        }
4890        self.state[operands.dst].set_u8x16(c);
4891        ControlFlow::Continue(())
4892    }
4893
4894    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4895    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4896        let a = self.state[operands.src1].get_u16x8();
4897        let b = self.state[operands.src2].get_u16x8();
4898        let mut c = [0; 8];
4899        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4900            *c = if a == b { u16::MAX } else { 0 };
4901        }
4902        self.state[operands.dst].set_u16x8(c);
4903        ControlFlow::Continue(())
4904    }
4905
4906    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4907    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4908        let a = self.state[operands.src1].get_u16x8();
4909        let b = self.state[operands.src2].get_u16x8();
4910        let mut c = [0; 8];
4911        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4912            *c = if a != b { u16::MAX } else { 0 };
4913        }
4914        self.state[operands.dst].set_u16x8(c);
4915        ControlFlow::Continue(())
4916    }
4917
4918    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4919    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4920        let a = self.state[operands.src1].get_i16x8();
4921        let b = self.state[operands.src2].get_i16x8();
4922        let mut c = [0; 8];
4923        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4924            *c = if a < b { u16::MAX } else { 0 };
4925        }
4926        self.state[operands.dst].set_u16x8(c);
4927        ControlFlow::Continue(())
4928    }
4929
4930    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4931    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4932        let a = self.state[operands.src1].get_i16x8();
4933        let b = self.state[operands.src2].get_i16x8();
4934        let mut c = [0; 8];
4935        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4936            *c = if a <= b { u16::MAX } else { 0 };
4937        }
4938        self.state[operands.dst].set_u16x8(c);
4939        ControlFlow::Continue(())
4940    }
4941
4942    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4943    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4944        let a = self.state[operands.src1].get_u16x8();
4945        let b = self.state[operands.src2].get_u16x8();
4946        let mut c = [0; 8];
4947        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4948            *c = if a < b { u16::MAX } else { 0 };
4949        }
4950        self.state[operands.dst].set_u16x8(c);
4951        ControlFlow::Continue(())
4952    }
4953
4954    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4955    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4956        let a = self.state[operands.src1].get_u16x8();
4957        let b = self.state[operands.src2].get_u16x8();
4958        let mut c = [0; 8];
4959        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4960            *c = if a <= b { u16::MAX } else { 0 };
4961        }
4962        self.state[operands.dst].set_u16x8(c);
4963        ControlFlow::Continue(())
4964    }
4965
4966    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4967    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4968        let a = self.state[operands.src1].get_u32x4();
4969        let b = self.state[operands.src2].get_u32x4();
4970        let mut c = [0; 4];
4971        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4972            *c = if a == b { u32::MAX } else { 0 };
4973        }
4974        self.state[operands.dst].set_u32x4(c);
4975        ControlFlow::Continue(())
4976    }
4977
4978    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4979    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4980        let a = self.state[operands.src1].get_u32x4();
4981        let b = self.state[operands.src2].get_u32x4();
4982        let mut c = [0; 4];
4983        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4984            *c = if a != b { u32::MAX } else { 0 };
4985        }
4986        self.state[operands.dst].set_u32x4(c);
4987        ControlFlow::Continue(())
4988    }
4989
4990    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4991    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4992        let a = self.state[operands.src1].get_i32x4();
4993        let b = self.state[operands.src2].get_i32x4();
4994        let mut c = [0; 4];
4995        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4996            *c = if a < b { u32::MAX } else { 0 };
4997        }
4998        self.state[operands.dst].set_u32x4(c);
4999        ControlFlow::Continue(())
5000    }
5001
5002    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5003    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5004        let a = self.state[operands.src1].get_i32x4();
5005        let b = self.state[operands.src2].get_i32x4();
5006        let mut c = [0; 4];
5007        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5008            *c = if a <= b { u32::MAX } else { 0 };
5009        }
5010        self.state[operands.dst].set_u32x4(c);
5011        ControlFlow::Continue(())
5012    }
5013
5014    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5015    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5016        let a = self.state[operands.src1].get_u32x4();
5017        let b = self.state[operands.src2].get_u32x4();
5018        let mut c = [0; 4];
5019        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5020            *c = if a < b { u32::MAX } else { 0 };
5021        }
5022        self.state[operands.dst].set_u32x4(c);
5023        ControlFlow::Continue(())
5024    }
5025
5026    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5027    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5028        let a = self.state[operands.src1].get_u32x4();
5029        let b = self.state[operands.src2].get_u32x4();
5030        let mut c = [0; 4];
5031        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5032            *c = if a <= b { u32::MAX } else { 0 };
5033        }
5034        self.state[operands.dst].set_u32x4(c);
5035        ControlFlow::Continue(())
5036    }
5037
5038    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5039    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5040        let a = self.state[operands.src1].get_u64x2();
5041        let b = self.state[operands.src2].get_u64x2();
5042        let mut c = [0; 2];
5043        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5044            *c = if a == b { u64::MAX } else { 0 };
5045        }
5046        self.state[operands.dst].set_u64x2(c);
5047        ControlFlow::Continue(())
5048    }
5049
5050    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5051    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5052        let a = self.state[operands.src1].get_u64x2();
5053        let b = self.state[operands.src2].get_u64x2();
5054        let mut c = [0; 2];
5055        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5056            *c = if a != b { u64::MAX } else { 0 };
5057        }
5058        self.state[operands.dst].set_u64x2(c);
5059        ControlFlow::Continue(())
5060    }
5061
5062    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5063    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5064        let a = self.state[operands.src1].get_i64x2();
5065        let b = self.state[operands.src2].get_i64x2();
5066        let mut c = [0; 2];
5067        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5068            *c = if a < b { u64::MAX } else { 0 };
5069        }
5070        self.state[operands.dst].set_u64x2(c);
5071        ControlFlow::Continue(())
5072    }
5073
5074    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5075    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5076        let a = self.state[operands.src1].get_i64x2();
5077        let b = self.state[operands.src2].get_i64x2();
5078        let mut c = [0; 2];
5079        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5080            *c = if a <= b { u64::MAX } else { 0 };
5081        }
5082        self.state[operands.dst].set_u64x2(c);
5083        ControlFlow::Continue(())
5084    }
5085
5086    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5087    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5088        let a = self.state[operands.src1].get_u64x2();
5089        let b = self.state[operands.src2].get_u64x2();
5090        let mut c = [0; 2];
5091        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5092            *c = if a < b { u64::MAX } else { 0 };
5093        }
5094        self.state[operands.dst].set_u64x2(c);
5095        ControlFlow::Continue(())
5096    }
5097
5098    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5099    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5100        let a = self.state[operands.src1].get_u64x2();
5101        let b = self.state[operands.src2].get_u64x2();
5102        let mut c = [0; 2];
5103        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5104            *c = if a <= b { u64::MAX } else { 0 };
5105        }
5106        self.state[operands.dst].set_u64x2(c);
5107        ControlFlow::Continue(())
5108    }
5109
5110    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5111    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5112        let a = self.state[src].get_i8x16();
5113        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
5114        ControlFlow::Continue(())
5115    }
5116
5117    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5118    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5119        let a = self.state[src].get_i16x8();
5120        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
5121        ControlFlow::Continue(())
5122    }
5123
5124    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5125    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5126        let a = self.state[src].get_i32x4();
5127        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
5128        ControlFlow::Continue(())
5129    }
5130
5131    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5132    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5133        let a = self.state[src].get_i64x2();
5134        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
5135        ControlFlow::Continue(())
5136    }
5137
5138    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5139    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5140        let a = self.state[src].get_f64x2();
5141        self.state[dst].set_f64x2(a.map(|i| -i));
5142        ControlFlow::Continue(())
5143    }
5144
5145    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5146    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5147        let mut a = self.state[operands.src1].get_i8x16();
5148        let b = self.state[operands.src2].get_i8x16();
5149        for (a, b) in a.iter_mut().zip(&b) {
5150            *a = (*a).min(*b);
5151        }
5152        self.state[operands.dst].set_i8x16(a);
5153        ControlFlow::Continue(())
5154    }
5155
5156    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5157    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5158        let mut a = self.state[operands.src1].get_u8x16();
5159        let b = self.state[operands.src2].get_u8x16();
5160        for (a, b) in a.iter_mut().zip(&b) {
5161            *a = (*a).min(*b);
5162        }
5163        self.state[operands.dst].set_u8x16(a);
5164        ControlFlow::Continue(())
5165    }
5166
5167    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5168    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5169        let mut a = self.state[operands.src1].get_i16x8();
5170        let b = self.state[operands.src2].get_i16x8();
5171        for (a, b) in a.iter_mut().zip(&b) {
5172            *a = (*a).min(*b);
5173        }
5174        self.state[operands.dst].set_i16x8(a);
5175        ControlFlow::Continue(())
5176    }
5177
5178    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5179    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5180        let mut a = self.state[operands.src1].get_u16x8();
5181        let b = self.state[operands.src2].get_u16x8();
5182        for (a, b) in a.iter_mut().zip(&b) {
5183            *a = (*a).min(*b);
5184        }
5185        self.state[operands.dst].set_u16x8(a);
5186        ControlFlow::Continue(())
5187    }
5188
5189    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5190    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5191        let mut a = self.state[operands.src1].get_i32x4();
5192        let b = self.state[operands.src2].get_i32x4();
5193        for (a, b) in a.iter_mut().zip(&b) {
5194            *a = (*a).min(*b);
5195        }
5196        self.state[operands.dst].set_i32x4(a);
5197        ControlFlow::Continue(())
5198    }
5199
5200    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5201    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5202        let mut a = self.state[operands.src1].get_u32x4();
5203        let b = self.state[operands.src2].get_u32x4();
5204        for (a, b) in a.iter_mut().zip(&b) {
5205            *a = (*a).min(*b);
5206        }
5207        self.state[operands.dst].set_u32x4(a);
5208        ControlFlow::Continue(())
5209    }
5210
5211    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5212    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5213        let mut a = self.state[operands.src1].get_i8x16();
5214        let b = self.state[operands.src2].get_i8x16();
5215        for (a, b) in a.iter_mut().zip(&b) {
5216            *a = (*a).max(*b);
5217        }
5218        self.state[operands.dst].set_i8x16(a);
5219        ControlFlow::Continue(())
5220    }
5221
5222    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5223    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5224        let mut a = self.state[operands.src1].get_u8x16();
5225        let b = self.state[operands.src2].get_u8x16();
5226        for (a, b) in a.iter_mut().zip(&b) {
5227            *a = (*a).max(*b);
5228        }
5229        self.state[operands.dst].set_u8x16(a);
5230        ControlFlow::Continue(())
5231    }
5232
5233    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5234    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5235        let mut a = self.state[operands.src1].get_i16x8();
5236        let b = self.state[operands.src2].get_i16x8();
5237        for (a, b) in a.iter_mut().zip(&b) {
5238            *a = (*a).max(*b);
5239        }
5240        self.state[operands.dst].set_i16x8(a);
5241        ControlFlow::Continue(())
5242    }
5243
5244    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5245    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5246        let mut a = self.state[operands.src1].get_u16x8();
5247        let b = self.state[operands.src2].get_u16x8();
5248        for (a, b) in a.iter_mut().zip(&b) {
5249            *a = (*a).max(*b);
5250        }
5251        self.state[operands.dst].set_u16x8(a);
5252        ControlFlow::Continue(())
5253    }
5254
5255    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5256    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5257        let mut a = self.state[operands.src1].get_i32x4();
5258        let b = self.state[operands.src2].get_i32x4();
5259        for (a, b) in a.iter_mut().zip(&b) {
5260            *a = (*a).max(*b);
5261        }
5262        self.state[operands.dst].set_i32x4(a);
5263        ControlFlow::Continue(())
5264    }
5265
5266    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5267    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5268        let mut a = self.state[operands.src1].get_u32x4();
5269        let b = self.state[operands.src2].get_u32x4();
5270        for (a, b) in a.iter_mut().zip(&b) {
5271            *a = (*a).max(*b);
5272        }
5273        self.state[operands.dst].set_u32x4(a);
5274        ControlFlow::Continue(())
5275    }
5276
5277    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5278    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5279        let a = self.state[src].get_i8x16();
5280        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5281        ControlFlow::Continue(())
5282    }
5283
5284    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5285    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5286        let a = self.state[src].get_i16x8();
5287        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5288        ControlFlow::Continue(())
5289    }
5290
5291    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5292    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5293        let a = self.state[src].get_i32x4();
5294        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5295        ControlFlow::Continue(())
5296    }
5297
5298    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5299    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5300        let a = self.state[src].get_i64x2();
5301        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5302        ControlFlow::Continue(())
5303    }
5304
5305    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5306    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5307        let a = self.state[src].get_f32x4();
5308        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5309        ControlFlow::Continue(())
5310    }
5311
5312    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5313    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5314        let a = self.state[src].get_f64x2();
5315        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5316        ControlFlow::Continue(())
5317    }
5318
5319    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5320    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5321        let mut a = self.state[operands.src1].get_f32x4();
5322        let b = self.state[operands.src2].get_f32x4();
5323        for (a, b) in a.iter_mut().zip(&b) {
5324            *a = a.wasm_maximum(*b);
5325        }
5326        self.state[operands.dst].set_f32x4(a);
5327        ControlFlow::Continue(())
5328    }
5329
5330    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5331    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5332        let mut a = self.state[operands.src1].get_f64x2();
5333        let b = self.state[operands.src2].get_f64x2();
5334        for (a, b) in a.iter_mut().zip(&b) {
5335            *a = a.wasm_maximum(*b);
5336        }
5337        self.state[operands.dst].set_f64x2(a);
5338        ControlFlow::Continue(())
5339    }
5340
5341    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5342    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5343        let mut a = self.state[operands.src1].get_f32x4();
5344        let b = self.state[operands.src2].get_f32x4();
5345        for (a, b) in a.iter_mut().zip(&b) {
5346            *a = a.wasm_minimum(*b);
5347        }
5348        self.state[operands.dst].set_f32x4(a);
5349        ControlFlow::Continue(())
5350    }
5351
5352    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5353    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5354        let mut a = self.state[operands.src1].get_f64x2();
5355        let b = self.state[operands.src2].get_f64x2();
5356        for (a, b) in a.iter_mut().zip(&b) {
5357            *a = a.wasm_minimum(*b);
5358        }
5359        self.state[operands.dst].set_f64x2(a);
5360        ControlFlow::Continue(())
5361    }
5362
5363    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5364    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5365        let a = self.state[src1].get_u8x16();
5366        let b = self.state[src2].get_u8x16();
5367        let result = mask.to_le_bytes().map(|m| {
5368            if m < 16 {
5369                a[m as usize]
5370            } else {
5371                b[m as usize - 16]
5372            }
5373        });
5374        self.state[dst].set_u8x16(result);
5375        ControlFlow::Continue(())
5376    }
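
        // In `vshuffle` above, little-endian mask byte i selects result lane
        // i: values 0..=15 pick from `a` and 16..=31 pick from `b`, so (as a
        // hypothetical mask) bytes [0, 16, 1, 17, ..] would interleave the
        // low bytes of the two sources.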
5377
5378    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5379    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5380        let src1 = self.state[operands.src1].get_i8x16();
5381        let src2 = self.state[operands.src2].get_i8x16();
5382        let mut dst = [0i8; 16];
5383        for (i, &idx) in src2.iter().enumerate() {
5384            if (idx as usize) < 16 {
5385                dst[i] = src1[idx as usize];
5386            } else {
5387            dst[i] = 0;
5388            }
5389        }
5390        self.state[operands.dst].set_i8x16(dst);
5391        ControlFlow::Continue(())
5392    }
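
        // In `vswizzlei8x16` above, a negative `idx` becomes a huge value
        // after `idx as usize`, so the single `< 16` check maps both negative
        // and too-large indices to 0, matching Wasm `i8x16.swizzle`.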
5393
5394    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5395    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5396        let mut a = self.state[operands.src1].get_u8x16();
5397        let b = self.state[operands.src2].get_u8x16();
5398        for (a, b) in a.iter_mut().zip(&b) {
5399            // use wider precision to avoid overflow
5400            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5401        }
5402        self.state[operands.dst].set_u8x16(a);
5403        ControlFlow::Continue(())
5404    }
5405
5406    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5407    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5408        let mut a = self.state[operands.src1].get_u16x8();
5409        let b = self.state[operands.src2].get_u16x8();
5410        for (a, b) in a.iter_mut().zip(&b) {
5411            // use wider precision to avoid overflow
5412            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5413        }
5414        self.state[operands.dst].set_u16x8(a);
5415        ControlFlow::Continue(())
5416    }
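
        // `vavground*` is Wasm's rounding average `avgr_u`: e.g. (hypothetical
        // lanes) avg(1, 2) = (1 + 2 + 1) / 2 = 2, with the widening to `u32`
        // preventing overflow when both inputs are near the type's maximum.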
5417
5418    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5419    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5420        let a = self.state[operands.src1].get_f32x4();
5421        let b = self.state[operands.src2].get_f32x4();
5422        let mut c = [0; 4];
5423        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5424            *c = if a == b { u32::MAX } else { 0 };
5425        }
5426        self.state[operands.dst].set_u32x4(c);
5427        ControlFlow::Continue(())
5428    }
5429
5430    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5431    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5432        let a = self.state[operands.src1].get_f32x4();
5433        let b = self.state[operands.src2].get_f32x4();
5434        let mut c = [0; 4];
5435        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5436            *c = if a != b { u32::MAX } else { 0 };
5437        }
5438        self.state[operands.dst].set_u32x4(c);
5439        ControlFlow::Continue(())
5440    }
5441
5442    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5443    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5444        let a = self.state[operands.src1].get_f32x4();
5445        let b = self.state[operands.src2].get_f32x4();
5446        let mut c = [0; 4];
5447        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5448            *c = if a < b { u32::MAX } else { 0 };
5449        }
5450        self.state[operands.dst].set_u32x4(c);
5451        ControlFlow::Continue(())
5452    }
5453
5454    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5455    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5456        let a = self.state[operands.src1].get_f32x4();
5457        let b = self.state[operands.src2].get_f32x4();
5458        let mut c = [0; 4];
5459        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5460            *c = if a <= b { u32::MAX } else { 0 };
5461        }
5462        self.state[operands.dst].set_u32x4(c);
5463        ControlFlow::Continue(())
5464    }
5465
5466    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5467    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5468        let a = self.state[operands.src1].get_f64x2();
5469        let b = self.state[operands.src2].get_f64x2();
5470        let mut c = [0; 2];
5471        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5472            *c = if a == b { u64::MAX } else { 0 };
5473        }
5474        self.state[operands.dst].set_u64x2(c);
5475        ControlFlow::Continue(())
5476    }
5477
5478    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5479    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5480        let a = self.state[operands.src1].get_f64x2();
5481        let b = self.state[operands.src2].get_f64x2();
5482        let mut c = [0; 2];
5483        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5484            *c = if a != b { u64::MAX } else { 0 };
5485        }
5486        self.state[operands.dst].set_u64x2(c);
5487        ControlFlow::Continue(())
5488    }
5489
5490    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5491    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5492        let a = self.state[operands.src1].get_f64x2();
5493        let b = self.state[operands.src2].get_f64x2();
5494        let mut c = [0; 2];
5495        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5496            *c = if a < b { u64::MAX } else { 0 };
5497        }
5498        self.state[operands.dst].set_u64x2(c);
5499        ControlFlow::Continue(())
5500    }
5501
5502    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5503    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5504        let a = self.state[operands.src1].get_f64x2();
5505        let b = self.state[operands.src2].get_f64x2();
5506        let mut c = [0; 2];
5507        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5508            *c = if a <= b { u64::MAX } else { 0 };
5509        }
5510        self.state[operands.dst].set_u64x2(c);
5511        ControlFlow::Continue(())
5512    }
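
        // For the float comparisons above, the Wasm semantics fall out of
        // Rust's IEEE operators: any comparison against NaN is false except
        // `!=`, so a NaN lane produces an all-zeros mask for `veqf*`, `vltf*`
        // and `vlteqf*`, and an all-ones mask for `vneqf*`.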
5513
5514    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5515    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5516        let mut a = self.state[a].get_f32x4();
5517        let b = self.state[b].get_f32x4();
5518        let c = self.state[c].get_f32x4();
5519        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5520            *a = a.wasm_mul_add(b, c);
5521        }
5522        self.state[dst].set_f32x4(a);
5523        ControlFlow::Continue(())
5524    }
5525
5526    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5527    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5528        let mut a = self.state[a].get_f64x2();
5529        let b = self.state[b].get_f64x2();
5530        let c = self.state[c].get_f64x2();
5531        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5532            *a = a.wasm_mul_add(b, c);
5533        }
5534        self.state[dst].set_f64x2(a);
5535        ControlFlow::Continue(())
5536    }
5537
5538    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5539    fn vselect(
5540        &mut self,
5541        dst: VReg,
5542        cond: XReg,
5543        if_nonzero: VReg,
5544        if_zero: VReg,
5545    ) -> ControlFlow<Done> {
5546        let result = if self.state[cond].get_u32() != 0 {
5547            self.state[if_nonzero]
5548        } else {
5549            self.state[if_zero]
5550        };
5551        self.state[dst] = result;
5552        ControlFlow::Continue(())
5553    }
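
        // Note that `vselect` above is a whole-register conditional move keyed
        // on a scalar `x` register, unlike the per-lane masks produced by the
        // comparison instructions.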
5554
5556    fn xadd128(
5557        &mut self,
5558        dst_lo: XReg,
5559        dst_hi: XReg,
5560        lhs_lo: XReg,
5561        lhs_hi: XReg,
5562        rhs_lo: XReg,
5563        rhs_hi: XReg,
5564    ) -> ControlFlow<Done> {
5565        let lhs = self.get_i128(lhs_lo, lhs_hi);
5566        let rhs = self.get_i128(rhs_lo, rhs_hi);
5567        let result = lhs.wrapping_add(rhs);
5568        self.set_i128(dst_lo, dst_hi, result);
5569        ControlFlow::Continue(())
5570    }
5571
5573    fn xsub128(
5574        &mut self,
5575        dst_lo: XReg,
5576        dst_hi: XReg,
5577        lhs_lo: XReg,
5578        lhs_hi: XReg,
5579        rhs_lo: XReg,
5580        rhs_hi: XReg,
5581    ) -> ControlFlow<Done> {
5582        let lhs = self.get_i128(lhs_lo, lhs_hi);
5583        let rhs = self.get_i128(rhs_lo, rhs_hi);
5584        let result = lhs.wrapping_sub(rhs);
5585        self.set_i128(dst_lo, dst_hi, result);
5586        ControlFlow::Continue(())
5587    }
5588
5590    fn xwidemul64_s(
5591        &mut self,
5592        dst_lo: XReg,
5593        dst_hi: XReg,
5594        lhs: XReg,
5595        rhs: XReg,
5596    ) -> ControlFlow<Done> {
5597        let lhs = self.state[lhs].get_i64();
5598        let rhs = self.state[rhs].get_i64();
5599        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5600        self.set_i128(dst_lo, dst_hi, result);
5601        ControlFlow::Continue(())
5602    }
5603
5605    fn xwidemul64_u(
5606        &mut self,
5607        dst_lo: XReg,
5608        dst_hi: XReg,
5609        lhs: XReg,
5610        rhs: XReg,
5611    ) -> ControlFlow<Done> {
5612        let lhs = self.state[lhs].get_u64();
5613        let rhs = self.state[rhs].get_u64();
5614        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5615        self.set_i128(dst_lo, dst_hi, result as i128);
5616        ControlFlow::Continue(())
5617    }
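
        // The 128-bit ops above model an i128 as a lo/hi pair of 64-bit `x`
        // registers via `get_i128`/`set_i128`, conceptually (a sketch, not the
        // exact definition):
        //
        //     (i128::from(hi) << 64) | i128::from(lo as u64)
        //
        // For `xwidemul64_*` the product of two 64-bit values always fits in
        // 128 bits, so the `wrapping_mul` can never actually wrap.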
5618}