pulley_interpreter/interp.rs

//! Interpretation of pulley bytecode.

use crate::decode::*;
use crate::encode::Encode;
use crate::imms::*;
use crate::profile::{ExecutingPc, ExecutingPcRef};
use crate::regs::*;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::fmt;
use core::mem;
use core::ops::ControlFlow;
use core::ops::{Index, IndexMut};
use core::ptr::NonNull;
use pulley_macros::interp_disable_if_cfg;
use wasmtime_math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};

mod debug;
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
mod match_loop;
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
mod tail_loop;

const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB

/// A virtual machine for interpreting Pulley bytecode.
pub struct Vm {
    state: MachineState,
    executing_pc: ExecutingPc,
}

impl Default for Vm {
    fn default() -> Self {
        Vm::new()
    }
}

impl Vm {
    /// Create a new virtual machine with the default stack size.
    pub fn new() -> Self {
        Self::with_stack(DEFAULT_STACK_SIZE)
    }

    /// Create a new virtual machine with the given stack size.
    pub fn with_stack(stack_size: usize) -> Self {
        Self {
            state: MachineState::with_stack(stack_size),
            executing_pc: ExecutingPc::default(),
        }
    }

    /// Get a shared reference to this VM's machine state.
    pub fn state(&self) -> &MachineState {
        &self.state
    }

    /// Get an exclusive reference to this VM's machine state.
    pub fn state_mut(&mut self) -> &mut MachineState {
        &mut self.state
    }

    /// Call a bytecode function.
    ///
    /// The given `func` must point to the beginning of a valid Pulley bytecode
    /// function.
    ///
    /// The given `args` must match the number and type of arguments that
    /// function expects.
    ///
    /// The given `rets` must match the function's actual return types.
    ///
    /// Returns either the resulting values, or the PC at which a trap was
    /// raised.
    pub unsafe fn call<'a, T>(
        &'a mut self,
        func: NonNull<u8>,
        args: &[Val],
        rets: T,
    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
    where
        T: IntoIterator<Item = RegType> + 'a,
    {
        unsafe {
            let lr = self.call_start(args);

            match self.call_run(func) {
                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
                DoneReason::CallIndirectHost { id, resume } => {
                    DoneReason::CallIndirectHost { id, resume }
                }
            }
        }
    }

    /// Performs the initial part of [`Vm::call`] in setting up the `args`
    /// provided in registers according to Pulley's ABI.
    ///
    /// # Return
    ///
    /// Returns the old `lr` register value. The current `lr` value is replaced
    /// with a sentinel that triggers a return to the host when returned-to.
    ///
    /// # Unsafety
    ///
    /// All the same unsafety as `call` and additionally, you must
    /// invoke `call_run` and then `call_end` after calling `call_start`.
    /// If you don't want to wrangle these invocations, use `call` instead
    /// of `call_{start,run,end}`.
    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        for arg in args {
            match arg {
                Val::XReg(val) => match x_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                Val::FReg(val) => match f_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                #[cfg(not(pulley_disable_interp_simd))]
                Val::VReg(val) => match v_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
            }
        }

        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
    }

    /// Performs the internal part of [`Vm::call`] where bytecode is actually
    /// executed.
    ///
    /// # Unsafety
    ///
    /// In addition to all the invariants documented for `call`, you
    /// may only invoke `call_run` after invoking `call_start` to
    /// initialize this call's arguments.
    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
        self.state.debug_assert_done_reason_none();
        let interpreter = Interpreter {
            state: &mut self.state,
            pc: unsafe { UnsafeBytecodeStream::new(pc) },
            executing_pc: self.executing_pc.as_ref(),
        };
        let done = interpreter.run();
        self.state.done_decode(done)
    }

    /// Performs the tail end of [`Vm::call`] by returning the values as
    /// determined by `rets` according to Pulley's ABI.
    ///
    /// The `old_ret` value should have been provided from `call_start`
    /// previously.
    ///
    /// # Unsafety
    ///
    /// In addition to the invariants documented for `call`, this may
    /// only be called after `call_run`.
    pub unsafe fn call_end<'a>(
        &'a mut self,
        old_ret: *mut u8,
        rets: impl IntoIterator<Item = RegType> + 'a,
    ) -> impl Iterator<Item = Val> + 'a {
        self.state.lr = old_ret;
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        rets.into_iter().map(move |ty| match ty {
            RegType::XReg => match x_rets.next() {
                Some(reg) => Val::XReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            RegType::FReg => match f_rets.next() {
                Some(reg) => Val::FReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(not(pulley_disable_interp_simd))]
            RegType::VReg => match v_rets.next() {
                Some(reg) => Val::VReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(pulley_disable_interp_simd)]
            RegType::VReg => panic!("simd support disabled at compile time"),
        })
    }
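
    /// A hedged, test-only sketch (not used by the crate) of driving the
    /// split `call_start`/`call_run`/`call_end` entry points manually instead
    /// of [`Vm::call`], resuming execution after each `call_indirect_host`
    /// request. The single `x`-register return type and the host dispatch on
    /// `id` are illustrative assumptions, not part of Pulley's API.
    #[cfg(test)]
    #[allow(dead_code)]
    unsafe fn call_with_host_services(
        &mut self,
        func: NonNull<u8>,
        args: &[Val],
    ) -> DoneReason<Vec<Val>> {
        unsafe {
            let lr = self.call_start(args);
            let mut pc = func;
            loop {
                match self.call_run(pc) {
                    DoneReason::ReturnToHost(()) => {
                        // One `x` register return assumed for illustration.
                        let rets: Vec<Val> = self.call_end(lr, [RegType::XReg]).collect();
                        return DoneReason::ReturnToHost(rets);
                    }
                    DoneReason::CallIndirectHost { id, resume } => {
                        // A real embedding would dispatch `id` to its own
                        // table of host functions here (hypothetical).
                        let _ = id;
                        pc = resume;
                    }
                    DoneReason::Trap { pc, kind } => return DoneReason::Trap { pc, kind },
                }
            }
        }
    }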

    /// Returns the current `fp` register value.
    pub fn fp(&self) -> *mut u8 {
        self.state.fp
    }

    /// Returns the current `lr` register value.
    pub fn lr(&self) -> *mut u8 {
        self.state.lr
    }

    /// Sets the current `fp` register value.
    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
        self.state.fp = fp;
    }

    /// Sets the current `lr` register value.
    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
        self.state.lr = lr;
    }

    /// Gets a handle to the currently executing program counter for this
    /// interpreter which can be read from other threads.
    //
    // Note that despite this field still existing with `not(feature =
    // "profile")` it's hidden from the public API in that scenario as it has no
    // methods anyway.
    #[cfg(feature = "profile")]
    pub fn executing_pc(&self) -> &ExecutingPc {
        &self.executing_pc
    }
}

impl Drop for Vm {
    fn drop(&mut self) {
        self.executing_pc.set_done();
    }
}

/// The type of a register in the Pulley machine state.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    VReg,
}

/// A value that can be stored in a register.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}

impl fmt::LowerHex for Val {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
            #[cfg(not(pulley_disable_interp_simd))]
            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}

impl From<XRegVal> for Val {
    fn from(value: XRegVal) -> Self {
        Val::XReg(value)
    }
}

impl From<u64> for Val {
    fn from(value: u64) -> Self {
        XRegVal::new_u64(value).into()
    }
}

impl From<u32> for Val {
    fn from(value: u32) -> Self {
        XRegVal::new_u32(value).into()
    }
}

impl From<i64> for Val {
    fn from(value: i64) -> Self {
        XRegVal::new_i64(value).into()
    }
}

impl From<i32> for Val {
    fn from(value: i32) -> Self {
        XRegVal::new_i32(value).into()
    }
}

impl<T> From<*mut T> for Val {
    fn from(value: *mut T) -> Self {
        XRegVal::new_ptr(value).into()
    }
}

impl From<FRegVal> for Val {
    fn from(value: FRegVal) -> Self {
        Val::FReg(value)
    }
}

impl From<f64> for Val {
    fn from(value: f64) -> Self {
        FRegVal::new_f64(value).into()
    }
}

impl From<f32> for Val {
    fn from(value: f32) -> Self {
        FRegVal::new_f32(value).into()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl From<VRegVal> for Val {
    fn from(value: VRegVal) -> Self {
        Val::VReg(value)
    }
}

/// An `x` register value: integers.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);

impl PartialEq for XRegVal {
    fn eq(&self, other: &Self) -> bool {
        self.get_u64() == other.get_u64()
    }
}

impl Eq for XRegVal {}

impl fmt::Debug for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("XRegVal")
            .field("as_u64", &self.get_u64())
            .finish()
    }
}

impl fmt::LowerHex for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_u64(), f)
    }
}

/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent, meaning that the
///    state of the register is platform-dependent.
/// 2. Sign or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32 bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change this.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them so this in theory doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself as it's interpreted
/// meaning that severing data-dependencies with previous operations is
/// hypothesized to not be too important. If this is ever a problem though it
/// could increase the likelihood we go for route (2) above instead (or maybe
/// even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}

impl Default for XRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl XRegVal {
    pub fn new_i32(x: i32) -> Self {
        let mut val = XRegVal::default();
        val.set_i32(x);
        val
    }

    pub fn new_u32(x: u32) -> Self {
        let mut val = XRegVal::default();
        val.set_u32(x);
        val
    }

    pub fn new_i64(x: i64) -> Self {
        let mut val = XRegVal::default();
        val.set_i64(x);
        val
    }

    pub fn new_u64(x: u64) -> Self {
        let mut val = XRegVal::default();
        val.set_u64(x);
        val
    }

    pub fn new_ptr<T>(ptr: *mut T) -> Self {
        let mut val = XRegVal::default();
        val.set_ptr(ptr);
        val
    }

    pub fn get_i32(&self) -> i32 {
        let x = unsafe { self.0.i32 };
        i32::from_le(x)
    }

    pub fn get_u32(&self) -> u32 {
        let x = unsafe { self.0.u32 };
        u32::from_le(x)
    }

    pub fn get_i64(&self) -> i64 {
        let x = unsafe { self.0.i64 };
        i64::from_le(x)
    }

    pub fn get_u64(&self) -> u64 {
        let x = unsafe { self.0.u64 };
        u64::from_le(x)
    }

    pub fn get_ptr<T>(&self) -> *mut T {
        let ptr = unsafe { self.0.ptr };
        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
    }

    pub fn set_i32(&mut self, x: i32) {
        self.0.i32 = x.to_le();
    }

    pub fn set_u32(&mut self, x: u32) {
        self.0.u32 = x.to_le();
    }

    pub fn set_i64(&mut self, x: i64) {
        self.0.i64 = x.to_le();
    }

    pub fn set_u64(&mut self, x: u64) {
        self.0.u64 = x.to_le();
    }

    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
        self.0.ptr = ptr.expose_provenance().to_le();
    }
}
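
/// A small illustrative test (not from the original source) of route (3)
/// documented on `XRegUnion` above: a 32-bit write only touches the low four
/// bytes, and because storage is always little-endian the leftover upper bits
/// read back the same way on any host.
#[test]
fn xreg_upper_bits_after_32_bit_write() {
    let mut val = XRegVal::new_u64(u64::MAX);
    val.set_u32(0);
    // Low 32 bits overwritten, upper 32 bits left holding their old bytes.
    assert_eq!(val.get_u64(), 0xffff_ffff_0000_0000);
}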

/// An `f` register value: floats.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("FRegVal")
            .field("as_f32", &self.get_f32())
            .field("as_f64", &self.get_f64())
            .finish()
    }
}

impl fmt::LowerHex for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
    }
}

// NB: like `XRegUnion` values here are always little-endian, see the
// documentation above for more details.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}

impl Default for FRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl FRegVal {
    pub fn new_f32(f: f32) -> Self {
        let mut val = Self::default();
        val.set_f32(f);
        val
    }

    pub fn new_f64(f: f64) -> Self {
        let mut val = Self::default();
        val.set_f64(f);
        val
    }

    pub fn get_f32(&self) -> f32 {
        let val = unsafe { self.0.f32 };
        f32::from_le_bytes(val.to_ne_bytes())
    }

    pub fn get_f64(&self) -> f64 {
        let val = unsafe { self.0.f64 };
        f64::from_le_bytes(val.to_ne_bytes())
    }

    pub fn set_f32(&mut self, val: f32) {
        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
    }

    pub fn set_f64(&mut self, val: f64) {
        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
    }
}

/// A `v` register value: vectors.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::Debug for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("VRegVal")
            .field("as_u128", &unsafe { self.0.u128 })
            .finish()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::LowerHex for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
    }
}

/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required though so bitcasts between different shapes of
/// vectors work. This union cannot be stored in big-endian form.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the
    // bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}

#[cfg(not(pulley_disable_interp_simd))]
impl Default for VRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
#[cfg(not(pulley_disable_interp_simd))]
impl VRegVal {
    pub fn new_u128(i: u128) -> Self {
        let mut val = Self::default();
        val.set_u128(i);
        val
    }

    pub fn get_u128(&self) -> u128 {
        let val = unsafe { self.0.u128 };
        u128::from_le(val)
    }

    pub fn set_u128(&mut self, val: u128) {
        self.0.u128 = val.to_le();
    }

    fn get_i8x16(&self) -> [i8; 16] {
        let val = unsafe { self.0.i8x16 };
        val.map(|e| i8::from_le(e))
    }

    fn set_i8x16(&mut self, val: [i8; 16]) {
        self.0.i8x16 = val.map(|e| e.to_le());
    }

    fn get_u8x16(&self) -> [u8; 16] {
        let val = unsafe { self.0.u8x16 };
        val.map(|e| u8::from_le(e))
    }

    fn set_u8x16(&mut self, val: [u8; 16]) {
        self.0.u8x16 = val.map(|e| e.to_le());
    }

    fn get_i16x8(&self) -> [i16; 8] {
        let val = unsafe { self.0.i16x8 };
        val.map(|e| i16::from_le(e))
    }

    fn set_i16x8(&mut self, val: [i16; 8]) {
        self.0.i16x8 = val.map(|e| e.to_le());
    }

    fn get_u16x8(&self) -> [u16; 8] {
        let val = unsafe { self.0.u16x8 };
        val.map(|e| u16::from_le(e))
    }

    fn set_u16x8(&mut self, val: [u16; 8]) {
        self.0.u16x8 = val.map(|e| e.to_le());
    }

    fn get_i32x4(&self) -> [i32; 4] {
        let val = unsafe { self.0.i32x4 };
        val.map(|e| i32::from_le(e))
    }

    fn set_i32x4(&mut self, val: [i32; 4]) {
        self.0.i32x4 = val.map(|e| e.to_le());
    }

    fn get_u32x4(&self) -> [u32; 4] {
        let val = unsafe { self.0.u32x4 };
        val.map(|e| u32::from_le(e))
    }

    fn set_u32x4(&mut self, val: [u32; 4]) {
        self.0.u32x4 = val.map(|e| e.to_le());
    }

    fn get_i64x2(&self) -> [i64; 2] {
        let val = unsafe { self.0.i64x2 };
        val.map(|e| i64::from_le(e))
    }

    fn set_i64x2(&mut self, val: [i64; 2]) {
        self.0.i64x2 = val.map(|e| e.to_le());
    }

    fn get_u64x2(&self) -> [u64; 2] {
        let val = unsafe { self.0.u64x2 };
        val.map(|e| u64::from_le(e))
    }

    fn set_u64x2(&mut self, val: [u64; 2]) {
        self.0.u64x2 = val.map(|e| e.to_le());
    }

    fn get_f64x2(&self) -> [f64; 2] {
        let val = unsafe { self.0.f64x2 };
        val.map(|e| f64::from_bits(u64::from_le(e)))
    }

    fn set_f64x2(&mut self, val: [f64; 2]) {
        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
    }

    fn get_f32x4(&self) -> [f32; 4] {
        let val = unsafe { self.0.f32x4 };
        val.map(|e| f32::from_bits(u32::from_le(e)))
    }

    fn set_f32x4(&mut self, val: [f32; 4]) {
        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
    }
}
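
/// An illustrative test (not from the original source) of the bitcast
/// property documented on `VRegUnion`: because every lane is stored
/// little-endian, writing `f32x4` lanes and reading the same register as
/// `u32x4` yields the IEEE 754 bit patterns on any host.
#[test]
#[cfg(not(pulley_disable_interp_simd))]
fn vreg_lane_bitcast() {
    let mut v = VRegVal::default();
    v.set_f32x4([1.0, -2.5, 0.0, f32::INFINITY]);
    let lanes = v.get_u32x4();
    assert_eq!(lanes[0], 1.0_f32.to_bits());
    assert_eq!(lanes[1], (-2.5_f32).to_bits());
    // Lane 0 also occupies the low 32 bits of the 128-bit view.
    assert_eq!(v.get_u128() as u32, 1.0_f32.to_bits());
}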

/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    fp: *mut u8,
    lr: *mut u8,
    stack: Stack,
    done_reason: Option<DoneReason<()>>,
}

unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}

/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
struct Stack {
    storage: Vec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}

impl Stack {
    /// Creates a new stack which will have a byte size of at least `size`.
    ///
    /// The allocated stack might be slightly larger due to rounding `size` up
    /// to a multiple of 16.
    fn new(size: usize) -> Stack {
        Stack {
            // Round up `size` to the nearest multiple of 16. Note that the
            // stack is also allocated here but not initialized, and that's
            // intentional as pulley bytecode should always initialize the stack
            // before use.
            storage: Vec::with_capacity((size + 15) / 16),
        }
    }

    /// Returns a pointer to the top of the stack (the highest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the top.
    fn top(&mut self) -> *mut u8 {
        let len = self.len();
        unsafe { self.base().add(len) }
    }

    /// Returns a pointer to the base of the stack (the lowest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the base.
    fn base(&mut self) -> *mut u8 {
        self.storage.as_mut_ptr().cast::<u8>()
    }

    /// Returns the length, in bytes, of this stack allocation.
    fn len(&self) -> usize {
        self.storage.capacity() * mem::size_of::<Align16>()
    }
}
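
/// A small illustrative test (not from the original source) of the `Stack`
/// guarantees documented above: the base of the allocation is 16-byte aligned
/// and the byte length is rounded up to a multiple of 16.
#[test]
fn stack_alignment_and_rounding() {
    let mut stack = Stack::new(17);
    assert_eq!(stack.base() as usize % 16, 0);
    assert!(stack.len() >= 17);
    assert_eq!(stack.len() % 16, 0);
}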

impl fmt::Debug for MachineState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MachineState {
            x_regs,
            f_regs,
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs,
            stack: _,
            done_reason: _,
            fp: _,
            lr: _,
        } = self;

        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);

        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut f = f.debug_map();
                for (i, r) in self.0.iter().enumerate() {
                    f.entry(&(self.1)(i as u8), r);
                }
                f.finish()
            }
        }

        let mut f = f.debug_struct("MachineState");

        f.field(
            "x_regs",
            &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
        )
        .field(
            "f_regs",
            &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
        );
        #[cfg(not(pulley_disable_interp_simd))]
        f.field(
            "v_regs",
            &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
        );
        f.finish_non_exhaustive()
    }
}

macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);
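
/// A tiny illustrative test (not from the original source) of the
/// `Index`/`IndexMut` impls generated by `index_reg!` above: registers can be
/// read and written directly through a `Vm`.
#[test]
fn index_reg_round_trip() {
    let mut vm = Vm::new();
    vm[XReg::x0] = XRegVal::new_u64(42);
    assert_eq!(vm[XReg::x0].get_u64(), 42);
}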

/// Sentinel return address that signals the end of the call stack.
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;

impl MachineState {
    fn with_stack(stack_size: usize) -> Self {
        let mut state = Self {
            x_regs: [Default::default(); XReg::RANGE.end as usize],
            f_regs: Default::default(),
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs: Default::default(),
            stack: Stack::new(stack_size),
            done_reason: None,
            fp: HOST_RETURN_ADDR,
            lr: HOST_RETURN_ADDR,
        };

        let sp = state.stack.top();
        state[XReg::sp] = XRegVal::new_ptr(sp);

        state
    }
}

/// Inner private module to prevent creation of the `Done` structure outside of
/// this module.
mod done {
    use super::{Encode, Interpreter, MachineState};
    use core::ops::ControlFlow;
    use core::ptr::NonNull;

    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        _priv: (),
    }

    /// Reason that the pulley interpreter has ceased execution.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }

    /// Stored within `DoneReason::Trap`.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
        StackOverflow,
    }

    impl MachineState {
        pub(super) fn debug_assert_done_reason_none(&mut self) {
            debug_assert!(self.done_reason.is_none());
        }

        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
            self.done_reason.take().unwrap()
        }
    }

    impl Interpreter<'_> {
        /// Finishes execution by recording `DoneReason::Trap`.
        ///
        /// This method takes an `I` generic parameter indicating which
        /// instruction is executing this function and generating a trap. That's
        /// used to go backwards from the current `pc` which is just beyond the
        /// instruction to point to the instruction itself in the trap metadata
        /// returned from the interpreter.
        #[cold]
        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
            self.done_trap_kind::<I>(None)
        }

        /// Same as `done_trap` but with an explicit `TrapKind`.
        #[cold]
        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
            let pc = self.current_pc::<I>();
            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
        #[cold]
        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::CallIndirectHost {
                id,
                resume: self.pc.as_ptr(),
            });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::ReturnToHost`.
        #[cold]
        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
            ControlFlow::Break(Done { _priv: () })
        }
    }
}

use done::Done;
pub use done::{DoneReason, TrapKind};

struct Interpreter<'a> {
    state: &'a mut MachineState,
    pc: UnsafeBytecodeStream,
    executing_pc: ExecutingPcRef<'a>,
}

impl Interpreter<'_> {
    /// Performs a relative jump of `offset` bytes from the current instruction.
    ///
    /// The jump is relative to the start of the current instruction, identified
    /// by `I`. Note that `self.pc` at the start of this function actually
    /// points to the instruction after this one, so `I` is necessary to go back
    /// to the current instruction before applying `offset`.
    #[inline]
    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let offset = isize::try_from(i32::from(offset)).unwrap();
        let my_pc = self.current_pc::<I>();
        self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) };
        ControlFlow::Continue(())
    }

    /// Returns the PC of the current instruction where `I` is the static type
    /// representing the current instruction.
    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
    }

    /// `sp -= size_of::<T>(); *sp = val;`
    ///
    /// Note that `I` is the instruction which is pushing data to use if a trap
    /// is generated.
    #[must_use]
    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
        self.set_sp::<I>(new_sp.cast())?;
        unsafe {
            new_sp.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }

    /// `ret = *sp; sp += size_of::<T>()`
    fn pop<T>(&mut self) -> T {
        let sp = self.state[XReg::sp].get_ptr::<T>();
        let val = unsafe { sp.read_unaligned() };
        self.set_sp_unchecked(sp.wrapping_add(1));
        val
    }

    /// Sets the stack pointer to the `sp` provided.
    ///
    /// Returns a trap if this would result in stack overflow, or if `sp` is
    /// beneath the base pointer of `self.state.stack`.
    ///
    /// The `I` parameter here is the instruction that is setting the stack
    /// pointer and is used to calculate this instruction's own `pc` if this
    /// instruction traps.
    #[must_use]
    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
        let sp_raw = sp as usize;
        let base_raw = self.state.stack.base() as usize;
        if sp_raw < base_raw {
            return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
        }
        self.set_sp_unchecked(sp);
        ControlFlow::Continue(())
    }

    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
    /// only be used with stack increment operations such as `pop`.
    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
        if cfg!(debug_assertions) {
            let sp_raw = sp as usize;
            let base = self.state.stack.base() as usize;
            let end = base + self.state.stack.len();
            assert!(base <= sp_raw && sp_raw <= end);
        }
        self.state[XReg::sp].set_ptr(sp);
    }

    /// Loads a value of `T` using native-endian byte ordering from the `addr`
    /// specified.
    ///
    /// The `I` type parameter is the instruction issuing this load which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was loaded successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
        unsafe { addr.load_ne::<T, I>(self) }
    }

    /// Stores a `val` to the `addr` specified.
    ///
    /// The `I` type parameter is the instruction issuing this store which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was stored successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(
        &mut self,
        addr: impl AddressingMode,
        val: T,
    ) -> ControlFlow<Done> {
        unsafe { addr.store_ne::<T, I>(self, val) }
    }

    fn check_xnn_from_f32<I: Encode>(
        &mut self,
        val: f32,
        (lo, hi): (f32, f32),
    ) -> ControlFlow<Done> {
        self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
    }

    fn check_xnn_from_f64<I: Encode>(
        &mut self,
        val: f64,
        (lo, hi): (f64, f64),
    ) -> ControlFlow<Done> {
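        // NaN is the only value that compares unequal to itself, so this
        // detects a NaN input.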
        if val != val {
            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
        }
        let val = val.wasm_trunc();
        if val <= lo || val >= hi {
            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
        }
        ControlFlow::Continue(())
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
        let lo = self.state[lo].get_u64();
        let hi = self.state[hi].get_i64();
        i128::from(lo) | (i128::from(hi) << 64)
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
        self.state[lo].set_u64(val as u64);
        self.state[hi].set_u64((val >> 64) as u64);
    }

    fn record_executing_pc_for_profiling(&mut self) {
        // Note that this is a no-op if `feature = "profile"` is disabled.
        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
    }
}

/// Helper trait to encompass the various addressing modes of Pulley.
trait AddressingMode: Sized {
    /// Calculates the native host address `*mut T` corresponding to this
    /// addressing mode.
    ///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}

impl AddressingMode for AddrO32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}

impl AddressingMode for AddrZ {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
        let host_addr = i.state[self.addr].get_ptr::<T>();
        if host_addr.is_null() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}

impl AddressingMode for AddrG32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
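        // For example (illustrative numbers): with `bound = 0x10000`,
        // `offset = 4`, and `T = u32`, any `wasm_addr > 0xfff8` would reach
        // past `0x10000` and therefore traps below.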
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

impl AddressingMode for AddrG32Bne {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
        let bound = unsafe {
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);
        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
        assert_eq!(i.pop::<i32>(), 4_i32);
        assert_eq!(i.pop::<i32>(), 3_i32);
        assert_eq!(i.pop::<i32>(), 2_i32);
        assert_eq!(i.pop::<i32>(), 1_i32);
    }
}

macro_rules! br_if_imm {
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}

impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }

    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }

    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.pc_rel_jump::<crate::Call>(offset)
    }

    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.state[XReg::x0] = self.state[arg1];
        self.pc_rel_jump::<crate::Call1>(offset)
    }

    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1) = (self.state[arg1], self.state[arg2]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.pc_rel_jump::<crate::Call2>(offset)
    }

    fn call3(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.pc_rel_jump::<crate::Call3>(offset)
    }

    fn call4(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        arg4: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2, x3) = (
            self.state[arg1],
            self.state[arg2],
            self.state[arg3],
            self.state[arg4],
        );
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.state[XReg::x3] = x3;
        self.pc_rel_jump::<crate::Call4>(offset)
    }

    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is that only
        // valid bytecode is interpreted, so the jump destination is part of
        // the validity of the bytecode itself.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }

    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond != 0 {
            self.pc_rel_jump::<crate::BrIf>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond == 0 {
            self.pc_rel_jump::<crate::BrIfNot>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    br_if_imm! {
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }

    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1661        let val = self.state[src];
1662        self.state[dst] = val;
1663        ControlFlow::Continue(())
1664    }
1665
1666    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1667        self.state[dst].set_i64(i64::from(imm));
1668        ControlFlow::Continue(())
1669    }
1670
1671    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1672        self.state[dst].set_i64(0);
1673        ControlFlow::Continue(())
1674    }
1675
1676    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1677        self.state[dst].set_i64(1);
1678        ControlFlow::Continue(())
1679    }
1680
1681    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1682        self.state[dst].set_i64(i64::from(imm));
1683        ControlFlow::Continue(())
1684    }
1685
1686    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1687        self.state[dst].set_i64(i64::from(imm));
1688        ControlFlow::Continue(())
1689    }
1690
1691    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1692        self.state[dst].set_i64(imm);
1693        ControlFlow::Continue(())
1694    }
1695
1696    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1697        let a = self.state[operands.src1].get_u32();
1698        let b = self.state[operands.src2].get_u32();
1699        self.state[operands.dst].set_u32(a.wrapping_add(b));
1700        ControlFlow::Continue(())
1701    }
1702
1703    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1704        self.xadd32_u32(dst, src1, src2.into())
1705    }
1706
1707    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1708        let a = self.state[src1].get_u32();
1709        self.state[dst].set_u32(a.wrapping_add(src2));
1710        ControlFlow::Continue(())
1711    }
1712
1713    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1714        let a = self.state[operands.src1].get_u64();
1715        let b = self.state[operands.src2].get_u64();
1716        self.state[operands.dst].set_u64(a.wrapping_add(b));
1717        ControlFlow::Continue(())
1718    }
1719
1720    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1721        self.xadd64_u32(dst, src1, src2.into())
1722    }
1723
1724    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1725        let a = self.state[src1].get_u64();
1726        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1727        ControlFlow::Continue(())
1728    }
1729
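        // Integer multiply-add: `dst = (src1 * src2) + src3`, with both the
        // multiply and the add wrapping on overflow.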
1730    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1731        let a = self.state[src1].get_u32();
1732        let b = self.state[src2].get_u32();
1733        let c = self.state[src3].get_u32();
1734        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1735        ControlFlow::Continue(())
1736    }
1737
1738    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1739        let a = self.state[src1].get_u64();
1740        let b = self.state[src2].get_u64();
1741        let c = self.state[src3].get_u64();
1742        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1743        ControlFlow::Continue(())
1744    }
1745
1746    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1747        let a = self.state[operands.src1].get_u32();
1748        let b = self.state[operands.src2].get_u32();
1749        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1750        ControlFlow::Continue(())
1751    }
1752
1753    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1754        self.xsub32_u32(dst, src1, src2.into())
1755    }
1756
1757    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1758        let a = self.state[src1].get_u32();
1759        self.state[dst].set_u32(a.wrapping_sub(src2));
1760        ControlFlow::Continue(())
1761    }
1762
1763    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1764        let a = self.state[operands.src1].get_u64();
1765        let b = self.state[operands.src2].get_u64();
1766        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1767        ControlFlow::Continue(())
1768    }
1769
1770    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1771        self.xsub64_u32(dst, src1, src2.into())
1772    }
1773
1774    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1775        let a = self.state[src1].get_u64();
1776        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1777        ControlFlow::Continue(())
1778    }
1779
1780    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1781        let a = self.state[operands.src1].get_u32();
1782        let b = self.state[operands.src2].get_u32();
1783        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1784        ControlFlow::Continue(())
1785    }
1786
1787    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1788        self.xmul32_s32(dst, src1, src2.into())
1789    }
1790
1791    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1792        let a = self.state[src1].get_i32();
1793        self.state[dst].set_i32(a.wrapping_mul(src2));
1794        ControlFlow::Continue(())
1795    }
1796
1797    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1798        let a = self.state[operands.src1].get_u64();
1799        let b = self.state[operands.src2].get_u64();
1800        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1801        ControlFlow::Continue(())
1802    }
1803
1804    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1805        self.xmul64_s32(dst, src1, src2.into())
1806    }
1807
1808    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1809        let a = self.state[src1].get_i64();
1810        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1811        ControlFlow::Continue(())
1812    }
1813
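        // Note for the register-amount shift handlers below: `wrapping_shl`
        // and `wrapping_shr` mask the shift amount by the bit-width of the
        // type (e.g. `b % 32` for a `u32`), so oversized shift amounts wrap
        // instead of panicking.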
1814    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1815        let a = self.state[operands.src1].get_u32();
1816        let b = self.state[operands.src2].get_u32();
1817        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1818        ControlFlow::Continue(())
1819    }
1820
1821    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1822        let a = self.state[operands.src1].get_u32();
1823        let b = self.state[operands.src2].get_u32();
1824        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1825        ControlFlow::Continue(())
1826    }
1827
1828    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1829        let a = self.state[operands.src1].get_i32();
1830        let b = self.state[operands.src2].get_u32();
1831        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1832        ControlFlow::Continue(())
1833    }
1834
1835    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1836        let a = self.state[operands.src1].get_u64();
1837        let b = self.state[operands.src2].get_u32();
1838        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1839        ControlFlow::Continue(())
1840    }
1841
1842    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1843        let a = self.state[operands.src1].get_u64();
1844        let b = self.state[operands.src2].get_u32();
1845        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1846        ControlFlow::Continue(())
1847    }
1848
1849    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1850        let a = self.state[operands.src1].get_i64();
1851        let b = self.state[operands.src2].get_u32();
1852        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1853        ControlFlow::Continue(())
1854    }
1855
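        // The `_u6` variants take the shift amount as a 6-bit immediate
        // instead of a register; it is widened through `u8` to `u32` and then
        // subject to the same masking rules as above.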
1856    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1857        let a = self.state[operands.src1].get_u32();
1858        let b = u32::from(u8::from(operands.src2));
1859        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1860        ControlFlow::Continue(())
1861    }
1862
1863    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1864        let a = self.state[operands.src1].get_u32();
1865        let b = u32::from(u8::from(operands.src2));
1866        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1867        ControlFlow::Continue(())
1868    }
1869
1870    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1871        let a = self.state[operands.src1].get_i32();
1872        let b = u32::from(u8::from(operands.src2));
1873        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1874        ControlFlow::Continue(())
1875    }
1876
1877    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1878        let a = self.state[operands.src1].get_u64();
1879        let b = u32::from(u8::from(operands.src2));
1880        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1881        ControlFlow::Continue(())
1882    }
1883
1884    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1885        let a = self.state[operands.src1].get_u64();
1886        let b = u32::from(u8::from(operands.src2));
1887        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1888        ControlFlow::Continue(())
1889    }
1890
1891    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1892        let a = self.state[operands.src1].get_i64();
1893        let b = u32::from(u8::from(operands.src2));
1894        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1895        ControlFlow::Continue(())
1896    }
1897
1898    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1899        let a = self.state[src].get_i32();
1900        self.state[dst].set_i32(a.wrapping_neg());
1901        ControlFlow::Continue(())
1902    }
1903
1904    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1905        let a = self.state[src].get_i64();
1906        self.state[dst].set_i64(a.wrapping_neg());
1907        ControlFlow::Continue(())
1908    }
1909
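        // The comparison handlers below materialize their result as 0 or 1 in
        // the low 32 bits of `dst`.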
1910    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1911        let a = self.state[operands.src1].get_u64();
1912        let b = self.state[operands.src2].get_u64();
1913        self.state[operands.dst].set_u32(u32::from(a == b));
1914        ControlFlow::Continue(())
1915    }
1916
1917    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1918        let a = self.state[operands.src1].get_u64();
1919        let b = self.state[operands.src2].get_u64();
1920        self.state[operands.dst].set_u32(u32::from(a != b));
1921        ControlFlow::Continue(())
1922    }
1923
1924    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1925        let a = self.state[operands.src1].get_i64();
1926        let b = self.state[operands.src2].get_i64();
1927        self.state[operands.dst].set_u32(u32::from(a < b));
1928        ControlFlow::Continue(())
1929    }
1930
1931    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1932        let a = self.state[operands.src1].get_i64();
1933        let b = self.state[operands.src2].get_i64();
1934        self.state[operands.dst].set_u32(u32::from(a <= b));
1935        ControlFlow::Continue(())
1936    }
1937
1938    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1939        let a = self.state[operands.src1].get_u64();
1940        let b = self.state[operands.src2].get_u64();
1941        self.state[operands.dst].set_u32(u32::from(a < b));
1942        ControlFlow::Continue(())
1943    }
1944
1945    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1946        let a = self.state[operands.src1].get_u64();
1947        let b = self.state[operands.src2].get_u64();
1948        self.state[operands.dst].set_u32(u32::from(a <= b));
1949        ControlFlow::Continue(())
1950    }
1951
1952    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1953        let a = self.state[operands.src1].get_u32();
1954        let b = self.state[operands.src2].get_u32();
1955        self.state[operands.dst].set_u32(u32::from(a == b));
1956        ControlFlow::Continue(())
1957    }
1958
1959    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1960        let a = self.state[operands.src1].get_u32();
1961        let b = self.state[operands.src2].get_u32();
1962        self.state[operands.dst].set_u32(u32::from(a != b));
1963        ControlFlow::Continue(())
1964    }
1965
1966    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1967        let a = self.state[operands.src1].get_i32();
1968        let b = self.state[operands.src2].get_i32();
1969        self.state[operands.dst].set_u32(u32::from(a < b));
1970        ControlFlow::Continue(())
1971    }
1972
1973    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1974        let a = self.state[operands.src1].get_i32();
1975        let b = self.state[operands.src2].get_i32();
1976        self.state[operands.dst].set_u32(u32::from(a <= b));
1977        ControlFlow::Continue(())
1978    }
1979
1980    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1981        let a = self.state[operands.src1].get_u32();
1982        let b = self.state[operands.src2].get_u32();
1983        self.state[operands.dst].set_u32(u32::from(a < b));
1984        ControlFlow::Continue(())
1985    }
1986
1987    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1988        let a = self.state[operands.src1].get_u32();
1989        let b = self.state[operands.src2].get_u32();
1990        self.state[operands.dst].set_u32(u32::from(a <= b));
1991        ControlFlow::Continue(())
1992    }
1993
1994    fn push_frame(&mut self) -> ControlFlow<Done> {
1995        self.push::<crate::PushFrame, _>(self.state.lr)?;
1996        self.push::<crate::PushFrame, _>(self.state.fp)?;
1997        self.state.fp = self.state[XReg::sp].get_ptr();
1998        ControlFlow::Continue(())
1999    }
2000
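        // A sketch of the frame that `push_frame_save` below builds, assuming
        // 8-byte pointers and with addresses growing upward (derived from the
        // stores in its body); `push_frame` above is the degenerate case with
        // `amt = 0` and no extra registers:
        //
        //     old sp = new sp + amt + 16
        //              new sp + amt + 8 : saved `lr`
        //              new sp + amt     : saved `fp` (the new `fp` points here)
        //              new sp + amt - 8 : first register in `regs`
        //              ...              : remaining `regs`, then scratch space
        //     new sp                    : bottom of the allocation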
2001    #[inline]
2002    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2003        // Decrement the stack pointer by `amt` bytes, plus room for two
2004        // more pointers to hold fp/lr.
2005        let ptr_size = size_of::<usize>();
2006        let full_amt = usize::from(amt) + 2 * ptr_size;
2007        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
2008        self.set_sp::<crate::PushFrameSave>(new_sp)?;
2009
2010        unsafe {
2011            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
2012            // that order, at the top of the allocated area.
2013            self.store_ne::<_, crate::PushFrameSave>(
2014                AddrO32 {
2015                    addr: XReg::sp,
2016                    offset: (full_amt - 1 * ptr_size) as i32,
2017                },
2018                self.state.lr,
2019            )?;
2020            self.store_ne::<_, crate::PushFrameSave>(
2021                AddrO32 {
2022                    addr: XReg::sp,
2023                    offset: (full_amt - 2 * ptr_size) as i32,
2024                },
2025                self.state.fp,
2026            )?;
2027
2028            // Set `fp` to the top of our frame, i.e. the slot where the old `fp` was just stored.
2029            let mut offset = i32::from(amt);
2030            self.state.fp = self.state[XReg::sp]
2031                .get_ptr::<u8>()
2032                .byte_offset(offset as isize);
2033
2034            // Next save any registers in `regs` to the stack.
2035            for reg in regs {
2036                offset -= 8;
2037                self.store_ne::<_, crate::PushFrameSave>(
2038                    AddrO32 {
2039                        addr: XReg::sp,
2040                        offset,
2041                    },
2042                    self.state[reg].get_u64(),
2043                )?;
2044            }
2045        }
2046        ControlFlow::Continue(())
2047    }
2048
2049    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2050        // Restore all registers in `regs`, then defer to the normal
2051        // `pop_frame` handler below to restore fp/lr.
2052        unsafe {
2053            let mut offset = i32::from(amt);
2054            for reg in regs {
2055                offset -= 8;
2056                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2057                    addr: XReg::sp,
2058                    offset,
2059                })?;
2060                self.state[reg].set_u64(val);
2061            }
2062        }
2063        self.pop_frame()
2064    }
2065
2066    fn pop_frame(&mut self) -> ControlFlow<Done> {
2067        self.set_sp_unchecked(self.state.fp);
2068        let fp = self.pop();
2069        let lr = self.pop();
2070        self.state.fp = fp;
2071        self.state.lr = lr;
2072        ControlFlow::Continue(())
2073    }
2074
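        // A sketch of the `br_table32` encoding this handler assumes: the
        // opcode's operands are followed immediately by `amt` 32-bit
        // `PcRelOffset` entries, and `idx` is clamped to the last entry (so
        // valid bytecode must have a nonzero `amt`, or `amt - 1` below would
        // underflow):
        //
        //     br_table32 idx, amt
        //     offset0            ; taken when idx == 0
        //     offset1            ; taken when idx == 1
        //     ...
        //     offsetN            ; taken when idx >= N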
2075    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2076        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2077        // SAFETY: part of the contract of the interpreter is only dealing with
2078        // valid bytecode, so this offset should be safe.
2079        self.pc = unsafe { self.pc.offset(idx * 4) };
2080
2081        // Decode the `PcRelOffset` through a temporary copy so `self.pc` is
2082        // left pointing at the offset itself, which the jump is relative to.
2083        let mut tmp = self.pc;
2084        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2085        let offset = isize::try_from(i32::from(rel)).unwrap();
2086        self.pc = unsafe { self.pc.offset(offset) };
2087        ControlFlow::Continue(())
2088    }
2089
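        // Note the asymmetry below: allocating goes through the checked
        // `set_sp`, which can trap on stack overflow, while freeing uses the
        // unchecked variant since moving `sp` back up cannot overflow.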
2090    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2091        let amt = usize::try_from(amt).unwrap();
2092        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2093        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2094        ControlFlow::Continue(())
2095    }
2096
2097    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2098        let amt = usize::try_from(amt).unwrap();
2099        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2100        self.set_sp_unchecked(new_sp);
2101        ControlFlow::Continue(())
2102    }
2103
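        // The extension handlers below read the full 64-bit register, truncate
        // to the narrow source width, then zero- or sign-extend back to 64
        // bits.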
2104    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2105        let src = self.state[src].get_u64() as u8;
2106        self.state[dst].set_u64(src.into());
2107        ControlFlow::Continue(())
2108    }
2109
2110    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2111        let src = self.state[src].get_u64() as u16;
2112        self.state[dst].set_u64(src.into());
2113        ControlFlow::Continue(())
2114    }
2115
2116    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2117        let src = self.state[src].get_u64() as u32;
2118        self.state[dst].set_u64(src.into());
2119        ControlFlow::Continue(())
2120    }
2121
2122    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2123        let src = self.state[src].get_i64() as i8;
2124        self.state[dst].set_i64(src.into());
2125        ControlFlow::Continue(())
2126    }
2127
2128    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2129        let src = self.state[src].get_i64() as i16;
2130        self.state[dst].set_i64(src.into());
2131        ControlFlow::Continue(())
2132    }
2133
2134    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2135        let src = self.state[src].get_i64() as i32;
2136        self.state[dst].set_i64(src.into());
2137        ControlFlow::Continue(())
2138    }
2139
2140    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2141        let a = self.state[operands.src1].get_i32();
2142        let b = self.state[operands.src2].get_i32();
2143        match a.checked_div(b) {
2144            Some(result) => {
2145                self.state[operands.dst].set_i32(result);
2146                ControlFlow::Continue(())
2147            }
2148            None => {
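                    // `checked_div` returns `None` both for division by zero
                    // and for `i32::MIN / -1`, so disambiguate the trap kind.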
2149                let kind = if b == 0 {
2150                    TrapKind::DivideByZero
2151                } else {
2152                    TrapKind::IntegerOverflow
2153                };
2154                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2155            }
2156        }
2157    }
2158
2159    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2160        let a = self.state[operands.src1].get_i64();
2161        let b = self.state[operands.src2].get_i64();
2162        match a.checked_div(b) {
2163            Some(result) => {
2164                self.state[operands.dst].set_i64(result);
2165                ControlFlow::Continue(())
2166            }
2167            None => {
2168                let kind = if b == 0 {
2169                    TrapKind::DivideByZero
2170                } else {
2171                    TrapKind::IntegerOverflow
2172                };
2173                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2174            }
2175        }
2176    }
2177
2178    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2179        let a = self.state[operands.src1].get_u32();
2180        let b = self.state[operands.src2].get_u32();
2181        match a.checked_div(b) {
2182            Some(result) => {
2183                self.state[operands.dst].set_u32(result);
2184                ControlFlow::Continue(())
2185            }
2186            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2187        }
2188    }
2189
2190    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2191        let a = self.state[operands.src1].get_u64();
2192        let b = self.state[operands.src2].get_u64();
2193        match a.checked_div(b) {
2194            Some(result) => {
2195                self.state[operands.dst].set_u64(result);
2196                ControlFlow::Continue(())
2197            }
2198            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2199        }
2200    }
2201
2202    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2203        let a = self.state[operands.src1].get_i32();
2204        let b = self.state[operands.src2].get_i32();
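            // Rust's `checked_rem` returns `None` for `i32::MIN % -1` because
            // the corresponding division overflows, but this instruction (like
            // Wasm's `i32.rem_s`) defines the result as 0, so special-case it.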
2205        let result = if a == i32::MIN && b == -1 {
2206            Some(0)
2207        } else {
2208            a.checked_rem(b)
2209        };
2210        match result {
2211            Some(result) => {
2212                self.state[operands.dst].set_i32(result);
2213                ControlFlow::Continue(())
2214            }
2215            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2216        }
2217    }
2218
2219    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2220        let a = self.state[operands.src1].get_i64();
2221        let b = self.state[operands.src2].get_i64();
2222        let result = if a == i64::MIN && b == -1 {
2223            Some(0)
2224        } else {
2225            a.checked_rem(b)
2226        };
2227        match result {
2228            Some(result) => {
2229                self.state[operands.dst].set_i64(result);
2230                ControlFlow::Continue(())
2231            }
2232            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2233        }
2234    }
2235
2236    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2237        let a = self.state[operands.src1].get_u32();
2238        let b = self.state[operands.src2].get_u32();
2239        match a.checked_rem(b) {
2240            Some(result) => {
2241                self.state[operands.dst].set_u32(result);
2242                ControlFlow::Continue(())
2243            }
2244            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2245        }
2246    }
2247
2248    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2249        let a = self.state[operands.src1].get_u64();
2250        let b = self.state[operands.src2].get_u64();
2251        match a.checked_rem(b) {
2252            Some(result) => {
2253                self.state[operands.dst].set_u64(result);
2254                ControlFlow::Continue(())
2255            }
2256            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2257        }
2258    }
2259
2260    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2261        let a = self.state[operands.src1].get_u32();
2262        let b = self.state[operands.src2].get_u32();
2263        self.state[operands.dst].set_u32(a & b);
2264        ControlFlow::Continue(())
2265    }
2266
2267    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2268        self.xband32_s32(dst, src1, src2.into())
2269    }
2270
2271    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2272        let a = self.state[src1].get_i32();
2273        self.state[dst].set_i32(a & src2);
2274        ControlFlow::Continue(())
2275    }
2276
2277    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2278        let a = self.state[operands.src1].get_u64();
2279        let b = self.state[operands.src2].get_u64();
2280        self.state[operands.dst].set_u64(a & b);
2281        ControlFlow::Continue(())
2282    }
2283
2284    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2285        self.xband64_s32(dst, src1, src2.into())
2286    }
2287
2288    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2289        let a = self.state[src1].get_i64();
2290        self.state[dst].set_i64(a & i64::from(src2));
2291        ControlFlow::Continue(())
2292    }
2293
2294    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2295        let a = self.state[operands.src1].get_u32();
2296        let b = self.state[operands.src2].get_u32();
2297        self.state[operands.dst].set_u32(a | b);
2298        ControlFlow::Continue(())
2299    }
2300
2301    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2302        self.xbor32_s32(dst, src1, src2.into())
2303    }
2304
2305    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2306        let a = self.state[src1].get_i32();
2307        self.state[dst].set_i32(a | src2);
2308        ControlFlow::Continue(())
2309    }
2310
2311    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2312        let a = self.state[operands.src1].get_u64();
2313        let b = self.state[operands.src2].get_u64();
2314        self.state[operands.dst].set_u64(a | b);
2315        ControlFlow::Continue(())
2316    }
2317
2318    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2319        self.xbor64_s32(dst, src1, src2.into())
2320    }
2321
2322    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2323        let a = self.state[src1].get_i64();
2324        self.state[dst].set_i64(a | i64::from(src2));
2325        ControlFlow::Continue(())
2326    }
2327
2328    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2329        let a = self.state[operands.src1].get_u32();
2330        let b = self.state[operands.src2].get_u32();
2331        self.state[operands.dst].set_u32(a ^ b);
2332        ControlFlow::Continue(())
2333    }
2334
2335    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2336        self.xbxor32_s32(dst, src1, src2.into())
2337    }
2338
2339    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2340        let a = self.state[src1].get_i32();
2341        self.state[dst].set_i32(a ^ src2);
2342        ControlFlow::Continue(())
2343    }
2344
2345    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2346        let a = self.state[operands.src1].get_u64();
2347        let b = self.state[operands.src2].get_u64();
2348        self.state[operands.dst].set_u64(a ^ b);
2349        ControlFlow::Continue(())
2350    }
2351
2352    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2353        self.xbxor64_s32(dst, src1, src2.into())
2354    }
2355
2356    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2357        let a = self.state[src1].get_i64();
2358        self.state[dst].set_i64(a ^ i64::from(src2));
2359        ControlFlow::Continue(())
2360    }
2361
2362    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2363        let a = self.state[src].get_u32();
2364        self.state[dst].set_u32(!a);
2365        ControlFlow::Continue(())
2366    }
2367
2368    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2369        let a = self.state[src].get_u64();
2370        self.state[dst].set_u64(!a);
2371        ControlFlow::Continue(())
2372    }
2373
2374    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2375        let a = self.state[operands.src1].get_u32();
2376        let b = self.state[operands.src2].get_u32();
2377        self.state[operands.dst].set_u32(a.min(b));
2378        ControlFlow::Continue(())
2379    }
2380
2381    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2382        let a = self.state[operands.src1].get_i32();
2383        let b = self.state[operands.src2].get_i32();
2384        self.state[operands.dst].set_i32(a.min(b));
2385        ControlFlow::Continue(())
2386    }
2387
2388    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2389        let a = self.state[operands.src1].get_u32();
2390        let b = self.state[operands.src2].get_u32();
2391        self.state[operands.dst].set_u32(a.max(b));
2392        ControlFlow::Continue(())
2393    }
2394
2395    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2396        let a = self.state[operands.src1].get_i32();
2397        let b = self.state[operands.src2].get_i32();
2398        self.state[operands.dst].set_i32(a.max(b));
2399        ControlFlow::Continue(())
2400    }
2401
2402    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2403        let a = self.state[operands.src1].get_u64();
2404        let b = self.state[operands.src2].get_u64();
2405        self.state[operands.dst].set_u64(a.min(b));
2406        ControlFlow::Continue(())
2407    }
2408
2409    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2410        let a = self.state[operands.src1].get_i64();
2411        let b = self.state[operands.src2].get_i64();
2412        self.state[operands.dst].set_i64(a.min(b));
2413        ControlFlow::Continue(())
2414    }
2415
2416    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2417        let a = self.state[operands.src1].get_u64();
2418        let b = self.state[operands.src2].get_u64();
2419        self.state[operands.dst].set_u64(a.max(b));
2420        ControlFlow::Continue(())
2421    }
2422
2423    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2424        let a = self.state[operands.src1].get_i64();
2425        let b = self.state[operands.src2].get_i64();
2426        self.state[operands.dst].set_i64(a.max(b));
2427        ControlFlow::Continue(())
2428    }
2429
2430    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2431        let a = self.state[src].get_u32();
2432        self.state[dst].set_u32(a.trailing_zeros());
2433        ControlFlow::Continue(())
2434    }
2435
2436    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2437        let a = self.state[src].get_u64();
2438        self.state[dst].set_u64(a.trailing_zeros().into());
2439        ControlFlow::Continue(())
2440    }
2441
2442    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2443        let a = self.state[src].get_u32();
2444        self.state[dst].set_u32(a.leading_zeros());
2445        ControlFlow::Continue(())
2446    }
2447
2448    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2449        let a = self.state[src].get_u64();
2450        self.state[dst].set_u64(a.leading_zeros().into());
2451        ControlFlow::Continue(())
2452    }
2453
2454    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2455        let a = self.state[src].get_u32();
2456        self.state[dst].set_u32(a.count_ones());
2457        ControlFlow::Continue(())
2458    }
2459
2460    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2461        let a = self.state[src].get_u64();
2462        self.state[dst].set_u64(a.count_ones().into());
2463        ControlFlow::Continue(())
2464    }
2465
2466    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2467        let a = self.state[operands.src1].get_u32();
2468        let b = self.state[operands.src2].get_u32();
2469        self.state[operands.dst].set_u32(a.rotate_left(b));
2470        ControlFlow::Continue(())
2471    }
2472
2473    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2474        let a = self.state[operands.src1].get_u64();
2475        let b = self.state[operands.src2].get_u32();
2476        self.state[operands.dst].set_u64(a.rotate_left(b));
2477        ControlFlow::Continue(())
2478    }
2479
2480    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2481        let a = self.state[operands.src1].get_u32();
2482        let b = self.state[operands.src2].get_u32();
2483        self.state[operands.dst].set_u32(a.rotate_right(b));
2484        ControlFlow::Continue(())
2485    }
2486
2487    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2488        let a = self.state[operands.src1].get_u64();
2489        let b = self.state[operands.src2].get_u32();
2490        self.state[operands.dst].set_u64(a.rotate_right(b));
2491        ControlFlow::Continue(())
2492    }
2493
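        // Conditional move: `dst = if cond != 0 { if_nonzero } else { if_zero }`.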
2494    fn xselect32(
2495        &mut self,
2496        dst: XReg,
2497        cond: XReg,
2498        if_nonzero: XReg,
2499        if_zero: XReg,
2500    ) -> ControlFlow<Done> {
2501        let result = if self.state[cond].get_u32() != 0 {
2502            self.state[if_nonzero].get_u32()
2503        } else {
2504            self.state[if_zero].get_u32()
2505        };
2506        self.state[dst].set_u32(result);
2507        ControlFlow::Continue(())
2508    }
2509
2510    fn xselect64(
2511        &mut self,
2512        dst: XReg,
2513        cond: XReg,
2514        if_nonzero: XReg,
2515        if_zero: XReg,
2516    ) -> ControlFlow<Done> {
2517        let result = if self.state[cond].get_u32() != 0 {
2518            self.state[if_nonzero].get_u64()
2519        } else {
2520            self.state[if_zero].get_u64()
2521        };
2522        self.state[dst].set_u64(result);
2523        ControlFlow::Continue(())
2524    }
2525
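        // `wrapping_abs` maps `i32::MIN` (resp. `i64::MIN`) to itself rather
        // than trapping, since its magnitude is not representable.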
2526    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2527        let a = self.state[src].get_i32();
2528        self.state[dst].set_i32(a.wrapping_abs());
2529        ControlFlow::Continue(())
2530    }
2531
2532    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2533        let a = self.state[src].get_i64();
2534        self.state[dst].set_i64(a.wrapping_abs());
2535        ControlFlow::Continue(())
2536    }
2537
2538    // =========================================================================
2539    // o32 addressing modes
2540
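        // In all of the load/store handlers, `load_ne`/`store_ne` perform raw
        // native-endian accesses; the `le`/`be` opcode suffixes are realized by
        // the explicit `from_le`/`to_le` (and `from_be`/`to_be`) conversions.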
2541    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2542        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2543        self.state[dst].set_u32(result.into());
2544        ControlFlow::Continue(())
2545    }
2546
2547    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2548        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2549        self.state[dst].set_i32(result.into());
2550        ControlFlow::Continue(())
2551    }
2552
2553    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2554        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2555        self.state[dst].set_u32(u16::from_le(result).into());
2556        ControlFlow::Continue(())
2557    }
2558
2559    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2560        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2561        self.state[dst].set_i32(i16::from_le(result).into());
2562        ControlFlow::Continue(())
2563    }
2564
2565    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2566        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2567        self.state[dst].set_i32(i32::from_le(result));
2568        ControlFlow::Continue(())
2569    }
2570
2571    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2572        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2573        self.state[dst].set_i64(i64::from_le(result));
2574        ControlFlow::Continue(())
2575    }
2576
2577    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2578        let val = self.state[val].get_u32() as u8;
2579        unsafe {
2580            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2581        }
2582        ControlFlow::Continue(())
2583    }
2584
2585    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2586        let val = self.state[val].get_u32() as u16;
2587        unsafe {
2588            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2589        }
2590        ControlFlow::Continue(())
2591    }
2592
2593    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2594        let val = self.state[val].get_u32();
2595        unsafe {
2596            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2597        }
2598        ControlFlow::Continue(())
2599    }
2600
2601    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2602        let val = self.state[val].get_u64();
2603        unsafe {
2604            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2605        }
2606        ControlFlow::Continue(())
2607    }
2608
2609    // =========================================================================
2610    // g32 addressing modes
2611
2612    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2613        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2614        self.state[dst].set_u32(result.into());
2615        ControlFlow::Continue(())
2616    }
2617
2618    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2619        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2620        self.state[dst].set_i32(result.into());
2621        ControlFlow::Continue(())
2622    }
2623
2624    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2625        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2626        self.state[dst].set_u32(u16::from_le(result).into());
2627        ControlFlow::Continue(())
2628    }
2629
2630    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2631        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2632        self.state[dst].set_i32(i16::from_le(result).into());
2633        ControlFlow::Continue(())
2634    }
2635
2636    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2637        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2638        self.state[dst].set_i32(i32::from_le(result));
2639        ControlFlow::Continue(())
2640    }
2641
2642    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2643        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2644        self.state[dst].set_i64(i64::from_le(result));
2645        ControlFlow::Continue(())
2646    }
2647
2648    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2649        let val = self.state[val].get_u32() as u8;
2650        unsafe {
2651            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2652        }
2653        ControlFlow::Continue(())
2654    }
2655
2656    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2657        let val = self.state[val].get_u32() as u16;
2658        unsafe {
2659            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2660        }
2661        ControlFlow::Continue(())
2662    }
2663
2664    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2665        let val = self.state[val].get_u32();
2666        unsafe {
2667            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2668        }
2669        ControlFlow::Continue(())
2670    }
2671
2672    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2673        let val = self.state[val].get_u64();
2674        unsafe {
2675            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2676        }
2677        ControlFlow::Continue(())
2678    }
2679
2680    // =========================================================================
2681    // z addressing modes
2682
2683    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2684        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2685        self.state[dst].set_u32(result.into());
2686        ControlFlow::Continue(())
2687    }
2688
2689    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2690        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2691        self.state[dst].set_i32(result.into());
2692        ControlFlow::Continue(())
2693    }
2694
2695    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2696        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2697        self.state[dst].set_u32(u16::from_le(result).into());
2698        ControlFlow::Continue(())
2699    }
2700
2701    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2702        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2703        self.state[dst].set_i32(i16::from_le(result).into());
2704        ControlFlow::Continue(())
2705    }
2706
2707    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2708        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2709        self.state[dst].set_i32(i32::from_le(result));
2710        ControlFlow::Continue(())
2711    }
2712
2713    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2714        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2715        self.state[dst].set_i64(i64::from_le(result));
2716        ControlFlow::Continue(())
2717    }
2718
2719    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2720        let val = self.state[val].get_u32() as u8;
2721        unsafe {
2722            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2723        }
2724        ControlFlow::Continue(())
2725    }
2726
2727    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2728        let val = self.state[val].get_u32() as u16;
2729        unsafe {
2730            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2731        }
2732        ControlFlow::Continue(())
2733    }
2734
2735    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2736        let val = self.state[val].get_u32();
2737        unsafe {
2738            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2739        }
2740        ControlFlow::Continue(())
2741    }
2742
2743    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2744        let val = self.state[val].get_u64();
2745        unsafe {
2746            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2747        }
2748        ControlFlow::Continue(())
2749    }
2750
2751    // =========================================================================
2752    // g32bne addressing modes
2753
2754    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2755        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2756        self.state[dst].set_u32(result.into());
2757        ControlFlow::Continue(())
2758    }
2759
2760    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2761        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2762        self.state[dst].set_i32(result.into());
2763        ControlFlow::Continue(())
2764    }
2765
2766    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2767        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2768        self.state[dst].set_u32(u16::from_le(result).into());
2769        ControlFlow::Continue(())
2770    }
2771
2772    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2773        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2774        self.state[dst].set_i32(i16::from_le(result).into());
2775        ControlFlow::Continue(())
2776    }
2777
2778    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2779        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2780        self.state[dst].set_i32(i32::from_le(result));
2781        ControlFlow::Continue(())
2782    }
2783
2784    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2785        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2786        self.state[dst].set_i64(i64::from_le(result));
2787        ControlFlow::Continue(())
2788    }
2789
2790    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2791        let val = self.state[val].get_u32() as u8;
2792        unsafe {
2793            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2794        }
2795        ControlFlow::Continue(())
2796    }
2797
2798    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2799        let val = self.state[val].get_u32() as u16;
2800        unsafe {
2801            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2802        }
2803        ControlFlow::Continue(())
2804    }
2805
2806    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2807        let val = self.state[val].get_u32();
2808        unsafe {
2809            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2810        }
2811        ControlFlow::Continue(())
2812    }
2813
2814    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2815        let val = self.state[val].get_u64();
2816        unsafe {
2817            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2818        }
2819        ControlFlow::Continue(())
2820    }
2821}
2822
2823impl ExtendedOpVisitor for Interpreter<'_> {
2824    fn nop(&mut self) -> ControlFlow<Done> {
2825        ControlFlow::Continue(())
2826    }
2827
2828    fn trap(&mut self) -> ControlFlow<Done> {
2829        self.done_trap::<crate::Trap>()
2830    }
2831
2832    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2833        self.done_call_indirect_host(id)
2834    }
2835
2836    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2837        let src = self.state[src].get_u32();
2838        self.state[dst].set_u32(src.swap_bytes());
2839        ControlFlow::Continue(())
2840    }
2841
2842    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2843        let src = self.state[src].get_u64();
2844        self.state[dst].set_u64(src.swap_bytes());
2845        ControlFlow::Continue(())
2846    }
2847
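        // `xbmask*` broadcasts the truthiness of `src` into a full-width mask:
        // zero stays zero and anything nonzero becomes all ones, e.g.
        // `5 -> 0xffff_ffff` for the 32-bit variant.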
2848    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2849        let a = self.state[src].get_u32();
2850        if a == 0 {
2851            self.state[dst].set_u32(0);
2852        } else {
2853            self.state[dst].set_i32(-1);
2854        }
2855        ControlFlow::Continue(())
2856    }
2857
2858    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2859        let a = self.state[src].get_u64();
2860        if a == 0 {
2861            self.state[dst].set_u64(0);
2862        } else {
2863            self.state[dst].set_i64(-1);
2864        }
2865        ControlFlow::Continue(())
2866    }
2867
2868    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2869        let a = self.state[operands.src1].get_u32();
2870        let b = self.state[operands.src2].get_u32();
2871        match a.checked_add(b) {
2872            Some(c) => {
2873                self.state[operands.dst].set_u32(c);
2874                ControlFlow::Continue(())
2875            }
2876            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2877        }
2878    }
2879
2880    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2881        let a = self.state[operands.src1].get_u64();
2882        let b = self.state[operands.src2].get_u64();
2883        match a.checked_add(b) {
2884            Some(c) => {
2885                self.state[operands.dst].set_u64(c);
2886                ControlFlow::Continue(())
2887            }
2888            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2889        }
2890    }
2891
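        // High half of a widening 64 x 64 -> 128-bit multiply. For example, in
        // the unsigned variant `u64::MAX * u64::MAX` produces
        // `0xffff_ffff_ffff_fffe` in `dst`.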
2892    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2893        let a = self.state[operands.src1].get_i64();
2894        let b = self.state[operands.src2].get_i64();
2895        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2896        self.state[operands.dst].set_i64(result);
2897        ControlFlow::Continue(())
2898    }
2899
2900    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2901        let a = self.state[operands.src1].get_u64();
2902        let b = self.state[operands.src2].get_u64();
2903        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2904        self.state[operands.dst].set_u64(result);
2905        ControlFlow::Continue(())
2906    }
2907
2908    // =========================================================================
2909    // o32 addressing modes, big-endian loads/stores for X-registers
2910
2911    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2912        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2913        self.state[dst].set_u32(u16::from_be(result).into());
2914        ControlFlow::Continue(())
2915    }
2916
2917    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2918        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2919        self.state[dst].set_i32(i16::from_be(result).into());
2920        ControlFlow::Continue(())
2921    }
2922
2923    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2924        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2925        self.state[dst].set_i32(i32::from_be(result));
2926        ControlFlow::Continue(())
2927    }
2928
2929    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2930        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2931        self.state[dst].set_i64(i64::from_be(result));
2932        ControlFlow::Continue(())
2933    }
2934
2935    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2936        let val = self.state[val].get_u32() as u16;
2937        unsafe {
2938            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2939        }
2940        ControlFlow::Continue(())
2941    }
2942
2943    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2944        let val = self.state[val].get_u32();
2945        unsafe {
2946            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2947        }
2948        ControlFlow::Continue(())
2949    }
2950
2951    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2952        let val = self.state[val].get_u64();
2953        unsafe {
2954            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2955        }
2956        ControlFlow::Continue(())
2957    }
2958
2959    // =========================================================================
2960    // o32 addressing modes for little-endian F-registers
2961
2962    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2963        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2964        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2965        ControlFlow::Continue(())
2966    }
2967
2968    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2969        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2970        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2971        ControlFlow::Continue(())
2972    }
2973
2974    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2975        let val = self.state[src].get_f32();
2976        unsafe {
2977            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2978        }
2979        ControlFlow::Continue(())
2980    }
2981
2982    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2983        let val = self.state[src].get_f64();
2984        unsafe {
2985            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2986        }
2987        ControlFlow::Continue(())
2988    }
2989
2990    // =========================================================================
2991    // o32 addressing modes for big-endian F-registers
2992
2993    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2994        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
2995        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
2996        ControlFlow::Continue(())
2997    }
2998
2999    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3000        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
3001        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
3002        ControlFlow::Continue(())
3003    }
3004
3005    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3006        let val = self.state[src].get_f32();
3007        unsafe {
3008            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
3009        }
3010        ControlFlow::Continue(())
3011    }
3012
3013    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3014        let val = self.state[src].get_f64();
3015        unsafe {
3016            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
3017        }
3018        ControlFlow::Continue(())
3019    }
3020
3021    // =========================================================================
3022    // z addressing modes for little-endian F-registers
3023
3024    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3025        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
3026        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3027        ControlFlow::Continue(())
3028    }
3029
3030    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3031        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3032        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3033        ControlFlow::Continue(())
3034    }
3035
3036    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3037        let val = self.state[src].get_f32();
3038        unsafe {
3039            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3040        }
3041        ControlFlow::Continue(())
3042    }
3043
3044    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3045        let val = self.state[src].get_f64();
3046        unsafe {
3047            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3048        }
3049        ControlFlow::Continue(())
3050    }
3051
3052    // =========================================================================
3053    // g32 addressing modes for little-endian F-registers
3054
3055    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3056        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3057        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3058        ControlFlow::Continue(())
3059    }
3060
3061    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3062        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3063        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3064        ControlFlow::Continue(())
3065    }
3066
3067    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3068        let val = self.state[src].get_f32();
3069        unsafe {
3070            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3071        }
3072        ControlFlow::Continue(())
3073    }
3074
3075    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3076        let val = self.state[src].get_f64();
3077        unsafe {
3078            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3079        }
3080        ControlFlow::Continue(())
3081    }
3082
3083    // =========================================================================
3084    // o32 addressing modes for little-endian V-registers
3085
3086    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3087    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3088        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3089        self.state[dst].set_u128(u128::from_le(val));
3090        ControlFlow::Continue(())
3091    }
3092
3093    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3094    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3095        let val = self.state[src].get_u128();
3096        unsafe {
3097            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3098        }
3099        ControlFlow::Continue(())
3100    }
3101
3102    // =========================================================================
3103    // z addressing modes for little-endian V-registers
3104
3105    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3106    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3107        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3108        self.state[dst].set_u128(u128::from_le(val));
3109        ControlFlow::Continue(())
3110    }
3111
3112    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3113    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3114        let val = self.state[src].get_u128();
3115        unsafe {
3116            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3117        }
3118        ControlFlow::Continue(())
3119    }
3120
3121    // =========================================================================
3122    // g32 addressing modes for little-endian V-registers
3123
3124    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3125    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3126        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3127        self.state[dst].set_u128(u128::from_le(val));
3128        ControlFlow::Continue(())
3129    }
3130
3131    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3132    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3133        let val = self.state[src].get_u128();
3134        unsafe {
3135            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3136        }
3137        ControlFlow::Continue(())
3138    }
3139
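    // Copy the interpreter's frame-pointer/link-register state into a
    // general-purpose register.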
3140    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3141        let fp = self.state.fp;
3142        self.state[dst].set_ptr(fp);
3143        ControlFlow::Continue(())
3144    }
3145
3146    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3147        let lr = self.state.lr;
3148        self.state[dst].set_ptr(lr);
3149        ControlFlow::Continue(())
3150    }
3151
3152    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3153        let val = self.state[src];
3154        self.state[dst] = val;
3155        ControlFlow::Continue(())
3156    }
3157
3158    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3159    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3160        let val = self.state[src];
3161        self.state[dst] = val;
3162        ControlFlow::Continue(())
3163    }
3164
3165    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3166        self.state[dst].set_f32(f32::from_bits(bits));
3167        ControlFlow::Continue(())
3168    }
3169
3170    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3171        self.state[dst].set_f64(f64::from_bits(bits));
3172        ControlFlow::Continue(())
3173    }
3174
3175    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3176        let val = self.state[src].get_f32();
3177        self.state[dst].set_u32(val.to_bits());
3178        ControlFlow::Continue(())
3179    }
3180
3181    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3182        let val = self.state[src].get_f64();
3183        self.state[dst].set_u64(val.to_bits());
3184        ControlFlow::Continue(())
3185    }
3186
3187    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3188        let val = self.state[src].get_u32();
3189        self.state[dst].set_f32(f32::from_bits(val));
3190        ControlFlow::Continue(())
3191    }
3192
3193    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3194        let val = self.state[src].get_u64();
3195        self.state[dst].set_f64(f64::from_bits(val));
3196        ControlFlow::Continue(())
3197    }
3198
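    // Float comparisons follow IEEE 754 semantics: any comparison involving
    // NaN is false, which makes `fneq*` true for NaN inputs. Results are
    // written as 0 or 1 into an X register.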
3199    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3200        let a = self.state[src1].get_f32();
3201        let b = self.state[src2].get_f32();
3202        self.state[dst].set_u32(u32::from(a == b));
3203        ControlFlow::Continue(())
3204    }
3205
3206    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3207        let a = self.state[src1].get_f32();
3208        let b = self.state[src2].get_f32();
3209        self.state[dst].set_u32(u32::from(a != b));
3210        ControlFlow::Continue(())
3211    }
3212
3213    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3214        let a = self.state[src1].get_f32();
3215        let b = self.state[src2].get_f32();
3216        self.state[dst].set_u32(u32::from(a < b));
3217        ControlFlow::Continue(())
3218    }
3219
3220    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3221        let a = self.state[src1].get_f32();
3222        let b = self.state[src2].get_f32();
3223        self.state[dst].set_u32(u32::from(a <= b));
3224        ControlFlow::Continue(())
3225    }
3226
3227    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3228        let a = self.state[src1].get_f64();
3229        let b = self.state[src2].get_f64();
3230        self.state[dst].set_u32(u32::from(a == b));
3231        ControlFlow::Continue(())
3232    }
3233
3234    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3235        let a = self.state[src1].get_f64();
3236        let b = self.state[src2].get_f64();
3237        self.state[dst].set_u32(u32::from(a != b));
3238        ControlFlow::Continue(())
3239    }
3240
3241    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3242        let a = self.state[src1].get_f64();
3243        let b = self.state[src2].get_f64();
3244        self.state[dst].set_u32(u32::from(a < b));
3245        ControlFlow::Continue(())
3246    }
3247
3248    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3249        let a = self.state[src1].get_f64();
3250        let b = self.state[src2].get_f64();
3251        self.state[dst].set_u32(u32::from(a <= b));
3252        ControlFlow::Continue(())
3253    }
3254
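    // Conditional selects: the condition is read as a 32-bit integer and any
    // nonzero value picks `if_nonzero`.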
3255    fn fselect32(
3256        &mut self,
3257        dst: FReg,
3258        cond: XReg,
3259        if_nonzero: FReg,
3260        if_zero: FReg,
3261    ) -> ControlFlow<Done> {
3262        let result = if self.state[cond].get_u32() != 0 {
3263            self.state[if_nonzero].get_f32()
3264        } else {
3265            self.state[if_zero].get_f32()
3266        };
3267        self.state[dst].set_f32(result);
3268        ControlFlow::Continue(())
3269    }
3270
3271    fn fselect64(
3272        &mut self,
3273        dst: FReg,
3274        cond: XReg,
3275        if_nonzero: FReg,
3276        if_zero: FReg,
3277    ) -> ControlFlow<Done> {
3278        let result = if self.state[cond].get_u32() != 0 {
3279            self.state[if_nonzero].get_f64()
3280        } else {
3281            self.state[if_zero].get_f64()
3282        };
3283        self.state[dst].set_f64(result);
3284        ControlFlow::Continue(())
3285    }
3286
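    // Integer-to-float conversions: Rust's `as` rounds to the nearest
    // representable value (ties to even), matching Wasm's conversion ops.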
3287    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3288        let a = self.state[src].get_i32();
3289        self.state[dst].set_f32(a as f32);
3290        ControlFlow::Continue(())
3291    }
3292
3293    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3294        let a = self.state[src].get_u32();
3295        self.state[dst].set_f32(a as f32);
3296        ControlFlow::Continue(())
3297    }
3298
3299    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3300        let a = self.state[src].get_i64();
3301        self.state[dst].set_f32(a as f32);
3302        ControlFlow::Continue(())
3303    }
3304
3305    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3306        let a = self.state[src].get_u64();
3307        self.state[dst].set_f32(a as f32);
3308        ControlFlow::Continue(())
3309    }
3310
3311    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3312        let a = self.state[src].get_i32();
3313        self.state[dst].set_f64(a as f64);
3314        ControlFlow::Continue(())
3315    }
3316
3317    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3318        let a = self.state[src].get_u32();
3319        self.state[dst].set_f64(a as f64);
3320        ControlFlow::Continue(())
3321    }
3322
3323    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3324        let a = self.state[src].get_i64();
3325        self.state[dst].set_f64(a as f64);
3326        ControlFlow::Continue(())
3327    }
3328
3329    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3330        let a = self.state[src].get_u64();
3331        self.state[dst].set_f64(a as f64);
3332        ControlFlow::Continue(())
3333    }
3334
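    // Checked float-to-int conversions: `check_xnn_from_f32`/`_f64` trap when
    // the input is NaN or outside the destination type's bounds, so the `as`
    // casts below are always in range.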
3335    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3336        let a = self.state[src].get_f32();
3337        self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
3338        self.state[dst].set_i32(a as i32);
3339        ControlFlow::Continue(())
3340    }
3341
3342    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3343        let a = self.state[src].get_f32();
3344        self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
3345        self.state[dst].set_u32(a as u32);
3346        ControlFlow::Continue(())
3347    }
3348
3349    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3350        let a = self.state[src].get_f32();
3351        self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
3352        self.state[dst].set_i64(a as i64);
3353        ControlFlow::Continue(())
3354    }
3355
3356    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3357        let a = self.state[src].get_f32();
3358        self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
3359        self.state[dst].set_u64(a as u64);
3360        ControlFlow::Continue(())
3361    }
3362
3363    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3364        let a = self.state[src].get_f64();
3365        self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
3366        self.state[dst].set_i32(a as i32);
3367        ControlFlow::Continue(())
3368    }
3369
3370    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3371        let a = self.state[src].get_f64();
3372        self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
3373        self.state[dst].set_u32(a as u32);
3374        ControlFlow::Continue(())
3375    }
3376
3377    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3378        let a = self.state[src].get_f64();
3379        self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
3380        self.state[dst].set_i64(a as i64);
3381        ControlFlow::Continue(())
3382    }
3383
3384    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3385        let a = self.state[src].get_f64();
3386        self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
3387        self.state[dst].set_u64(a as u64);
3388        ControlFlow::Continue(())
3389    }
3390
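    // Saturating float-to-int conversions: Rust's float-to-int `as` casts
    // already saturate at the destination type's bounds and map NaN to zero,
    // which is exactly the behavior these `_sat` ops need.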
3391    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3392        let a = self.state[src].get_f32();
3393        self.state[dst].set_i32(a as i32);
3394        ControlFlow::Continue(())
3395    }
3396
3397    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3398        let a = self.state[src].get_f32();
3399        self.state[dst].set_u32(a as u32);
3400        ControlFlow::Continue(())
3401    }
3402
3403    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3404        let a = self.state[src].get_f32();
3405        self.state[dst].set_i64(a as i64);
3406        ControlFlow::Continue(())
3407    }
3408
3409    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3410        let a = self.state[src].get_f32();
3411        self.state[dst].set_u64(a as u64);
3412        ControlFlow::Continue(())
3413    }
3414
3415    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3416        let a = self.state[src].get_f64();
3417        self.state[dst].set_i32(a as i32);
3418        ControlFlow::Continue(())
3419    }
3420
3421    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3422        let a = self.state[src].get_f64();
3423        self.state[dst].set_u32(a as u32);
3424        ControlFlow::Continue(())
3425    }
3426
3427    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3428        let a = self.state[src].get_f64();
3429        self.state[dst].set_i64(a as i64);
3430        ControlFlow::Continue(())
3431    }
3432
3433    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3434        let a = self.state[src].get_f64();
3435        self.state[dst].set_u64(a as u64);
3436        ControlFlow::Continue(())
3437    }
3438
3439    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3440        let a = self.state[src].get_f64();
3441        self.state[dst].set_f32(a as f32);
3442        ControlFlow::Continue(())
3443    }
3444
3445    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3446        let a = self.state[src].get_f32();
3447        self.state[dst].set_f64(a.into());
3448        ControlFlow::Continue(())
3449    }
3450
3451    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3452        let a = self.state[operands.src1].get_f32();
3453        let b = self.state[operands.src2].get_f32();
3454        self.state[operands.dst].set_f32(a.wasm_copysign(b));
3455        ControlFlow::Continue(())
3456    }
3457
3458    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3459        let a = self.state[operands.src1].get_f64();
3460        let b = self.state[operands.src2].get_f64();
3461        self.state[operands.dst].set_f64(a.wasm_copysign(b));
3462        ControlFlow::Continue(())
3463    }
3464
3465    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3466        let a = self.state[operands.src1].get_f32();
3467        let b = self.state[operands.src2].get_f32();
3468        self.state[operands.dst].set_f32(a + b);
3469        ControlFlow::Continue(())
3470    }
3471
3472    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3473        let a = self.state[operands.src1].get_f32();
3474        let b = self.state[operands.src2].get_f32();
3475        self.state[operands.dst].set_f32(a - b);
3476        ControlFlow::Continue(())
3477    }
3478
3479    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3480    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3481        let mut a = self.state[operands.src1].get_f32x4();
3482        let b = self.state[operands.src2].get_f32x4();
3483        for (a, b) in a.iter_mut().zip(b) {
3484            *a -= b;
3485        }
3486        self.state[operands.dst].set_f32x4(a);
3487        ControlFlow::Continue(())
3488    }
3489
3490    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3491        let a = self.state[operands.src1].get_f32();
3492        let b = self.state[operands.src2].get_f32();
3493        self.state[operands.dst].set_f32(a * b);
3494        ControlFlow::Continue(())
3495    }
3496
3497    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3498    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3499        let mut a = self.state[operands.src1].get_f32x4();
3500        let b = self.state[operands.src2].get_f32x4();
3501        for (a, b) in a.iter_mut().zip(b) {
3502            *a *= b;
3503        }
3504        self.state[operands.dst].set_f32x4(a);
3505        ControlFlow::Continue(())
3506    }
3507
3508    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3509        let a = self.state[operands.src1].get_f32();
3510        let b = self.state[operands.src2].get_f32();
3511        self.state[operands.dst].set_f32(a / b);
3512        ControlFlow::Continue(())
3513    }
3514
3515    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3516    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3517        let a = self.state[operands.src1].get_f32x4();
3518        let b = self.state[operands.src2].get_f32x4();
3519        let mut result = [0.0f32; 4];
3520
3521        for i in 0..4 {
3522            result[i] = a[i] / b[i];
3523        }
3524
3525        self.state[operands.dst].set_f32x4(result);
3526        ControlFlow::Continue(())
3527    }
3528
3529    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3530    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3531        let a = self.state[operands.src1].get_f64x2();
3532        let b = self.state[operands.src2].get_f64x2();
3533        let mut result = [0.0f64; 2];
3534
3535        for i in 0..2 {
3536            result[i] = a[i] / b[i];
3537        }
3538
3539        self.state[operands.dst].set_f64x2(result);
3540        ControlFlow::Continue(())
3541    }
3542
3543    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3544        let a = self.state[operands.src1].get_f32();
3545        let b = self.state[operands.src2].get_f32();
3546        self.state[operands.dst].set_f32(a.wasm_maximum(b));
3547        ControlFlow::Continue(())
3548    }
3549
3550    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3551        let a = self.state[operands.src1].get_f32();
3552        let b = self.state[operands.src2].get_f32();
3553        self.state[operands.dst].set_f32(a.wasm_minimum(b));
3554        ControlFlow::Continue(())
3555    }
3556
3557    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3558        let a = self.state[src].get_f32();
3559        self.state[dst].set_f32(a.wasm_trunc());
3560        ControlFlow::Continue(())
3561    }
3562
3563    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3564    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3565        let mut a = self.state[src].get_f32x4();
3566        for elem in a.iter_mut() {
3567            *elem = elem.wasm_trunc();
3568        }
3569        self.state[dst].set_f32x4(a);
3570        ControlFlow::Continue(())
3571    }
3572
3573    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3574    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3575        let mut a = self.state[src].get_f64x2();
3576        for elem in a.iter_mut() {
3577            *elem = elem.wasm_trunc();
3578        }
3579        self.state[dst].set_f64x2(a);
3580        ControlFlow::Continue(())
3581    }
3582
3583    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3584        let a = self.state[src].get_f32();
3585        self.state[dst].set_f32(a.wasm_floor());
3586        ControlFlow::Continue(())
3587    }
3588
3589    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3590    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3591        let mut a = self.state[src].get_f32x4();
3592        for elem in a.iter_mut() {
3593            *elem = elem.wasm_floor();
3594        }
3595        self.state[dst].set_f32x4(a);
3596        ControlFlow::Continue(())
3597    }
3598
3599    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3600    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3601        let mut a = self.state[src].get_f64x2();
3602        for elem in a.iter_mut() {
3603            *elem = elem.wasm_floor();
3604        }
3605        self.state[dst].set_f64x2(a);
3606        ControlFlow::Continue(())
3607    }
3608
3609    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3610        let a = self.state[src].get_f32();
3611        self.state[dst].set_f32(a.wasm_ceil());
3612        ControlFlow::Continue(())
3613    }
3614
3615    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3616    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3617        let mut a = self.state[src].get_f32x4();
3618        for elem in a.iter_mut() {
3619            *elem = elem.wasm_ceil();
3620        }
3621        self.state[dst].set_f32x4(a);
3622
3623        ControlFlow::Continue(())
3624    }
3625
3626    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3627    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3628        let mut a = self.state[src].get_f64x2();
3629        for elem in a.iter_mut() {
3630            *elem = elem.wasm_ceil();
3631        }
3632        self.state[dst].set_f64x2(a);
3633
3634        ControlFlow::Continue(())
3635    }
3636
3637    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3638        let a = self.state[src].get_f32();
3639        self.state[dst].set_f32(a.wasm_nearest());
3640        ControlFlow::Continue(())
3641    }
3642
3643    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3644    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3645        let mut a = self.state[src].get_f32x4();
3646        for elem in a.iter_mut() {
3647            *elem = elem.wasm_nearest();
3648        }
3649        self.state[dst].set_f32x4(a);
3650        ControlFlow::Continue(())
3651    }
3652
3653    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3654    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3655        let mut a = self.state[src].get_f64x2();
3656        for elem in a.iter_mut() {
3657            *elem = elem.wasm_nearest();
3658        }
3659        self.state[dst].set_f64x2(a);
3660        ControlFlow::Continue(())
3661    }
3662
3663    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3664        let a = self.state[src].get_f32();
3665        self.state[dst].set_f32(a.wasm_sqrt());
3666        ControlFlow::Continue(())
3667    }
3668
3669    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3670    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3671        let mut a = self.state[src].get_f32x4();
3672        for elem in a.iter_mut() {
3673            *elem = elem.wasm_sqrt();
3674        }
3675        self.state[dst].set_f32x4(a);
3676        ControlFlow::Continue(())
3677    }
3678
3679    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3680    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3681        let mut a = self.state[src].get_f64x2();
3682        for elem in a.iter_mut() {
3683            *elem = elem.wasm_sqrt();
3684        }
3685        self.state[dst].set_f64x2(a);
3686        ControlFlow::Continue(())
3687    }
3688
3689    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3690        let a = self.state[src].get_f32();
3691        self.state[dst].set_f32(-a);
3692        ControlFlow::Continue(())
3693    }
3694
3695    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3696    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3697        let mut a = self.state[src].get_f32x4();
3698        for elem in a.iter_mut() {
3699            *elem = -*elem;
3700        }
3701        self.state[dst].set_f32x4(a);
3702        ControlFlow::Continue(())
3703    }
3704
3705    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3706        let a = self.state[src].get_f32();
3707        self.state[dst].set_f32(a.wasm_abs());
3708        ControlFlow::Continue(())
3709    }
3710
3711    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3712        let a = self.state[operands.src1].get_f64();
3713        let b = self.state[operands.src2].get_f64();
3714        self.state[operands.dst].set_f64(a + b);
3715        ControlFlow::Continue(())
3716    }
3717
3718    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3719        let a = self.state[operands.src1].get_f64();
3720        let b = self.state[operands.src2].get_f64();
3721        self.state[operands.dst].set_f64(a - b);
3722        ControlFlow::Continue(())
3723    }
3724
3725    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3726        let a = self.state[operands.src1].get_f64();
3727        let b = self.state[operands.src2].get_f64();
3728        self.state[operands.dst].set_f64(a * b);
3729        ControlFlow::Continue(())
3730    }
3731
3732    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3733        let a = self.state[operands.src1].get_f64();
3734        let b = self.state[operands.src2].get_f64();
3735        self.state[operands.dst].set_f64(a / b);
3736        ControlFlow::Continue(())
3737    }
3738
3739    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3740        let a = self.state[operands.src1].get_f64();
3741        let b = self.state[operands.src2].get_f64();
3742        self.state[operands.dst].set_f64(a.wasm_maximum(b));
3743        ControlFlow::Continue(())
3744    }
3745
3746    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3747        let a = self.state[operands.src1].get_f64();
3748        let b = self.state[operands.src2].get_f64();
3749        self.state[operands.dst].set_f64(a.wasm_minimum(b));
3750        ControlFlow::Continue(())
3751    }
3752
3753    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3754        let a = self.state[src].get_f64();
3755        self.state[dst].set_f64(a.wasm_trunc());
3756        ControlFlow::Continue(())
3757    }
3758
3759    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3760        let a = self.state[src].get_f64();
3761        self.state[dst].set_f64(a.wasm_floor());
3762        ControlFlow::Continue(())
3763    }
3764
3765    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3766        let a = self.state[src].get_f64();
3767        self.state[dst].set_f64(a.wasm_ceil());
3768        ControlFlow::Continue(())
3769    }
3770
3771    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3772        let a = self.state[src].get_f64();
3773        self.state[dst].set_f64(a.wasm_nearest());
3774        ControlFlow::Continue(())
3775    }
3776
3777    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3778        let a = self.state[src].get_f64();
3779        self.state[dst].set_f64(a.wasm_sqrt());
3780        ControlFlow::Continue(())
3781    }
3782
3783    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3784        let a = self.state[src].get_f64();
3785        self.state[dst].set_f64(-a);
3786        ControlFlow::Continue(())
3787    }
3788
3789    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3790        let a = self.state[src].get_f64();
3791        self.state[dst].set_f64(a.wasm_abs());
3792        ControlFlow::Continue(())
3793    }
3794
3795    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3796    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3797        let mut a = self.state[operands.src1].get_i8x16();
3798        let b = self.state[operands.src2].get_i8x16();
3799        for (a, b) in a.iter_mut().zip(b) {
3800            *a = a.wrapping_add(b);
3801        }
3802        self.state[operands.dst].set_i8x16(a);
3803        ControlFlow::Continue(())
3804    }
3805
3806    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3807    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3808        let mut a = self.state[operands.src1].get_i16x8();
3809        let b = self.state[operands.src2].get_i16x8();
3810        for (a, b) in a.iter_mut().zip(b) {
3811            *a = a.wrapping_add(b);
3812        }
3813        self.state[operands.dst].set_i16x8(a);
3814        ControlFlow::Continue(())
3815    }
3816
3817    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3818    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3819        let mut a = self.state[operands.src1].get_i32x4();
3820        let b = self.state[operands.src2].get_i32x4();
3821        for (a, b) in a.iter_mut().zip(b) {
3822            *a = a.wrapping_add(b);
3823        }
3824        self.state[operands.dst].set_i32x4(a);
3825        ControlFlow::Continue(())
3826    }
3827
3828    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3829    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3830        let mut a = self.state[operands.src1].get_i64x2();
3831        let b = self.state[operands.src2].get_i64x2();
3832        for (a, b) in a.iter_mut().zip(b) {
3833            *a = a.wrapping_add(b);
3834        }
3835        self.state[operands.dst].set_i64x2(a);
3836        ControlFlow::Continue(())
3837    }
3838
3839    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3840    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3841        let mut a = self.state[operands.src1].get_f32x4();
3842        let b = self.state[operands.src2].get_f32x4();
3843        for (a, b) in a.iter_mut().zip(b) {
3844            *a += b;
3845        }
3846        self.state[operands.dst].set_f32x4(a);
3847        ControlFlow::Continue(())
3848    }
3849
3850    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3851    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3852        let mut a = self.state[operands.src1].get_f64x2();
3853        let b = self.state[operands.src2].get_f64x2();
3854        for (a, b) in a.iter_mut().zip(b) {
3855            *a += b;
3856        }
3857        self.state[operands.dst].set_f64x2(a);
3858        ControlFlow::Continue(())
3859    }
3860
3861    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3862    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3863        let mut a = self.state[operands.src1].get_i8x16();
3864        let b = self.state[operands.src2].get_i8x16();
3865        for (a, b) in a.iter_mut().zip(b) {
3866            *a = (*a).saturating_add(b);
3867        }
3868        self.state[operands.dst].set_i8x16(a);
3869        ControlFlow::Continue(())
3870    }
3871
3872    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3873    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3874        let mut a = self.state[operands.src1].get_u8x16();
3875        let b = self.state[operands.src2].get_u8x16();
3876        for (a, b) in a.iter_mut().zip(b) {
3877            *a = (*a).saturating_add(b);
3878        }
3879        self.state[operands.dst].set_u8x16(a);
3880        ControlFlow::Continue(())
3881    }
3882
3883    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3884    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3885        let mut a = self.state[operands.src1].get_i16x8();
3886        let b = self.state[operands.src2].get_i16x8();
3887        for (a, b) in a.iter_mut().zip(b) {
3888            *a = (*a).saturating_add(b);
3889        }
3890        self.state[operands.dst].set_i16x8(a);
3891        ControlFlow::Continue(())
3892    }
3893
3894    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3895    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3896        let mut a = self.state[operands.src1].get_u16x8();
3897        let b = self.state[operands.src2].get_u16x8();
3898        for (a, b) in a.iter_mut().zip(b) {
3899            *a = (*a).saturating_add(b);
3900        }
3901        self.state[operands.dst].set_u16x8(a);
3902        ControlFlow::Continue(())
3903    }
3904
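    // Pairwise add: horizontally add adjacent lanes, with the sums of `a`
    // filling the low half of the result and the sums of `b` the high half
    // (e.g. result[0] = a[0] + a[1]).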
3905    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3906    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3907        let a = self.state[operands.src1].get_i16x8();
3908        let b = self.state[operands.src2].get_i16x8();
3909        let mut result = [0i16; 8];
3910        let half = result.len() / 2;
3911        for i in 0..half {
3912            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3913            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3914        }
3915        self.state[operands.dst].set_i16x8(result);
3916        ControlFlow::Continue(())
3917    }
3918
3919    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3920    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3921        let a = self.state[operands.src1].get_i32x4();
3922        let b = self.state[operands.src2].get_i32x4();
3923        let mut result = [0i32; 4];
3924        result[0] = a[0].wrapping_add(a[1]);
3925        result[1] = a[2].wrapping_add(a[3]);
3926        result[2] = b[0].wrapping_add(b[1]);
3927        result[3] = b[2].wrapping_add(b[3]);
3928        self.state[operands.dst].set_i32x4(result);
3929        ControlFlow::Continue(())
3930    }
3931
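    // Vector shifts: `wrapping_shl`/`wrapping_shr` mask the shift amount by
    // the lane width (e.g. `b & 7` for 8-bit lanes), so oversized shift
    // counts wrap around instead of being undefined.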
3932    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3933    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3934        let a = self.state[operands.src1].get_i8x16();
3935        let b = self.state[operands.src2].get_u32();
3936        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3937        ControlFlow::Continue(())
3938    }
3939
3940    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3941    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3942        let a = self.state[operands.src1].get_i16x8();
3943        let b = self.state[operands.src2].get_u32();
3944        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3945        ControlFlow::Continue(())
3946    }
3947
3948    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3949    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3950        let a = self.state[operands.src1].get_i32x4();
3951        let b = self.state[operands.src2].get_u32();
3952        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3953        ControlFlow::Continue(())
3954    }
3955
3956    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3957    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3958        let a = self.state[operands.src1].get_i64x2();
3959        let b = self.state[operands.src2].get_u32();
3960        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3961        ControlFlow::Continue(())
3962    }
3963
3964    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3965    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3966        let a = self.state[operands.src1].get_i8x16();
3967        let b = self.state[operands.src2].get_u32();
3968        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3969        ControlFlow::Continue(())
3970    }
3971
3972    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3973    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3974        let a = self.state[operands.src1].get_i16x8();
3975        let b = self.state[operands.src2].get_u32();
3976        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3977        ControlFlow::Continue(())
3978    }
3979
3980    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3981    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3982        let a = self.state[operands.src1].get_i32x4();
3983        let b = self.state[operands.src2].get_u32();
3984        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3985        ControlFlow::Continue(())
3986    }
3987
3988    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3989    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3990        let a = self.state[operands.src1].get_i64x2();
3991        let b = self.state[operands.src2].get_u32();
3992        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
3993        ControlFlow::Continue(())
3994    }
3995
3996    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3997    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3998        let a = self.state[operands.src1].get_u8x16();
3999        let b = self.state[operands.src2].get_u32();
4000        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
4001        ControlFlow::Continue(())
4002    }
4003
4004    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4005    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4006        let a = self.state[operands.src1].get_u16x8();
4007        let b = self.state[operands.src2].get_u32();
4008        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
4009        ControlFlow::Continue(())
4010    }
4011
4012    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4013    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4014        let a = self.state[operands.src1].get_u32x4();
4015        let b = self.state[operands.src2].get_u32();
4016        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
4017        ControlFlow::Continue(())
4018    }
4019
4020    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4021    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4022        let a = self.state[operands.src1].get_u64x2();
4023        let b = self.state[operands.src2].get_u32();
4024        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
4025        ControlFlow::Continue(())
4026    }
4027
4028    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4029    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
4030        self.state[dst].set_u128(val);
4031        ControlFlow::Continue(())
4032    }
4033
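    // Splats replicate a single scalar across every lane; the integer
    // variants truncate the X register to the lane width first.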
4034    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4035    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4036        let val = self.state[src].get_u32() as u8;
4037        self.state[dst].set_u8x16([val; 16]);
4038        ControlFlow::Continue(())
4039    }
4040
4041    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4042    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4043        let val = self.state[src].get_u32() as u16;
4044        self.state[dst].set_u16x8([val; 8]);
4045        ControlFlow::Continue(())
4046    }
4047
4048    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4049    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4050        let val = self.state[src].get_u32();
4051        self.state[dst].set_u32x4([val; 4]);
4052        ControlFlow::Continue(())
4053    }
4054
4055    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4056    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4057        let val = self.state[src].get_u64();
4058        self.state[dst].set_u64x2([val; 2]);
4059        ControlFlow::Continue(())
4060    }
4061
4062    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4063    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4064        let val = self.state[src].get_f32();
4065        self.state[dst].set_f32x4([val; 4]);
4066        ControlFlow::Continue(())
4067    }
4068
4069    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4070    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4071        let val = self.state[src].get_f64();
4072        self.state[dst].set_f64x2([val; 2]);
4073        ControlFlow::Continue(())
4074    }
4075
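    // Widening loads: read a 64-bit vector from memory and sign- or
    // zero-extend each lane to twice its width.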
4076    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4077    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4078        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
4079        self.state[dst].set_i16x8(val.map(|i| i.into()));
4080        ControlFlow::Continue(())
4081    }
4082
4083    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4084    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4085        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4086        self.state[dst].set_u16x8(val.map(|i| i.into()));
4087        ControlFlow::Continue(())
4088    }
4089
4090    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4091    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4092        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4093        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4094        ControlFlow::Continue(())
4095    }
4096
4097    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4098    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4099        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4100        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4101        ControlFlow::Continue(())
4102    }
4103
4104    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4105    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4106        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4107        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4108        ControlFlow::Continue(())
4109    }
4110
4111    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4112    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4113        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4114        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4115        ControlFlow::Continue(())
4116    }
4117
4118    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4119    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4120        let a = self.state[operands.src1].get_u128();
4121        let b = self.state[operands.src2].get_u128();
4122        self.state[operands.dst].set_u128(a & b);
4123        ControlFlow::Continue(())
4124    }
4125
4126    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4127    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4128        let a = self.state[operands.src1].get_u128();
4129        let b = self.state[operands.src2].get_u128();
4130        self.state[operands.dst].set_u128(a | b);
4131        ControlFlow::Continue(())
4132    }
4133
4134    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4135    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4136        let a = self.state[operands.src1].get_u128();
4137        let b = self.state[operands.src2].get_u128();
4138        self.state[operands.dst].set_u128(a ^ b);
4139        ControlFlow::Continue(())
4140    }
4141
4142    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4143    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4144        let a = self.state[src].get_u128();
4145        self.state[dst].set_u128(!a);
4146        ControlFlow::Continue(())
4147    }
4148
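    // Bitwise select: for each bit, take `x` where the corresponding bit of
    // `c` is 1 and `y` where it is 0, i.e. `(c & x) | (!c & y)`.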
4149    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4150    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4151        let c = self.state[c].get_u128();
4152        let x = self.state[x].get_u128();
4153        let y = self.state[y].get_u128();
4154        self.state[dst].set_u128((c & x) | (!c & y));
4155        ControlFlow::Continue(())
4156    }
4157
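    // Bitmask extraction: collect the sign (top) bit of every lane into the
    // low bits of an X register. Iterating in reverse shifts lane 0's bit
    // into bit 0 of the result.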
4158    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4159    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4160        let a = self.state[src].get_u8x16();
4161        let mut result = 0;
4162        for item in a.iter().rev() {
4163            result <<= 1;
4164            result |= (*item >> 7) as u32;
4165        }
4166        self.state[dst].set_u32(result);
4167        ControlFlow::Continue(())
4168    }
4169
4170    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4171    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4172        let a = self.state[src].get_u16x8();
4173        let mut result = 0;
4174        for item in a.iter().rev() {
4175            result <<= 1;
4176            result |= (*item >> 15) as u32;
4177        }
4178        self.state[dst].set_u32(result);
4179        ControlFlow::Continue(())
4180    }
4181
4182    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4183    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4184        let a = self.state[src].get_u32x4();
4185        let mut result = 0;
4186        for item in a.iter().rev() {
4187            result <<= 1;
4188            result |= *item >> 31;
4189        }
4190        self.state[dst].set_u32(result);
4191        ControlFlow::Continue(())
4192    }
4193
4194    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4195    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4196        let a = self.state[src].get_u64x2();
4197        let mut result = 0;
4198        for item in a.iter().rev() {
4199            result <<= 1;
4200            result |= (*item >> 63) as u32;
4201        }
4202        self.state[dst].set_u32(result);
4203        ControlFlow::Continue(())
4204    }
4205
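    // Boolean reductions: `valltrue*` yields 1 only if every lane is nonzero;
    // `vanytrue*` yields 1 if at least one lane is nonzero.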
4206    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4207    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4208        let a = self.state[src].get_u8x16();
4209        let result = a.iter().all(|a| *a != 0);
4210        self.state[dst].set_u32(u32::from(result));
4211        ControlFlow::Continue(())
4212    }
4213
4214    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4215    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4216        let a = self.state[src].get_u16x8();
4217        let result = a.iter().all(|a| *a != 0);
4218        self.state[dst].set_u32(u32::from(result));
4219        ControlFlow::Continue(())
4220    }
4221
4222    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4223    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4224        let a = self.state[src].get_u32x4();
4225        let result = a.iter().all(|a| *a != 0);
4226        self.state[dst].set_u32(u32::from(result));
4227        ControlFlow::Continue(())
4228    }
4229
4230    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4231    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4232        let a = self.state[src].get_u64x2();
4233        let result = a.iter().all(|a| *a != 0);
4234        self.state[dst].set_u32(u32::from(result));
4235        ControlFlow::Continue(())
4236    }
4237
4238    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4239    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4240        let a = self.state[src].get_u8x16();
4241        let result = a.iter().any(|a| *a != 0);
4242        self.state[dst].set_u32(u32::from(result));
4243        ControlFlow::Continue(())
4244    }
4245
4246    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4247    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4248        let a = self.state[src].get_u16x8();
4249        let result = a.iter().any(|a| *a != 0);
4250        self.state[dst].set_u32(u32::from(result));
4251        ControlFlow::Continue(())
4252    }
4253
4254    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4255    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4256        let a = self.state[src].get_u32x4();
4257        let result = a.iter().any(|a| *a != 0);
4258        self.state[dst].set_u32(u32::from(result));
4259        ControlFlow::Continue(())
4260    }
4261
4262    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4263    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4264        let a = self.state[src].get_u64x2();
4265        let result = a.iter().any(|a| *a != 0);
4266        self.state[dst].set_u32(u32::from(result));
4267        ControlFlow::Continue(())
4268    }
4269
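    // Lane-wise int/float conversions; the float-to-int direction relies on
    // Rust's saturating `as` casts (NaN becomes 0), matching Wasm's
    // `trunc_sat` SIMD semantics.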
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_i32x4(a.map(|f| f as i32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_u32x4(a.map(|f| f as u32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_i64x2(a.map(|f| f as i64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_u64x2(a.map(|f| f as u64));
        ControlFlow::Continue(())
    }

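    // Widening conversions: `first_chunk`/`last_chunk` select the low or high
    // half of the source lanes, which are then sign- or zero-extended to the
    // next larger lane width via `From`.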
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

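    // Saturating narrows: two source vectors are narrowed into one result.
    // As with Wasm's `narrow` instructions, the `_u` variants read *signed*
    // inputs and saturate them into the unsigned range, e.g. an i16 lane of
    // 300 narrows to 255 and a lane of -5 narrows to 0.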
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
        }
        self.state[operands.dst].set_i8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
        }
        self.state[operands.dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
        }
        self.state[operands.dst].set_u16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
        }
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

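    // Unlike `vnarrow64x2_u` above, the inputs here are interpreted as
    // unsigned, so the only saturation point is `u32::MAX`.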
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i).try_into().unwrap_or(u32::MAX);
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

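    // Float promotion/demotion: `vfpromotelow` widens the two low f32 lanes
    // to f64, and `vfdemote` narrows both f64 lanes to f32, zero-filling the
    // upper two result lanes.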
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
        ControlFlow::Continue(())
    }

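    // Lane-wise subtraction and multiplication. Integer variants wrap on
    // overflow; the `_sat` variants clamp at the lane type's bounds instead.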
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a - b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

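    // Saturating, rounding Q15 multiplication: each product is computed in
    // 32-bit precision, rounded by adding 2^14 before the shift by 15, then
    // clamped to the i16 range. The only clamping case is MIN * MIN:
    // (-32768 * -32768 + 2^14) >> 15 == 32768, which saturates to 32767.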
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        const MIN: i32 = i16::MIN as i32;
        const MAX: i32 = i16::MAX as i32;
        for (a, b) in a.iter_mut().zip(b) {
            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
            *a = r.clamp(MIN, MAX) as i16;
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
        ControlFlow::Continue(())
    }

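    // Lane extraction/insertion. The unchecked indexing relies on the
    // bytecode producer only ever emitting in-bounds lane numbers; an
    // out-of-bounds `lane` here would be undefined behavior, which is part
    // of the interpreter's overall unsafe contract.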
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u64(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f64(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx8(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32() as u8;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx16(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32() as u16;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

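    // Lane-wise integer comparisons: each result lane is all ones if the
    // comparison holds and all zeros otherwise. The `slt*`/`ult*` handlers
    // pick the signed or unsigned interpretation of the lanes.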
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

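    // Integer negation wraps, so e.g. an i8 lane of -128 negates to itself.
    // The float variant below flips the sign bit, including on NaN.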
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| -i));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

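    // `wrapping_abs` leaves the minimum value unchanged (e.g. an i8 lane of
    // -128 stays -128) rather than trapping; the float variants clear the
    // sign bit, including on NaN.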
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

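    // NaN-propagating float min/max: `wasm_maximum`/`wasm_minimum` return NaN
    // if either input is NaN and order -0.0 below +0.0, per Wasm semantics.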
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

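    // The shuffle mask is a u128 immediate whose 16 little-endian bytes each
    // select one byte from the 32-byte concatenation of `src1` then `src2`.
    // A selector of 32 or more would panic with an out-of-bounds index, so
    // encoders are expected to keep every selector below 32.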
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
        let a = self.state[src1].get_u8x16();
        let b = self.state[src2].get_u8x16();
        let result = mask.to_le_bytes().map(|m| {
            if m < 16 {
                a[m as usize]
            } else {
                b[m as usize - 16]
            }
        });
        self.state[dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

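    // Swizzle: any out-of-range selector produces 0. Negative i8 selectors
    // are handled by the `idx as usize` conversion, which sign-extends them
    // into huge values that fail the `< 16` bounds check.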
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let src1 = self.state[operands.src1].get_i8x16();
        let src2 = self.state[operands.src2].get_i8x16();
        let mut dst = [0i8; 16];
        for (i, &idx) in src2.iter().enumerate() {
            if (idx as usize) < 16 {
                dst[i] = src1[idx as usize];
            } else {
                dst[i] = 0;
            }
        }
        self.state[operands.dst].set_i8x16(dst);
        ControlFlow::Continue(())
    }

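    // Rounding averages: `(a + b + 1) / 2` rounds ties up, e.g.
    // avgr(1, 2) == 2, and avgr(255, 255) == 255 without overflowing thanks
    // to the u32 intermediates.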
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

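    // Lane-wise float comparisons follow IEEE semantics: any comparison with
    // NaN is false except `!=`, so a NaN lane yields all zeros for the
    // eq/lt/lteq masks and all ones for the neq mask.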
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

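    // Lane-wise multiply-add using `wasm_mul_add` from the `WasmFloat` trait;
    // exactly how the intermediate product is rounded is delegated to that
    // helper rather than decided here.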
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f32x4();
        let b = self.state[b].get_f32x4();
        let c = self.state[c].get_f32x4();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f64x2();
        let b = self.state[b].get_f64x2();
        let c = self.state[c].get_f64x2();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

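    // A whole-register select: copies all 128 bits of one vector source based
    // on a scalar condition read from an `x` register.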
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vselect(
        &mut self,
        dst: VReg,
        cond: XReg,
        if_nonzero: VReg,
        if_zero: VReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero]
        } else {
            self.state[if_zero]
        };
        self.state[dst] = result;
        ControlFlow::Continue(())
    }

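    // 128-bit integer arithmetic. Values are split across a lo/hi pair of
    // `x` registers and recombined with the `get_i128`/`set_i128` helpers.
    // These are scalar ops on `x` registers only, so they are not gated
    // behind `pulley_disable_interp_simd` like the vector ops above.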
    fn xadd128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_add(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    fn xsub128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_sub(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    fn xwidemul64_s(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_i64();
        let rhs = self.state[rhs].get_i64();
        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    fn xwidemul64_u(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_u64();
        let rhs = self.state[rhs].get_u64();
        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result as i128);
        ControlFlow::Continue(())
    }
}