pulley_interpreter/interp.rs

//! Interpretation of pulley bytecode.

use crate::decode::*;
use crate::encode::Encode;
use crate::imms::*;
use crate::profile::{ExecutingPc, ExecutingPcRef};
use crate::regs::*;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::fmt;
use core::mem;
use core::ops::ControlFlow;
use core::ops::{Index, IndexMut};
use core::ptr::NonNull;
use pulley_macros::interp_disable_if_cfg;
use wasmtime_math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};

mod debug;
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
mod match_loop;
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
mod tail_loop;

const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB

/// A virtual machine for interpreting Pulley bytecode.
pub struct Vm {
    state: MachineState,
    executing_pc: ExecutingPc,
}

impl Default for Vm {
    fn default() -> Self {
        Vm::new()
    }
}

impl Vm {
    /// Create a new virtual machine with the default stack size.
    pub fn new() -> Self {
        Self::with_stack(DEFAULT_STACK_SIZE)
    }

    /// Create a new virtual machine with the given stack.
    pub fn with_stack(stack_size: usize) -> Self {
        Self {
            state: MachineState::with_stack(stack_size),
            executing_pc: ExecutingPc::default(),
        }
    }

    /// Get a shared reference to this VM's machine state.
    pub fn state(&self) -> &MachineState {
        &self.state
    }

    /// Get an exclusive reference to this VM's machine state.
    pub fn state_mut(&mut self) -> &mut MachineState {
        &mut self.state
    }

    /// Call a bytecode function.
    ///
    /// The given `func` must point to the beginning of a valid Pulley bytecode
    /// function.
    ///
    /// The given `args` must match the number and type of arguments that
    /// function expects.
    ///
    /// The given `rets` must match the function's actual return types.
    ///
    /// Returns either the resulting values, or the PC at which a trap was
    /// raised.
    pub unsafe fn call<'a, T>(
        &'a mut self,
        func: NonNull<u8>,
        args: &[Val],
        rets: T,
    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
    where
        T: IntoIterator<Item = RegType> + 'a,
    {
        unsafe {
            let lr = self.call_start(args);

            match self.call_run(func) {
                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
                DoneReason::CallIndirectHost { id, resume } => {
                    DoneReason::CallIndirectHost { id, resume }
                }
            }
        }
    }

    /// Performs the initial part of [`Vm::call`] in setting up the `args`
    /// provided in registers according to Pulley's ABI.
    ///
    /// # Return
    ///
    /// Returns the old `lr` register value. The current `lr` value is replaced
    /// with a sentinel that triggers a return to the host when returned-to.
    ///
    /// # Unsafety
    ///
    /// All the same unsafety as `call` and additionally, you must
    /// invoke `call_run` and then `call_end` after calling `call_start`.
    /// If you don't want to wrangle these invocations, use `call` instead
    /// of `call_{start,run,end}`.
    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        for arg in args {
            match arg {
                Val::XReg(val) => match x_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                Val::FReg(val) => match f_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                #[cfg(not(pulley_disable_interp_simd))]
                Val::VReg(val) => match v_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
            }
        }

        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
    }
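
    // A hypothetical host-side sequence using the split entry points; `func`,
    // `args`, and `ret_types` are illustrative names, and `func` must point at
    // a real Pulley bytecode function:
    //
    //     let lr = vm.call_start(&args);
    //     let reason = vm.call_run(func);
    //     let rets = vm.call_end(lr, ret_types);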

    /// Performs the internal part of [`Vm::call`] where bytecode is actually
    /// executed.
    ///
    /// # Unsafety
    ///
    /// In addition to all the invariants documented for `call`, you
    /// may only invoke `call_run` after invoking `call_start` to
    /// initialize this call's arguments.
    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
        self.state.debug_assert_done_reason_none();
        let interpreter = Interpreter {
            state: &mut self.state,
            pc: unsafe { UnsafeBytecodeStream::new(pc) },
            executing_pc: self.executing_pc.as_ref(),
        };
        let done = interpreter.run();
        self.state.done_decode(done)
    }

    /// Performs the tail end of [`Vm::call`] by returning the values as
    /// determined by `rets` according to Pulley's ABI.
    ///
    /// The `old_ret` value should have been provided from `call_start`
    /// previously.
    ///
    /// # Unsafety
    ///
    /// In addition to the invariants documented for `call`, this may
    /// only be called after `call_run`.
    pub unsafe fn call_end<'a>(
        &'a mut self,
        old_ret: *mut u8,
        rets: impl IntoIterator<Item = RegType> + 'a,
    ) -> impl Iterator<Item = Val> + 'a {
        self.state.lr = old_ret;
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        rets.into_iter().map(move |ty| match ty {
            RegType::XReg => match x_rets.next() {
                Some(reg) => Val::XReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            RegType::FReg => match f_rets.next() {
                Some(reg) => Val::FReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(not(pulley_disable_interp_simd))]
            RegType::VReg => match v_rets.next() {
                Some(reg) => Val::VReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(pulley_disable_interp_simd)]
            RegType::VReg => panic!("simd support disabled at compile time"),
        })
    }

    /// Returns the current `fp` register value.
    pub fn fp(&self) -> *mut u8 {
        self.state.fp
    }

    /// Returns the current `lr` register value.
    pub fn lr(&self) -> *mut u8 {
        self.state.lr
    }

    /// Sets the current `fp` register value.
    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
        self.state.fp = fp;
    }

    /// Sets the current `lr` register value.
    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
        self.state.lr = lr;
    }

    /// Gets a handle to the currently executing program counter for this
    /// interpreter which can be read from other threads.
    //
    // Note that despite this field still existing with `not(feature =
    // "profile")` it's hidden from the public API in that scenario as it has no
    // methods anyway.
    #[cfg(feature = "profile")]
    pub fn executing_pc(&self) -> &ExecutingPc {
        &self.executing_pc
    }
}
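
// An illustrative smoke test of `Vm` construction and the `Index`/`IndexMut`
// register accessors defined later in this file; running real bytecode would
// require a valid Pulley function pointer, which is out of scope here.
#[test]
fn vm_register_access_sketch() {
    let mut vm = Vm::new();
    vm[XReg::x0] = XRegVal::new_u64(42);
    assert_eq!(vm[XReg::x0].get_u64(), 42);
    // `sp` was pointed at the top of the stack by `MachineState::with_stack`.
    assert!(!vm[XReg::sp].get_ptr::<u8>().is_null());
}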

impl Drop for Vm {
    fn drop(&mut self) {
        self.executing_pc.set_done();
    }
}

/// The type of a register in the Pulley machine state.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    VReg,
}

/// A value that can be stored in a register.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}

impl fmt::LowerHex for Val {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
            #[cfg(not(pulley_disable_interp_simd))]
            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}

impl From<XRegVal> for Val {
    fn from(value: XRegVal) -> Self {
        Val::XReg(value)
    }
}

impl From<u64> for Val {
    fn from(value: u64) -> Self {
        XRegVal::new_u64(value).into()
    }
}

impl From<u32> for Val {
    fn from(value: u32) -> Self {
        XRegVal::new_u32(value).into()
    }
}

impl From<i64> for Val {
    fn from(value: i64) -> Self {
        XRegVal::new_i64(value).into()
    }
}

impl From<i32> for Val {
    fn from(value: i32) -> Self {
        XRegVal::new_i32(value).into()
    }
}

impl<T> From<*mut T> for Val {
    fn from(value: *mut T) -> Self {
        XRegVal::new_ptr(value).into()
    }
}

impl From<FRegVal> for Val {
    fn from(value: FRegVal) -> Self {
        Val::FReg(value)
    }
}

impl From<f64> for Val {
    fn from(value: f64) -> Self {
        FRegVal::new_f64(value).into()
    }
}

impl From<f32> for Val {
    fn from(value: f32) -> Self {
        FRegVal::new_f32(value).into()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl From<VRegVal> for Val {
    fn from(value: VRegVal) -> Self {
        Val::VReg(value)
    }
}

/// An `x` register value: integers.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);

impl PartialEq for XRegVal {
    fn eq(&self, other: &Self) -> bool {
        self.get_u64() == other.get_u64()
    }
}

impl Eq for XRegVal {}

impl fmt::Debug for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("XRegVal")
            .field("as_u64", &self.get_u64())
            .finish()
    }
}

impl fmt::LowerHex for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_u64(), f)
    }
}

/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent, which makes the
///    state of the register platform-dependent.
/// 2. Sign- or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32 bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change this.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them so this in theory doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself as it's interpreted,
/// meaning that severing data dependencies with previous operations is
/// hypothesized to not be too important. If this is ever a problem, though, it
/// could increase the likelihood we go for route (2) above instead (or maybe
/// even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}

impl Default for XRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl XRegVal {
    pub fn new_i32(x: i32) -> Self {
        let mut val = XRegVal::default();
        val.set_i32(x);
        val
    }

    pub fn new_u32(x: u32) -> Self {
        let mut val = XRegVal::default();
        val.set_u32(x);
        val
    }

    pub fn new_i64(x: i64) -> Self {
        let mut val = XRegVal::default();
        val.set_i64(x);
        val
    }

    pub fn new_u64(x: u64) -> Self {
        let mut val = XRegVal::default();
        val.set_u64(x);
        val
    }

    pub fn new_ptr<T>(ptr: *mut T) -> Self {
        let mut val = XRegVal::default();
        val.set_ptr(ptr);
        val
    }

    pub fn get_i32(&self) -> i32 {
        let x = unsafe { self.0.i32 };
        i32::from_le(x)
    }

    pub fn get_u32(&self) -> u32 {
        let x = unsafe { self.0.u32 };
        u32::from_le(x)
    }

    pub fn get_i64(&self) -> i64 {
        let x = unsafe { self.0.i64 };
        i64::from_le(x)
    }

    pub fn get_u64(&self) -> u64 {
        let x = unsafe { self.0.u64 };
        u64::from_le(x)
    }

    pub fn get_ptr<T>(&self) -> *mut T {
        let ptr = unsafe { self.0.ptr };
        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
    }

    pub fn set_i32(&mut self, x: i32) {
        self.0.i32 = x.to_le();
    }

    pub fn set_u32(&mut self, x: u32) {
        self.0.u32 = x.to_le();
    }

    pub fn set_i64(&mut self, x: i64) {
        self.0.i64 = x.to_le();
    }

    pub fn set_u64(&mut self, x: u64) {
        self.0.u64 = x.to_le();
    }

    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
        self.0.ptr = ptr.expose_provenance().to_le();
    }
}
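
// Illustrative check of the little-endian storage contract documented on
// `XRegUnion`: a 32-bit write followed by a 64-bit read observes zero upper
// bits regardless of host endianness.
#[test]
fn xregval_little_endian_contract() {
    let val = XRegVal::new_u32(0xAABB_CCDD);
    assert_eq!(val.get_u64(), 0x0000_0000_AABB_CCDD);
    assert_eq!(XRegVal::new_i32(-1).get_u32(), u32::MAX);
}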

/// An `f` register value: floats.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("FRegVal")
            .field("as_f32", &self.get_f32())
            .field("as_f64", &self.get_f64())
            .finish()
    }
}

impl fmt::LowerHex for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
    }
}

// NB: like `XRegUnion`, values here are always little-endian; see the
// documentation above for more details.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}

impl Default for FRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl FRegVal {
    pub fn new_f32(f: f32) -> Self {
        let mut val = Self::default();
        val.set_f32(f);
        val
    }

    pub fn new_f64(f: f64) -> Self {
        let mut val = Self::default();
        val.set_f64(f);
        val
    }

    pub fn get_f32(&self) -> f32 {
        let val = unsafe { self.0.f32 };
        f32::from_le_bytes(val.to_ne_bytes())
    }

    pub fn get_f64(&self) -> f64 {
        let val = unsafe { self.0.f64 };
        f64::from_le_bytes(val.to_ne_bytes())
    }

    pub fn set_f32(&mut self, val: f32) {
        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
    }

    pub fn set_f64(&mut self, val: f64) {
        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
    }
}
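
// Illustrative round-trip check for the `FRegVal` accessors, which use the
// same always-little-endian convention as `XRegUnion`.
#[test]
fn fregval_round_trip() {
    let val = FRegVal::new_f32(1.25);
    assert_eq!(val.get_f32(), 1.25);
    let val = FRegVal::new_f64(-2.5);
    assert_eq!(val.get_f64(), -2.5);
}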

/// A `v` register value: vectors.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::Debug for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("VRegVal")
            .field("as_u128", &unsafe { self.0.u128 })
            .finish()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::LowerHex for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
    }
}

/// A 128-bit vector register.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably, all fields of this
/// union are the same width, so all bits are always defined. Little-endian
/// order is required, though, so that bitcasts between different shapes of
/// vectors work; this union cannot be stored in big-endian order.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()`, so we need to go through
    // the bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}

#[cfg(not(pulley_disable_interp_simd))]
impl Default for VRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
#[cfg(not(pulley_disable_interp_simd))]
impl VRegVal {
    pub fn new_u128(i: u128) -> Self {
        let mut val = Self::default();
        val.set_u128(i);
        val
    }

    pub fn get_u128(&self) -> u128 {
        let val = unsafe { self.0.u128 };
        u128::from_le(val)
    }

    pub fn set_u128(&mut self, val: u128) {
        self.0.u128 = val.to_le();
    }

    fn get_i8x16(&self) -> [i8; 16] {
        let val = unsafe { self.0.i8x16 };
        val.map(|e| i8::from_le(e))
    }

    fn set_i8x16(&mut self, val: [i8; 16]) {
        self.0.i8x16 = val.map(|e| e.to_le());
    }

    fn get_u8x16(&self) -> [u8; 16] {
        let val = unsafe { self.0.u8x16 };
        val.map(|e| u8::from_le(e))
    }

    fn set_u8x16(&mut self, val: [u8; 16]) {
        self.0.u8x16 = val.map(|e| e.to_le());
    }

    fn get_i16x8(&self) -> [i16; 8] {
        let val = unsafe { self.0.i16x8 };
        val.map(|e| i16::from_le(e))
    }

    fn set_i16x8(&mut self, val: [i16; 8]) {
        self.0.i16x8 = val.map(|e| e.to_le());
    }

    fn get_u16x8(&self) -> [u16; 8] {
        let val = unsafe { self.0.u16x8 };
        val.map(|e| u16::from_le(e))
    }

    fn set_u16x8(&mut self, val: [u16; 8]) {
        self.0.u16x8 = val.map(|e| e.to_le());
    }

    fn get_i32x4(&self) -> [i32; 4] {
        let val = unsafe { self.0.i32x4 };
        val.map(|e| i32::from_le(e))
    }

    fn set_i32x4(&mut self, val: [i32; 4]) {
        self.0.i32x4 = val.map(|e| e.to_le());
    }

    fn get_u32x4(&self) -> [u32; 4] {
        let val = unsafe { self.0.u32x4 };
        val.map(|e| u32::from_le(e))
    }

    fn set_u32x4(&mut self, val: [u32; 4]) {
        self.0.u32x4 = val.map(|e| e.to_le());
    }

    fn get_i64x2(&self) -> [i64; 2] {
        let val = unsafe { self.0.i64x2 };
        val.map(|e| i64::from_le(e))
    }

    fn set_i64x2(&mut self, val: [i64; 2]) {
        self.0.i64x2 = val.map(|e| e.to_le());
    }

    fn get_u64x2(&self) -> [u64; 2] {
        let val = unsafe { self.0.u64x2 };
        val.map(|e| u64::from_le(e))
    }

    fn set_u64x2(&mut self, val: [u64; 2]) {
        self.0.u64x2 = val.map(|e| e.to_le());
    }

    fn get_f64x2(&self) -> [f64; 2] {
        let val = unsafe { self.0.f64x2 };
        val.map(|e| f64::from_bits(u64::from_le(e)))
    }

    fn set_f64x2(&mut self, val: [f64; 2]) {
        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
    }

    fn get_f32x4(&self) -> [f32; 4] {
        let val = unsafe { self.0.f32x4 };
        val.map(|e| f32::from_bits(u32::from_le(e)))
    }

    fn set_f32x4(&mut self, val: [f32; 4]) {
        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
    }
}
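
// Illustrative check that the `VRegVal` lane accessors agree on the underlying
// 128 bits, which is what the little-endian requirement documented on
// `VRegUnion` buys us.
#[cfg(not(pulley_disable_interp_simd))]
#[test]
fn vregval_lane_layout() {
    let mut val = VRegVal::new_u128(0x0102_0304_0506_0708_090a_0b0c_0d0e_0f10);
    assert_eq!(val.get_u128(), 0x0102_0304_0506_0708_090a_0b0c_0d0e_0f10);
    val.set_u64x2([1, 2]);
    // Lane 0 occupies the low 64 bits of the 128-bit value.
    assert_eq!(val.get_u128() & u128::from(u64::MAX), 1);
    assert_eq!(val.get_u128() >> 64, 2);
}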

/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    fp: *mut u8,
    lr: *mut u8,
    stack: Stack,
    done_reason: Option<DoneReason<()>>,
}

unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}

/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte-aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This
/// is done internally with a `Vec<T>` where `T` has a size and alignment of
/// 16, via the helper `Align16` type below.
struct Stack {
    storage: Vec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}

impl Stack {
    /// Creates a new stack which will have a byte size of at least `size`.
    ///
    /// The allocated stack might be slightly larger due to the rounding
    /// necessary to keep its byte size a multiple of 16.
    fn new(size: usize) -> Stack {
        Stack {
            // Round up `size` to the nearest multiple of 16. Note that the
            // stack is also allocated here but not initialized, and that's
            // intentional as pulley bytecode should always initialize the stack
            // before use.
            storage: Vec::with_capacity((size + 15) / 16),
        }
    }

    /// Returns a pointer to the top of the stack (the highest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, not just the top.
    fn top(&mut self) -> *mut u8 {
        let len = self.len();
        unsafe { self.base().add(len) }
    }

    /// Returns a pointer to the base of the stack (the lowest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, not just the base.
    fn base(&mut self) -> *mut u8 {
        self.storage.as_mut_ptr().cast::<u8>()
    }

    /// Returns the length, in bytes, of this stack allocation.
    fn len(&self) -> usize {
        self.storage.capacity() * mem::size_of::<Align16>()
    }
}
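
// Illustrative check of the `Stack` invariants described above: the byte size
// is rounded up to a multiple of 16 and `top` sits `len` bytes above `base`.
#[test]
fn stack_size_and_bounds() {
    let mut stack = Stack::new(10);
    assert!(stack.len() >= 10);
    assert_eq!(stack.len() % 16, 0);
    let base = stack.base() as usize;
    let top = stack.top() as usize;
    assert_eq!(top - base, stack.len());
}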

impl fmt::Debug for MachineState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MachineState {
            x_regs,
            f_regs,
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs,
            stack: _,
            done_reason: _,
            fp: _,
            lr: _,
        } = self;

        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);

        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut f = f.debug_map();
                for (i, r) in self.0.iter().enumerate() {
                    f.entry(&(self.1)(i as u8), r);
                }
                f.finish()
            }
        }

        let mut f = f.debug_struct("MachineState");

        f.field(
            "x_regs",
            &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
        )
        .field(
            "f_regs",
            &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
        );
        #[cfg(not(pulley_disable_interp_simd))]
        f.field(
            "v_regs",
            &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
        );
        f.finish_non_exhaustive()
    }
}

macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);

/// Sentinel return address that signals the end of the call stack.
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;

impl MachineState {
    fn with_stack(stack_size: usize) -> Self {
        let mut state = Self {
            x_regs: [Default::default(); XReg::RANGE.end as usize],
            f_regs: Default::default(),
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs: Default::default(),
            stack: Stack::new(stack_size),
            done_reason: None,
            fp: HOST_RETURN_ADDR,
            lr: HOST_RETURN_ADDR,
        };

        let sp = state.stack.top();
        state[XReg::sp] = XRegVal::new_ptr(sp);

        state
    }
}
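
// Illustrative sanity check that a fresh `MachineState` starts with the
// host-return sentinel in `fp`/`lr` and with `sp` at the top of the stack.
#[test]
fn machine_state_initial_registers() {
    let mut state = MachineState::with_stack(64);
    assert_eq!(state.fp, HOST_RETURN_ADDR);
    assert_eq!(state.lr, HOST_RETURN_ADDR);
    let top = state.stack.top();
    assert_eq!(state[XReg::sp].get_ptr::<u8>(), top);
}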

/// Inner private module to prevent creation of the `Done` structure outside of
/// this module.
mod done {
    use super::{Encode, Interpreter, MachineState};
    use core::ops::ControlFlow;
    use core::ptr::NonNull;

    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        _priv: (),
    }

    /// Reason that the pulley interpreter has ceased execution.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }

    /// Stored within `DoneReason::Trap`.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
        StackOverflow,
    }

    impl MachineState {
        pub(super) fn debug_assert_done_reason_none(&mut self) {
            debug_assert!(self.done_reason.is_none());
        }

        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
            self.done_reason.take().unwrap()
        }
    }

    impl Interpreter<'_> {
        /// Finishes execution by recording `DoneReason::Trap`.
        ///
        /// This method takes an `I` generic parameter indicating which
        /// instruction is executing this function and generating a trap. That's
        /// used to step backwards from the current `pc`, which is just beyond
        /// the instruction, so that the trap metadata returned from the
        /// interpreter points at the instruction itself.
        #[cold]
        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
            self.done_trap_kind::<I>(None)
        }

        /// Same as `done_trap` but with an explicit `TrapKind`.
        #[cold]
        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
            let pc = self.current_pc::<I>();
            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
        #[cold]
        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::CallIndirectHost {
                id,
                resume: self.pc.as_ptr(),
            });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::ReturnToHost`.
        #[cold]
        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
            ControlFlow::Break(Done { _priv: () })
        }
    }
}

use done::Done;
pub use done::{DoneReason, TrapKind};

struct Interpreter<'a> {
    state: &'a mut MachineState,
    pc: UnsafeBytecodeStream,
    executing_pc: ExecutingPcRef<'a>,
}

impl Interpreter<'_> {
    /// Calculates the address that is `offset` bytes away from the start of
    /// the current instruction `I`.
    #[inline]
    fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
        let offset = isize::try_from(i32::from(offset)).unwrap();
        unsafe { self.current_pc::<I>().offset(offset) }
    }

    /// Performs a relative jump of `offset` bytes from the current instruction.
    ///
    /// This will jump `offset` bytes away from the start of the current
    /// instruction, identified by `I`. Note that `self.pc` at the start of
    /// this function actually points to the next instruction, so `I` is
    /// necessary to first step back to the current one before going `offset`
    /// away.
    #[inline]
    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let new_pc = self.pc_rel::<I>(offset);
        self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
        ControlFlow::Continue(())
    }

    /// Returns the PC of the current instruction where `I` is the static type
    /// representing the current instruction.
    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
    }

    /// `sp -= size_of::<T>(); *sp = val;`
    ///
    /// Note that `I` is the instruction which is pushing data to use if a trap
    /// is generated.
    #[must_use]
    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
        self.set_sp::<I>(new_sp.cast())?;
        unsafe {
            new_sp.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }

    /// `ret = *sp; sp += size_of::<T>()`
    fn pop<T>(&mut self) -> T {
        let sp = self.state[XReg::sp].get_ptr::<T>();
        let val = unsafe { sp.read_unaligned() };
        self.set_sp_unchecked(sp.wrapping_add(1));
        val
    }

    /// Sets the stack pointer to the `sp` provided.
    ///
    /// Returns a trap if this would result in stack overflow, or if `sp` is
    /// beneath the base pointer of `self.state.stack`.
    ///
    /// The `I` parameter here is the instruction that is setting the stack
    /// pointer and is used to calculate this instruction's own `pc` if this
    /// instruction traps.
    #[must_use]
    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
        let sp_raw = sp as usize;
        let base_raw = self.state.stack.base() as usize;
        if sp_raw < base_raw {
            return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
        }
        self.set_sp_unchecked(sp);
        ControlFlow::Continue(())
    }

    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
    /// only be used with stack increment operations such as `pop`.
    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
        if cfg!(debug_assertions) {
            let sp_raw = sp as usize;
            let base = self.state.stack.base() as usize;
            let end = base + self.state.stack.len();
            assert!(base <= sp_raw && sp_raw <= end);
        }
        self.state[XReg::sp].set_ptr(sp);
    }

    /// Loads a value of `T` using native-endian byte ordering from the `addr`
    /// specified.
    ///
    /// The `I` type parameter is the instruction issuing this load which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was loaded successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
        unsafe { addr.load_ne::<T, I>(self) }
    }

    /// Stores a `val` to the `addr` specified.
    ///
    /// The `I` type parameter is the instruction issuing this store which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was stored successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(
        &mut self,
        addr: impl AddressingMode,
        val: T,
    ) -> ControlFlow<Done> {
        unsafe { addr.store_ne::<T, I>(self, val) }
    }

    fn check_xnn_from_f32<I: Encode>(
        &mut self,
        val: f32,
        (lo, hi): (f32, f32),
    ) -> ControlFlow<Done> {
        self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
    }

    fn check_xnn_from_f64<I: Encode>(
        &mut self,
        val: f64,
        (lo, hi): (f64, f64),
    ) -> ControlFlow<Done> {
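        // `val != val` is true precisely when `val` is NaN.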
        if val != val {
            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
        }
        let val = val.wasm_trunc();
        if val <= lo || val >= hi {
            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
        }
        ControlFlow::Continue(())
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
        let lo = self.state[lo].get_u64();
        let hi = self.state[hi].get_i64();
        i128::from(lo) | (i128::from(hi) << 64)
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
        self.state[lo].set_u64(val as u64);
        self.state[hi].set_u64((val >> 64) as u64);
    }

    fn record_executing_pc_for_profiling(&mut self) {
        // Note that this is a no-op if `feature = "profile"` is disabled.
        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
    }
}

/// Helper trait to encompass the various addressing modes of Pulley.
trait AddressingMode: Sized {
    /// Calculates the native host address `*mut T` corresponding to this
    /// addressing mode.
    ///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}

impl AddressingMode for AddrO32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}

impl AddressingMode for AddrZ {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
        let host_addr = i.state[self.addr].get_ptr::<T>();
        if host_addr.is_null() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}

impl AddressingMode for AddrG32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}
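
// Worked example with illustrative numbers: if `bound = 0x1_0000`, `offset = 8`,
// and `T = u32`, the check above passes only for `wasm_addr <= 0x1_0000 - 8 - 4
// = 0xfff4`, so the last byte accessed (`wasm_addr + 8 + 3`) always stays below
// `bound`.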

impl AddressingMode for AddrG32Bne {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
        let bound = unsafe {
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);
        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
        assert_eq!(i.pop::<i32>(), 4_i32);
        assert_eq!(i.pop::<i32>(), 3_i32);
        assert_eq!(i.pop::<i32>(), 2_i32);
        assert_eq!(i.pop::<i32>(), 1_i32);
    }
}

macro_rules! br_if_imm {
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}
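
// For illustration, the first `br_if_xeq32_i8` arm of the `br_if_imm!`
// invocation below expands to roughly:
//
//     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
//         -> ControlFlow<Done>
//     {
//         let a = self.state[a].get_i32();
//         if a == b.into() {
//             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
//         } else {
//             ControlFlow::Continue(())
//         }
//     }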

impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }

    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }

    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.pc_rel_jump::<crate::Call>(offset)
    }

    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.state[XReg::x0] = self.state[arg1];
        self.pc_rel_jump::<crate::Call1>(offset)
    }

    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1) = (self.state[arg1], self.state[arg2]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.pc_rel_jump::<crate::Call2>(offset)
    }

    fn call3(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.pc_rel_jump::<crate::Call3>(offset)
    }

    fn call4(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        arg4: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2, x3) = (
            self.state[arg1],
            self.state[arg2],
            self.state[arg3],
            self.state[arg4],
        );
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.state[XReg::x3] = x3;
        self.pc_rel_jump::<crate::Call4>(offset)
    }

    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is only valid
        // bytecode is interpreted, so the jump destination is part of the validity
        // of the bytecode itself.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }

    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond != 0 {
            self.pc_rel_jump::<crate::BrIf>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond == 0 {
            self.pc_rel_jump::<crate::BrIfNot>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    br_if_imm! {
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1661            = BrIfXugteq64U8 / >= / get_u64;
1662        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1663            = BrIfXugteq64U32 / >= / get_u64;
1664    }
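    // A sketch of what each `br_if_imm!` line above expands to, following the
    // hand-written register/register branches earlier in this impl (the
    // macro's actual expansion lives at its definition site):
    //
    //     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
    //         -> ControlFlow<Done>
    //     {
    //         let a = self.state[a].get_i32();
    //         if a == i32::from(b) {
    //             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
    //         } else {
    //             ControlFlow::Continue(())
    //         }
    //     }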
1665
1666    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1667        let val = self.state[src];
1668        self.state[dst] = val;
1669        ControlFlow::Continue(())
1670    }
1671
1672    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1673        self.state[dst].set_i64(i64::from(imm));
1674        ControlFlow::Continue(())
1675    }
1676
1677    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1678        self.state[dst].set_i64(0);
1679        ControlFlow::Continue(())
1680    }
1681
1682    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1683        self.state[dst].set_i64(1);
1684        ControlFlow::Continue(())
1685    }
1686
1687    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1688        self.state[dst].set_i64(i64::from(imm));
1689        ControlFlow::Continue(())
1690    }
1691
1692    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1693        self.state[dst].set_i64(i64::from(imm));
1694        ControlFlow::Continue(())
1695    }
1696
1697    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1698        self.state[dst].set_i64(imm);
1699        ControlFlow::Continue(())
1700    }
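    // Note that all of the `xconst*` opcodes above write the full 64-bit
    // register via `set_i64`, so narrower immediates are sign-extended: e.g.
    // `xconst8(dst, -1)` leaves `dst` holding 0xFFFF_FFFF_FFFF_FFFF.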
1701
1702    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1703        let a = self.state[operands.src1].get_u32();
1704        let b = self.state[operands.src2].get_u32();
1705        self.state[operands.dst].set_u32(a.wrapping_add(b));
1706        ControlFlow::Continue(())
1707    }
1708
1709    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1710        self.xadd32_u32(dst, src1, src2.into())
1711    }
1712
1713    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1714        let a = self.state[src1].get_u32();
1715        self.state[dst].set_u32(a.wrapping_add(src2));
1716        ControlFlow::Continue(())
1717    }
1718
1719    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1720        let a = self.state[operands.src1].get_u64();
1721        let b = self.state[operands.src2].get_u64();
1722        self.state[operands.dst].set_u64(a.wrapping_add(b));
1723        ControlFlow::Continue(())
1724    }
1725
1726    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1727        self.xadd64_u32(dst, src1, src2.into())
1728    }
1729
1730    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1731        let a = self.state[src1].get_u64();
1732        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1733        ControlFlow::Continue(())
1734    }
1735
1736    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1737        let a = self.state[src1].get_u32();
1738        let b = self.state[src2].get_u32();
1739        let c = self.state[src3].get_u32();
1740        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1741        ControlFlow::Continue(())
1742    }
1743
1744    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1745        let a = self.state[src1].get_u64();
1746        let b = self.state[src2].get_u64();
1747        let c = self.state[src3].get_u64();
1748        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1749        ControlFlow::Continue(())
1750    }
1751
1752    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1753        let a = self.state[operands.src1].get_u32();
1754        let b = self.state[operands.src2].get_u32();
1755        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1756        ControlFlow::Continue(())
1757    }
1758
1759    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1760        self.xsub32_u32(dst, src1, src2.into())
1761    }
1762
1763    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1764        let a = self.state[src1].get_u32();
1765        self.state[dst].set_u32(a.wrapping_sub(src2));
1766        ControlFlow::Continue(())
1767    }
1768
1769    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1770        let a = self.state[operands.src1].get_u64();
1771        let b = self.state[operands.src2].get_u64();
1772        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1773        ControlFlow::Continue(())
1774    }
1775
1776    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1777        self.xsub64_u32(dst, src1, src2.into())
1778    }
1779
1780    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1781        let a = self.state[src1].get_u64();
1782        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1783        ControlFlow::Continue(())
1784    }
1785
1786    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1787        let a = self.state[operands.src1].get_u32();
1788        let b = self.state[operands.src2].get_u32();
1789        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1790        ControlFlow::Continue(())
1791    }
1792
1793    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1794        self.xmul32_s32(dst, src1, src2.into())
1795    }
1796
1797    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1798        let a = self.state[src1].get_i32();
1799        self.state[dst].set_i32(a.wrapping_mul(src2));
1800        ControlFlow::Continue(())
1801    }
1802
1803    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1804        let a = self.state[operands.src1].get_u64();
1805        let b = self.state[operands.src2].get_u64();
1806        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1807        ControlFlow::Continue(())
1808    }
1809
1810    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1811        self.xmul64_s32(dst, src1, src2.into())
1812    }
1813
1814    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1815        let a = self.state[src1].get_i64();
1816        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1817        ControlFlow::Continue(())
1818    }
1819
1820    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1821        let a = self.state[operands.src1].get_u32();
1822        let b = self.state[operands.src2].get_u32();
1823        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1824        ControlFlow::Continue(())
1825    }
1826
1827    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1828        let a = self.state[operands.src1].get_u32();
1829        let b = self.state[operands.src2].get_u32();
1830        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1831        ControlFlow::Continue(())
1832    }
1833
1834    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1835        let a = self.state[operands.src1].get_i32();
1836        let b = self.state[operands.src2].get_u32();
1837        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1838        ControlFlow::Continue(())
1839    }
1840
1841    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1842        let a = self.state[operands.src1].get_u64();
1843        let b = self.state[operands.src2].get_u32();
1844        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1845        ControlFlow::Continue(())
1846    }
1847
1848    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1849        let a = self.state[operands.src1].get_u64();
1850        let b = self.state[operands.src2].get_u32();
1851        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1852        ControlFlow::Continue(())
1853    }
1854
1855    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1856        let a = self.state[operands.src1].get_i64();
1857        let b = self.state[operands.src2].get_u32();
1858        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1859        ControlFlow::Continue(())
1860    }
1861
1862    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1863        let a = self.state[operands.src1].get_u32();
1864        let b = u32::from(u8::from(operands.src2));
1865        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1866        ControlFlow::Continue(())
1867    }
1868
1869    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1870        let a = self.state[operands.src1].get_u32();
1871        let b = u32::from(u8::from(operands.src2));
1872        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1873        ControlFlow::Continue(())
1874    }
1875
1876    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1877        let a = self.state[operands.src1].get_i32();
1878        let b = u32::from(u8::from(operands.src2));
1879        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1880        ControlFlow::Continue(())
1881    }
1882
1883    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1884        let a = self.state[operands.src1].get_u64();
1885        let b = u32::from(u8::from(operands.src2));
1886        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1887        ControlFlow::Continue(())
1888    }
1889
1890    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1891        let a = self.state[operands.src1].get_u64();
1892        let b = u32::from(u8::from(operands.src2));
1893        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1894        ControlFlow::Continue(())
1895    }
1896
1897    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1898        let a = self.state[operands.src1].get_i64();
1899        let b = u32::from(u8::from(operands.src2));
1900        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1901        ControlFlow::Continue(())
1902    }
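    // The `U6` immediate in the `*_u6` shifts encodes an amount in 0..=63,
    // so the wrapping behavior above can never fire for the 64-bit variants;
    // a 32-bit shift by 32..=63 would still wrap modulo 32, which the
    // encoder is presumably expected never to emit.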
1903
1904    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1905        let a = self.state[src].get_i32();
1906        self.state[dst].set_i32(a.wrapping_neg());
1907        ControlFlow::Continue(())
1908    }
1909
1910    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1911        let a = self.state[src].get_i64();
1912        self.state[dst].set_i64(a.wrapping_neg());
1913        ControlFlow::Continue(())
1914    }
1915
1916    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1917        let a = self.state[operands.src1].get_u64();
1918        let b = self.state[operands.src2].get_u64();
1919        self.state[operands.dst].set_u32(u32::from(a == b));
1920        ControlFlow::Continue(())
1921    }
1922
1923    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1924        let a = self.state[operands.src1].get_u64();
1925        let b = self.state[operands.src2].get_u64();
1926        self.state[operands.dst].set_u32(u32::from(a != b));
1927        ControlFlow::Continue(())
1928    }
1929
1930    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1931        let a = self.state[operands.src1].get_i64();
1932        let b = self.state[operands.src2].get_i64();
1933        self.state[operands.dst].set_u32(u32::from(a < b));
1934        ControlFlow::Continue(())
1935    }
1936
1937    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1938        let a = self.state[operands.src1].get_i64();
1939        let b = self.state[operands.src2].get_i64();
1940        self.state[operands.dst].set_u32(u32::from(a <= b));
1941        ControlFlow::Continue(())
1942    }
1943
1944    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1945        let a = self.state[operands.src1].get_u64();
1946        let b = self.state[operands.src2].get_u64();
1947        self.state[operands.dst].set_u32(u32::from(a < b));
1948        ControlFlow::Continue(())
1949    }
1950
1951    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1952        let a = self.state[operands.src1].get_u64();
1953        let b = self.state[operands.src2].get_u64();
1954        self.state[operands.dst].set_u32(u32::from(a <= b));
1955        ControlFlow::Continue(())
1956    }
1957
1958    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1959        let a = self.state[operands.src1].get_u32();
1960        let b = self.state[operands.src2].get_u32();
1961        self.state[operands.dst].set_u32(u32::from(a == b));
1962        ControlFlow::Continue(())
1963    }
1964
1965    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1966        let a = self.state[operands.src1].get_u32();
1967        let b = self.state[operands.src2].get_u32();
1968        self.state[operands.dst].set_u32(u32::from(a != b));
1969        ControlFlow::Continue(())
1970    }
1971
1972    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1973        let a = self.state[operands.src1].get_i32();
1974        let b = self.state[operands.src2].get_i32();
1975        self.state[operands.dst].set_u32(u32::from(a < b));
1976        ControlFlow::Continue(())
1977    }
1978
1979    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1980        let a = self.state[operands.src1].get_i32();
1981        let b = self.state[operands.src2].get_i32();
1982        self.state[operands.dst].set_u32(u32::from(a <= b));
1983        ControlFlow::Continue(())
1984    }
1985
1986    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1987        let a = self.state[operands.src1].get_u32();
1988        let b = self.state[operands.src2].get_u32();
1989        self.state[operands.dst].set_u32(u32::from(a < b));
1990        ControlFlow::Continue(())
1991    }
1992
1993    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1994        let a = self.state[operands.src1].get_u32();
1995        let b = self.state[operands.src2].get_u32();
1996        self.state[operands.dst].set_u32(u32::from(a <= b));
1997        ControlFlow::Continue(())
1998    }
1999
2000    fn push_frame(&mut self) -> ControlFlow<Done> {
2001        self.push::<crate::PushFrame, _>(self.state.lr)?;
2002        self.push::<crate::PushFrame, _>(self.state.fp)?;
2003        self.state.fp = self.state[XReg::sp].get_ptr();
2004        ControlFlow::Continue(())
2005    }
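    // After `push_frame` the stack looks like this (a sketch; the stack
    // grows downward and each `push` stores below the previous `sp`):
    //
    //         +----------+ <- sp on entry
    //         | saved lr |
    //         +----------+
    //         | saved fp |
    // fp/sp > +----------+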
2006
2007    #[inline]
2008    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2009        // Decrement the stack pointer by `amt` bytes, plus two more
2010        // pointer-sized slots for fp/lr.
2011        let ptr_size = size_of::<usize>();
2012        let full_amt = usize::from(amt) + 2 * ptr_size;
2013        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
2014        self.set_sp::<crate::PushFrameSave>(new_sp)?;
2015
2016        unsafe {
2017            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
2018            // that order, at the top of the allocated area.
2019            self.store_ne::<_, crate::PushFrameSave>(
2020                AddrO32 {
2021                    addr: XReg::sp,
2022                    offset: (full_amt - 1 * ptr_size) as i32,
2023                },
2024                self.state.lr,
2025            )?;
2026            self.store_ne::<_, crate::PushFrameSave>(
2027                AddrO32 {
2028                    addr: XReg::sp,
2029                    offset: (full_amt - 2 * ptr_size) as i32,
2030                },
2031                self.state.fp,
2032            )?;
2033
2034            // Set `fp` to the top of our frame, where the previous `fp` was just stored.
2035            let mut offset = amt as i32;
2036            self.state.fp = self.state[XReg::sp]
2037                .get_ptr::<u8>()
2038                .byte_offset(offset as isize);
2039
2040            // Next save any registers in `regs` to the stack.
2041            for reg in regs {
2042                offset -= 8;
2043                self.store_ne::<_, crate::PushFrameSave>(
2044                    AddrO32 {
2045                        addr: XReg::sp,
2046                        offset,
2047                    },
2048                    self.state[reg].get_u64(),
2049                )?;
2050            }
2051        }
2052        ControlFlow::Continue(())
2053    }
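    // The frame built by `push_frame_save` (a sketch; stack grows downward):
    //
    //     +----------------------+ <- sp on entry
    //     | saved lr             |
    //     +----------------------+
    //     | saved fp             | <- new fp (entry sp - 2 * ptr_size)
    //     +----------------------+
    //     | `regs`, 8 bytes each |    stored downward from fp
    //     +----------------------+
    //     | rest of `amt` bytes  |
    //     +----------------------+ <- new sp (entry sp - amt - 2 * ptr_size)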
2054
2055    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2056        // Restore all registers in `regs`, followed by the normal `pop_frame`
2057        // opcode below to restore fp/lr.
2058        unsafe {
2059            let mut offset = i32::from(amt);
2060            for reg in regs {
2061                offset -= 8;
2062                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2063                    addr: XReg::sp,
2064                    offset,
2065                })?;
2066                self.state[reg].set_u64(val);
2067            }
2068        }
2069        self.pop_frame()
2070    }
2071
2072    fn pop_frame(&mut self) -> ControlFlow<Done> {
2073        self.set_sp_unchecked(self.state.fp);
2074        let fp = self.pop();
2075        let lr = self.pop();
2076        self.state.fp = fp;
2077        self.state.lr = lr;
2078        ControlFlow::Continue(())
2079    }
2080
2081    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2082        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2083        // SAFETY: part of the contract of the interpreter is only dealing with
2084        // valid bytecode, so this offset should be safe.
2085        self.pc = unsafe { self.pc.offset(idx * 4) };
2086
2087        // Decode the `PcRelOffset` through a temporary copy: the jump below
2088        // is relative to `self.pc`, which must keep pointing at the entry.
2089        let mut tmp = self.pc;
2090        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2091        let offset = isize::try_from(i32::from(rel)).unwrap();
2092        self.pc = unsafe { self.pc.offset(offset) };
2093        ControlFlow::Continue(())
2094    }
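    // A sketch of the encoding this relies on: `br_table32` is followed
    // immediately by `amt` 4-byte `PcRelOffset` entries, each relative to its
    // own position, and clamping `idx` to `amt - 1` makes the final entry
    // double as the default target (which also means `amt` must be at least
    // 1 for the clamp not to underflow):
    //
    //     br_table32 idx, amt
    //     offset0   ; taken when idx == 0
    //     offset1   ; taken when idx == 1
    //     ...
    //     offsetN   ; taken when idx >= amt - 1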
2095
2096    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2097        let amt = usize::try_from(amt).unwrap();
2098        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2099        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2100        ControlFlow::Continue(())
2101    }
2102
2103    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2104        let amt = usize::try_from(amt).unwrap();
2105        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2106        self.set_sp_unchecked(new_sp);
2107        ControlFlow::Continue(())
2108    }
2109
2110    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2111        let src = self.state[src].get_u64() as u8;
2112        self.state[dst].set_u64(src.into());
2113        ControlFlow::Continue(())
2114    }
2115
2116    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2117        let src = self.state[src].get_u64() as u16;
2118        self.state[dst].set_u64(src.into());
2119        ControlFlow::Continue(())
2120    }
2121
2122    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2123        let src = self.state[src].get_u64() as u32;
2124        self.state[dst].set_u64(src.into());
2125        ControlFlow::Continue(())
2126    }
2127
2128    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2129        let src = self.state[src].get_i64() as i8;
2130        self.state[dst].set_i64(src.into());
2131        ControlFlow::Continue(())
2132    }
2133
2134    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2135        let src = self.state[src].get_i64() as i16;
2136        self.state[dst].set_i64(src.into());
2137        ControlFlow::Continue(())
2138    }
2139
2140    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2141        let src = self.state[src].get_i64() as i32;
2142        self.state[dst].set_i64(src.into());
2143        ControlFlow::Continue(())
2144    }
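    // A worked example of the extensions above: if the low byte of `src` is
    // 0x80, then `zext8` produces 0x0000_0000_0000_0080 (128) while `sext8`
    // produces 0xFFFF_FFFF_FFFF_FF80 (-128).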
2145
2146    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2147        let a = self.state[operands.src1].get_i32();
2148        let b = self.state[operands.src2].get_i32();
2149        match a.checked_div(b) {
2150            Some(result) => {
2151                self.state[operands.dst].set_i32(result);
2152                ControlFlow::Continue(())
2153            }
2154            None => {
2155                let kind = if b == 0 {
2156                    TrapKind::DivideByZero
2157                } else {
2158                    TrapKind::IntegerOverflow
2159                };
2160                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2161            }
2162        }
2163    }
2164
2165    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2166        let a = self.state[operands.src1].get_i64();
2167        let b = self.state[operands.src2].get_i64();
2168        match a.checked_div(b) {
2169            Some(result) => {
2170                self.state[operands.dst].set_i64(result);
2171                ControlFlow::Continue(())
2172            }
2173            None => {
2174                let kind = if b == 0 {
2175                    TrapKind::DivideByZero
2176                } else {
2177                    TrapKind::IntegerOverflow
2178                };
2179                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2180            }
2181        }
2182    }
2183
2184    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2185        let a = self.state[operands.src1].get_u32();
2186        let b = self.state[operands.src2].get_u32();
2187        match a.checked_div(b) {
2188            Some(result) => {
2189                self.state[operands.dst].set_u32(result);
2190                ControlFlow::Continue(())
2191            }
2192            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2193        }
2194    }
2195
2196    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2197        let a = self.state[operands.src1].get_u64();
2198        let b = self.state[operands.src2].get_u64();
2199        match a.checked_div(b) {
2200            Some(result) => {
2201                self.state[operands.dst].set_u64(result);
2202                ControlFlow::Continue(())
2203            }
2204            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2205        }
2206    }
2207
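    // Wasm defines `i32::MIN % -1` (and the 64-bit analogue) as 0, whereas
    // Rust's `checked_rem` treats it as an overflow and returns `None`, so
    // the two signed remainders below special-case it before trapping only
    // on a zero divisor.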
2208    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2209        let a = self.state[operands.src1].get_i32();
2210        let b = self.state[operands.src2].get_i32();
2211        let result = if a == i32::MIN && b == -1 {
2212            Some(0)
2213        } else {
2214            a.checked_rem(b)
2215        };
2216        match result {
2217            Some(result) => {
2218                self.state[operands.dst].set_i32(result);
2219                ControlFlow::Continue(())
2220            }
2221            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2222        }
2223    }
2224
2225    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2226        let a = self.state[operands.src1].get_i64();
2227        let b = self.state[operands.src2].get_i64();
2228        let result = if a == i64::MIN && b == -1 {
2229            Some(0)
2230        } else {
2231            a.checked_rem(b)
2232        };
2233        match result {
2234            Some(result) => {
2235                self.state[operands.dst].set_i64(result);
2236                ControlFlow::Continue(())
2237            }
2238            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2239        }
2240    }
2241
2242    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2243        let a = self.state[operands.src1].get_u32();
2244        let b = self.state[operands.src2].get_u32();
2245        match a.checked_rem(b) {
2246            Some(result) => {
2247                self.state[operands.dst].set_u32(result);
2248                ControlFlow::Continue(())
2249            }
2250            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2251        }
2252    }
2253
2254    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2255        let a = self.state[operands.src1].get_u64();
2256        let b = self.state[operands.src2].get_u64();
2257        match a.checked_rem(b) {
2258            Some(result) => {
2259                self.state[operands.dst].set_u64(result);
2260                ControlFlow::Continue(())
2261            }
2262            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2263        }
2264    }
2265
2266    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2267        let a = self.state[operands.src1].get_u32();
2268        let b = self.state[operands.src2].get_u32();
2269        self.state[operands.dst].set_u32(a & b);
2270        ControlFlow::Continue(())
2271    }
2272
2273    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2274        self.xband32_s32(dst, src1, src2.into())
2275    }
2276
2277    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2278        let a = self.state[src1].get_i32();
2279        self.state[dst].set_i32(a & src2);
2280        ControlFlow::Continue(())
2281    }
2282
2283    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2284        let a = self.state[operands.src1].get_u64();
2285        let b = self.state[operands.src2].get_u64();
2286        self.state[operands.dst].set_u64(a & b);
2287        ControlFlow::Continue(())
2288    }
2289
2290    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2291        self.xband64_s32(dst, src1, src2.into())
2292    }
2293
2294    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2295        let a = self.state[src1].get_i64();
2296        self.state[dst].set_i64(a & i64::from(src2));
2297        ControlFlow::Continue(())
2298    }
2299
2300    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2301        let a = self.state[operands.src1].get_u32();
2302        let b = self.state[operands.src2].get_u32();
2303        self.state[operands.dst].set_u32(a | b);
2304        ControlFlow::Continue(())
2305    }
2306
2307    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2308        self.xbor32_s32(dst, src1, src2.into())
2309    }
2310
2311    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2312        let a = self.state[src1].get_i32();
2313        self.state[dst].set_i32(a | src2);
2314        ControlFlow::Continue(())
2315    }
2316
2317    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2318        let a = self.state[operands.src1].get_u64();
2319        let b = self.state[operands.src2].get_u64();
2320        self.state[operands.dst].set_u64(a | b);
2321        ControlFlow::Continue(())
2322    }
2323
2324    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2325        self.xbor64_s32(dst, src1, src2.into())
2326    }
2327
2328    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2329        let a = self.state[src1].get_i64();
2330        self.state[dst].set_i64(a | i64::from(src2));
2331        ControlFlow::Continue(())
2332    }
2333
2334    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2335        let a = self.state[operands.src1].get_u32();
2336        let b = self.state[operands.src2].get_u32();
2337        self.state[operands.dst].set_u32(a ^ b);
2338        ControlFlow::Continue(())
2339    }
2340
2341    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2342        self.xbxor32_s32(dst, src1, src2.into())
2343    }
2344
2345    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2346        let a = self.state[src1].get_i32();
2347        self.state[dst].set_i32(a ^ src2);
2348        ControlFlow::Continue(())
2349    }
2350
2351    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2352        let a = self.state[operands.src1].get_u64();
2353        let b = self.state[operands.src2].get_u64();
2354        self.state[operands.dst].set_u64(a ^ b);
2355        ControlFlow::Continue(())
2356    }
2357
2358    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2359        self.xbxor64_s32(dst, src1, src2.into())
2360    }
2361
2362    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2363        let a = self.state[src1].get_i64();
2364        self.state[dst].set_i64(a ^ i64::from(src2));
2365        ControlFlow::Continue(())
2366    }
2367
2368    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2369        let a = self.state[src].get_u32();
2370        self.state[dst].set_u32(!a);
2371        ControlFlow::Continue(())
2372    }
2373
2374    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2375        let a = self.state[src].get_u64();
2376        self.state[dst].set_u64(!a);
2377        ControlFlow::Continue(())
2378    }
2379
2380    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2381        let a = self.state[operands.src1].get_u32();
2382        let b = self.state[operands.src2].get_u32();
2383        self.state[operands.dst].set_u32(a.min(b));
2384        ControlFlow::Continue(())
2385    }
2386
2387    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2388        let a = self.state[operands.src1].get_i32();
2389        let b = self.state[operands.src2].get_i32();
2390        self.state[operands.dst].set_i32(a.min(b));
2391        ControlFlow::Continue(())
2392    }
2393
2394    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2395        let a = self.state[operands.src1].get_u32();
2396        let b = self.state[operands.src2].get_u32();
2397        self.state[operands.dst].set_u32(a.max(b));
2398        ControlFlow::Continue(())
2399    }
2400
2401    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2402        let a = self.state[operands.src1].get_i32();
2403        let b = self.state[operands.src2].get_i32();
2404        self.state[operands.dst].set_i32(a.max(b));
2405        ControlFlow::Continue(())
2406    }
2407
2408    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2409        let a = self.state[operands.src1].get_u64();
2410        let b = self.state[operands.src2].get_u64();
2411        self.state[operands.dst].set_u64(a.min(b));
2412        ControlFlow::Continue(())
2413    }
2414
2415    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2416        let a = self.state[operands.src1].get_i64();
2417        let b = self.state[operands.src2].get_i64();
2418        self.state[operands.dst].set_i64(a.min(b));
2419        ControlFlow::Continue(())
2420    }
2421
2422    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2423        let a = self.state[operands.src1].get_u64();
2424        let b = self.state[operands.src2].get_u64();
2425        self.state[operands.dst].set_u64(a.max(b));
2426        ControlFlow::Continue(())
2427    }
2428
2429    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2430        let a = self.state[operands.src1].get_i64();
2431        let b = self.state[operands.src2].get_i64();
2432        self.state[operands.dst].set_i64(a.max(b));
2433        ControlFlow::Continue(())
2434    }
2435
2436    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2437        let a = self.state[src].get_u32();
2438        self.state[dst].set_u32(a.trailing_zeros());
2439        ControlFlow::Continue(())
2440    }
2441
2442    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2443        let a = self.state[src].get_u64();
2444        self.state[dst].set_u64(a.trailing_zeros().into());
2445        ControlFlow::Continue(())
2446    }
2447
2448    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2449        let a = self.state[src].get_u32();
2450        self.state[dst].set_u32(a.leading_zeros());
2451        ControlFlow::Continue(())
2452    }
2453
2454    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2455        let a = self.state[src].get_u64();
2456        self.state[dst].set_u64(a.leading_zeros().into());
2457        ControlFlow::Continue(())
2458    }
2459
2460    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2461        let a = self.state[src].get_u32();
2462        self.state[dst].set_u32(a.count_ones());
2463        ControlFlow::Continue(())
2464    }
2465
2466    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2467        let a = self.state[src].get_u64();
2468        self.state[dst].set_u64(a.count_ones().into());
2469        ControlFlow::Continue(())
2470    }
2471
2472    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2473        let a = self.state[operands.src1].get_u32();
2474        let b = self.state[operands.src2].get_u32();
2475        self.state[operands.dst].set_u32(a.rotate_left(b));
2476        ControlFlow::Continue(())
2477    }
2478
2479    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2480        let a = self.state[operands.src1].get_u64();
2481        let b = self.state[operands.src2].get_u32();
2482        self.state[operands.dst].set_u64(a.rotate_left(b));
2483        ControlFlow::Continue(())
2484    }
2485
2486    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2487        let a = self.state[operands.src1].get_u32();
2488        let b = self.state[operands.src2].get_u32();
2489        self.state[operands.dst].set_u32(a.rotate_right(b));
2490        ControlFlow::Continue(())
2491    }
2492
2493    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2494        let a = self.state[operands.src1].get_u64();
2495        let b = self.state[operands.src2].get_u32();
2496        self.state[operands.dst].set_u64(a.rotate_right(b));
2497        ControlFlow::Continue(())
2498    }
2499
2500    fn xselect32(
2501        &mut self,
2502        dst: XReg,
2503        cond: XReg,
2504        if_nonzero: XReg,
2505        if_zero: XReg,
2506    ) -> ControlFlow<Done> {
2507        let result = if self.state[cond].get_u32() != 0 {
2508            self.state[if_nonzero].get_u32()
2509        } else {
2510            self.state[if_zero].get_u32()
2511        };
2512        self.state[dst].set_u32(result);
2513        ControlFlow::Continue(())
2514    }
2515
2516    fn xselect64(
2517        &mut self,
2518        dst: XReg,
2519        cond: XReg,
2520        if_nonzero: XReg,
2521        if_zero: XReg,
2522    ) -> ControlFlow<Done> {
2523        let result = if self.state[cond].get_u32() != 0 {
2524            self.state[if_nonzero].get_u64()
2525        } else {
2526            self.state[if_zero].get_u64()
2527        };
2528        self.state[dst].set_u64(result);
2529        ControlFlow::Continue(())
2530    }
2531
2532    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2533        let a = self.state[src].get_i32();
2534        self.state[dst].set_i32(a.wrapping_abs());
2535        ControlFlow::Continue(())
2536    }
2537
2538    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2539        let a = self.state[src].get_i64();
2540        self.state[dst].set_i64(a.wrapping_abs());
2541        ControlFlow::Continue(())
2542    }
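    // `wrapping_abs` only differs from `abs` at the minimum value: `xabs32`
    // of i32::MIN yields i32::MIN itself (it has no positive counterpart),
    // and likewise for `xabs64` with i64::MIN.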
2543
2544    // =========================================================================
2545    // o32 addressing modes
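    // An `AddrO32` pairs a base `XReg` with a signed 32-bit byte offset; the
    // accessors below dereference `base + offset` directly with no bounds
    // check, so it is the code generator's responsibility to emit these only
    // for addresses known to be valid.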
2546
2547    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2548        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2549        self.state[dst].set_u32(result.into());
2550        ControlFlow::Continue(())
2551    }
2552
2553    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2554        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2555        self.state[dst].set_i32(result.into());
2556        ControlFlow::Continue(())
2557    }
2558
2559    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2560        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2561        self.state[dst].set_u32(u16::from_le(result).into());
2562        ControlFlow::Continue(())
2563    }
2564
2565    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2566        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2567        self.state[dst].set_i32(i16::from_le(result).into());
2568        ControlFlow::Continue(())
2569    }
2570
2571    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2572        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2573        self.state[dst].set_i32(i32::from_le(result));
2574        ControlFlow::Continue(())
2575    }
2576
2577    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2578        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2579        self.state[dst].set_i64(i64::from_le(result));
2580        ControlFlow::Continue(())
2581    }
2582
2583    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2584        let val = self.state[val].get_u32() as u8;
2585        unsafe {
2586            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2587        }
2588        ControlFlow::Continue(())
2589    }
2590
2591    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2592        let val = self.state[val].get_u32() as u16;
2593        unsafe {
2594            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2595        }
2596        ControlFlow::Continue(())
2597    }
2598
2599    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2600        let val = self.state[val].get_u32();
2601        unsafe {
2602            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2603        }
2604        ControlFlow::Continue(())
2605    }
2606
2607    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2608        let val = self.state[val].get_u64();
2609        unsafe {
2610            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2611        }
2612        ControlFlow::Continue(())
2613    }
2614
2615    // =========================================================================
2616    // g32 addressing modes
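    // The `g32` accessors take an `AddrG32` describing a guest-memory
    // access; unlike `o32`, `load_ne`/`store_ne` for this mode are expected
    // to bounds-check the guest offset and trap when it is out of bounds
    // (a reading based on the addressing-mode name; see `AddrG32` for the
    // exact fields).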
2617
2618    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2619        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2620        self.state[dst].set_u32(result.into());
2621        ControlFlow::Continue(())
2622    }
2623
2624    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2625        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2626        self.state[dst].set_i32(result.into());
2627        ControlFlow::Continue(())
2628    }
2629
2630    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2631        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2632        self.state[dst].set_u32(u16::from_le(result).into());
2633        ControlFlow::Continue(())
2634    }
2635
2636    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2637        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2638        self.state[dst].set_i32(i16::from_le(result).into());
2639        ControlFlow::Continue(())
2640    }
2641
2642    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2643        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2644        self.state[dst].set_i32(i32::from_le(result));
2645        ControlFlow::Continue(())
2646    }
2647
2648    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2649        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2650        self.state[dst].set_i64(i64::from_le(result));
2651        ControlFlow::Continue(())
2652    }
2653
2654    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2655        let val = self.state[val].get_u32() as u8;
2656        unsafe {
2657            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2658        }
2659        ControlFlow::Continue(())
2660    }
2661
2662    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2663        let val = self.state[val].get_u32() as u16;
2664        unsafe {
2665            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2666        }
2667        ControlFlow::Continue(())
2668    }
2669
2670    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2671        let val = self.state[val].get_u32();
2672        unsafe {
2673            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2674        }
2675        ControlFlow::Continue(())
2676    }
2677
2678    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2679        let val = self.state[val].get_u64();
2680        unsafe {
2681            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2682        }
2683        ControlFlow::Continue(())
2684    }
2685
2686    // =========================================================================
2687    // z addressing modes
2688
2689    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2690        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2691        self.state[dst].set_u32(result.into());
2692        ControlFlow::Continue(())
2693    }
2694
2695    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2696        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2697        self.state[dst].set_i32(result.into());
2698        ControlFlow::Continue(())
2699    }
2700
2701    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2702        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2703        self.state[dst].set_u32(u16::from_le(result).into());
2704        ControlFlow::Continue(())
2705    }
2706
2707    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2708        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2709        self.state[dst].set_i32(i16::from_le(result).into());
2710        ControlFlow::Continue(())
2711    }
2712
2713    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2714        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2715        self.state[dst].set_i32(i32::from_le(result));
2716        ControlFlow::Continue(())
2717    }
2718
2719    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2720        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2721        self.state[dst].set_i64(i64::from_le(result));
2722        ControlFlow::Continue(())
2723    }
2724
2725    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2726        let val = self.state[val].get_u32() as u8;
2727        unsafe {
2728            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2729        }
2730        ControlFlow::Continue(())
2731    }
2732
2733    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2734        let val = self.state[val].get_u32() as u16;
2735        unsafe {
2736            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2737        }
2738        ControlFlow::Continue(())
2739    }
2740
2741    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2742        let val = self.state[val].get_u32();
2743        unsafe {
2744            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2745        }
2746        ControlFlow::Continue(())
2747    }
2748
2749    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2750        let val = self.state[val].get_u64();
2751        unsafe {
2752            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2753        }
2754        ControlFlow::Continue(())
2755    }
2756
2757    // =========================================================================
2758    // g32bne addressing modes
2759
2760    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2761        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2762        self.state[dst].set_u32(result.into());
2763        ControlFlow::Continue(())
2764    }
2765
2766    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2767        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2768        self.state[dst].set_i32(result.into());
2769        ControlFlow::Continue(())
2770    }
2771
2772    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2773        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2774        self.state[dst].set_u32(u16::from_le(result).into());
2775        ControlFlow::Continue(())
2776    }
2777
2778    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2779        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2780        self.state[dst].set_i32(i16::from_le(result).into());
2781        ControlFlow::Continue(())
2782    }
2783
2784    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2785        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2786        self.state[dst].set_i32(i32::from_le(result));
2787        ControlFlow::Continue(())
2788    }
2789
2790    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2791        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2792        self.state[dst].set_i64(i64::from_le(result));
2793        ControlFlow::Continue(())
2794    }
2795
2796    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2797        let val = self.state[val].get_u32() as u8;
2798        unsafe {
2799            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2800        }
2801        ControlFlow::Continue(())
2802    }
2803
2804    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2805        let val = self.state[val].get_u32() as u16;
2806        unsafe {
2807            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2808        }
2809        ControlFlow::Continue(())
2810    }
2811
2812    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2813        let val = self.state[val].get_u32();
2814        unsafe {
2815            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2816        }
2817        ControlFlow::Continue(())
2818    }
2819
2820    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2821        let val = self.state[val].get_u64();
2822        unsafe {
2823            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2824        }
2825        ControlFlow::Continue(())
2826    }
2827}
2828
2829impl ExtendedOpVisitor for Interpreter<'_> {
2830    fn nop(&mut self) -> ControlFlow<Done> {
2831        ControlFlow::Continue(())
2832    }
2833
2834    fn trap(&mut self) -> ControlFlow<Done> {
2835        self.done_trap::<crate::Trap>()
2836    }
2837
2838    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2839        self.done_call_indirect_host(id)
2840    }
2841
2842    fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
2843        let pc = self.pc_rel::<crate::Xpcadd>(offset);
2844        self.state[dst].set_ptr(pc.as_ptr());
2845        ControlFlow::Continue(())
2846    }
2847
2848    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2849        let src = self.state[src].get_u32();
2850        self.state[dst].set_u32(src.swap_bytes());
2851        ControlFlow::Continue(())
2852    }
2853
2854    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2855        let src = self.state[src].get_u64();
2856        self.state[dst].set_u64(src.swap_bytes());
2857        ControlFlow::Continue(())
2858    }
2859
2860    fn xbmask32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2861        let a = self.state[src].get_u32();
2862        if a == 0 {
2863            self.state[dst].set_u32(0);
2864        } else {
2865            self.state[dst].set_i32(-1);
2866        }
2867        ControlFlow::Continue(())
2868    }
2869
2870    fn xbmask64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2871        let a = self.state[src].get_u64();
2872        if a == 0 {
2873            self.state[dst].set_u64(0);
2874        } else {
2875            self.state[dst].set_i64(-1);
2876        }
2877        ControlFlow::Continue(())
2878    }
2879
2880    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2881        let a = self.state[operands.src1].get_u32();
2882        let b = self.state[operands.src2].get_u32();
2883        match a.checked_add(b) {
2884            Some(c) => {
2885                self.state[operands.dst].set_u32(c);
2886                ControlFlow::Continue(())
2887            }
2888            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2889        }
2890    }
2891
2892    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2893        let a = self.state[operands.src1].get_u64();
2894        let b = self.state[operands.src2].get_u64();
2895        match a.checked_add(b) {
2896            Some(c) => {
2897                self.state[operands.dst].set_u64(c);
2898                ControlFlow::Continue(())
2899            }
2900            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2901        }
2902    }
2903
2904    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2905        let a = self.state[operands.src1].get_i64();
2906        let b = self.state[operands.src2].get_i64();
2907        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2908        self.state[operands.dst].set_i64(result);
2909        ControlFlow::Continue(())
2910    }
2911
2912    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2913        let a = self.state[operands.src1].get_u64();
2914        let b = self.state[operands.src2].get_u64();
2915        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2916        self.state[operands.dst].set_u64(result);
2917        ControlFlow::Continue(())
2918    }
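    // Widening to 128 bits and shifting right by 64 extracts the high half
    // of the full product: e.g. `xmulhi64_u` of 1 << 63 and 4 computes
    // 1 << 65 as a `u128` and stores 2.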

    // =========================================================================
    // o32 addressing modes for big-endian X-registers

    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
        self.state[dst].set_u32(u16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
        self.state[dst].set_i32(i16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
        self.state[dst].set_i32(i32::from_be(result));
        ControlFlow::Continue(())
    }

    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
        self.state[dst].set_i64(i64::from_be(result));
        ControlFlow::Continue(())
    }

    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }
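
    // All the big-endian accesses above move bytes with a native-endian
    // load/store and byte-swap separately: `from_be` on loads, `to_be` on
    // stores. On a big-endian host both are no-ops. Sketch (illustrative
    // only):
    //
    //     let raw = u16::from_ne_bytes([0x12, 0x34]); // what `load_ne` yields
    //     assert_eq!(u16::from_be(raw), 0x1234);      // same on any host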

    // =========================================================================
    // o32 addressing modes for little-endian F-registers

    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }
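
    // Float loads/stores round-trip through the integer bit pattern so that
    // endianness handling can never canonicalize a NaN payload. Round-trip
    // sketch (illustrative only):
    //
    //     let f = f32::from_bits(0x7fc0_0001); // NaN with a nonzero payload
    //     let stored = f.to_bits().to_le();
    //     assert_eq!(f32::from_bits(u32::from_le(stored)).to_bits(), 0x7fc0_0001);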

    // =========================================================================
    // o32 addressing modes for big-endian F-registers

    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian F-registers

    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian F-registers

    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }
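
    // The `o32`/`z`/`g32` suffixes are the same addressing modes used by the
    // X-register accesses earlier in this file: roughly, `o32` adds a 32-bit
    // offset to a base register, `z` additionally permits the special zero
    // register as a base, and `g32` is the bounds-checked "guarded" form used
    // for wasm linear memory. See the `Addr*` types in the decoder for the
    // authoritative layouts.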

    // =========================================================================
    // o32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
        let fp = self.state.fp;
        self.state[dst].set_ptr(fp);
        ControlFlow::Continue(())
    }

    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
        let lr = self.state.lr;
        self.state[dst].set_ptr(lr);
        ControlFlow::Continue(())
    }

    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
        self.state[dst].set_f64(f64::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_u32(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_u64(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_f32(f32::from_bits(val));
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_f64(f64::from_bits(val));
        ControlFlow::Continue(())
    }

    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn fselect32(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f32()
        } else {
            self.state[if_zero].get_f32()
        };
        self.state[dst].set_f32(result);
        ControlFlow::Continue(())
    }

    fn fselect64(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f64()
        } else {
            self.state[if_zero].get_f64()
        };
        self.state[dst].set_f64(result);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }
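
    // The `f*_from_x*` conversions use Rust `as` casts, which round to the
    // nearest representable float (ties to even) when the integer is not
    // exactly representable. E.g. (illustrative only):
    //
    //     assert_eq!(u64::MAX as f64, 1.8446744073709552e19); // rounds up to 2^64
    //     assert_eq!(16_777_217i32 as f32, 16_777_216.0);     // 2^24 + 1 rounds down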

    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }
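
    // The trapping conversions above first run `check_xnn_from_f*`, which
    // exits early (via `?`) when the input is NaN or outside the bounds
    // computed by `f*_cvt_to_int_bounds`; only in-range values reach the
    // `as` cast. E.g. 4294967296.0f64 is out of range for `u32` and would
    // trap, while 4294967295.0f64 converts exactly to `u32::MAX`.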

    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }
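
    // The `_sat` variants need no bounds check because Rust's float-to-int
    // `as` casts are themselves saturating, with NaN mapping to zero.
    // Illustrative sketch:
    //
    //     assert_eq!(f32::NAN as i32, 0);
    //     assert_eq!(f32::INFINITY as i32, i32::MAX);
    //     assert_eq!(-1.0f64 as u32, 0);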

    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f64(a.into());
        ControlFlow::Continue(())
    }

    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a + b);
        ControlFlow::Continue(())
    }

    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a - b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a - b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a * b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a / b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut result = [0.0f32; 4];

        for i in 0..4 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut result = [0.0f64; 2];

        for i in 0..2 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f64x2(result);
        ControlFlow::Continue(())
    }

    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }
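
    // `wasm_maximum`/`wasm_minimum` follow wasm semantics rather than the
    // IEEE `maxNum`/`minNum` behavior of `f32::max`/`f32::min`: any NaN
    // operand produces NaN, and -0.0 is treated as less than +0.0. For
    // instance `0.0f32.max(f32::NAN)` is `0.0` in Rust, while the wasm
    // maximum of those operands is NaN.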

    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_floor());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(-a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = -*elem;
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_abs());
        ControlFlow::Continue(())
    }

    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a + b);
        ControlFlow::Continue(())
    }

    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a - b);
        ControlFlow::Continue(())
    }

    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a * b);
        ControlFlow::Continue(())
    }

    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a / b);
        ControlFlow::Continue(())
    }

    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_floor());
        ControlFlow::Continue(())
    }

    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(-a);
        ControlFlow::Continue(())
    }

    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_abs());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }
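
    // The `_sat` lane additions clamp at the lane type's bounds instead of
    // wrapping, matching wasm's `add_sat` instructions. Per-lane sketch
    // (illustrative only):
    //
    //     assert_eq!(i8::MAX.saturating_add(1), i8::MAX);
    //     assert_eq!(u8::MAX.saturating_add(1), u8::MAX);
    //     assert_eq!(i8::MIN.saturating_add(-1), i8::MIN);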

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0i16; 8];
        let half = result.len() / 2;
        for i in 0..half {
            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0i32; 4];
        result[0] = a[0].wrapping_add(a[1]);
        result[1] = a[2].wrapping_add(a[3]);
        result[2] = b[0].wrapping_add(b[1]);
        result[3] = b[2].wrapping_add(b[3]);
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }
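
    // Both pairwise ops sum adjacent lane pairs, with `src1`'s sums filling
    // the low half of the result and `src2`'s the high half. For the i32x4
    // form (illustrative only):
    //
    //     a = [a0, a1, a2, a3], b = [b0, b1, b2, b3]
    //     result = [a0+a1, a2+a3, b0+b1, b2+b3]  // wrapping adds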

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }
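
    // Every vector shift uses `wrapping_shl`/`wrapping_shr`, which mask the
    // shift amount to the lane width, matching wasm's shift semantics.
    // Illustrative sketch:
    //
    //     assert_eq!(1i8.wrapping_shl(9), 2);      // 9 % 8 == 1
    //     assert_eq!((-16i8).wrapping_shr(2), -4); // arithmetic shift for `i*`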

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
        self.state[dst].set_u128(val);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u8;
        self.state[dst].set_u8x16([val; 16]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u16;
        self.state[dst].set_u16x8([val; 8]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_u32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_u64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_f32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_f64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
        self.state[dst].set_i16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
        self.state[dst].set_u16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a & b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a | b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a ^ b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u128();
        self.state[dst].set_u128(!a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
        let c = self.state[c].get_u128();
        let x = self.state[x].get_u128();
        let y = self.state[y].get_u128();
        self.state[dst].set_u128((c & x) | (!c & y));
        ControlFlow::Continue(())
    }
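
    // `vbitselect128` picks each result bit from `x` where the corresponding
    // bit of `c` is 1 and from `y` where it is 0. Scalar sketch (illustrative
    // only):
    //
    //     let (c, x, y) = (0xff00u16, 0xaaaa, 0x5555);
    //     assert_eq!((c & x) | (!c & y), 0xaa55);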

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 7) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 15) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= *item >> 31;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 63) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }
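
    // Each `vbitmask*` op packs the top (sign) bit of every lane into the
    // low bits of an X register, lane 0 landing in bit 0; iterating lanes in
    // reverse while shifting left produces exactly that order. Sketch for a
    // 4-lane vector (illustrative only):
    //
    //     lanes = [0x8000_0000, 0, 0x8000_0000, 0]  =>  mask = 0b0101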
4217
4218    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4219    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4220        let a = self.state[src].get_u8x16();
4221        let result = a.iter().all(|a| *a != 0);
4222        self.state[dst].set_u32(u32::from(result));
4223        ControlFlow::Continue(())
4224    }
4225
4226    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4227    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4228        let a = self.state[src].get_u16x8();
4229        let result = a.iter().all(|a| *a != 0);
4230        self.state[dst].set_u32(u32::from(result));
4231        ControlFlow::Continue(())
4232    }
4233
4234    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4235    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4236        let a = self.state[src].get_u32x4();
4237        let result = a.iter().all(|a| *a != 0);
4238        self.state[dst].set_u32(u32::from(result));
4239        ControlFlow::Continue(())
4240    }
4241
4242    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4243    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4244        let a = self.state[src].get_u64x2();
4245        let result = a.iter().all(|a| *a != 0);
4246        self.state[dst].set_u32(u32::from(result));
4247        ControlFlow::Continue(())
4248    }
4249
4250    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4251    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4252        let a = self.state[src].get_u8x16();
4253        let result = a.iter().any(|a| *a != 0);
4254        self.state[dst].set_u32(u32::from(result));
4255        ControlFlow::Continue(())
4256    }
4257
4258    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4259    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4260        let a = self.state[src].get_u16x8();
4261        let result = a.iter().any(|a| *a != 0);
4262        self.state[dst].set_u32(u32::from(result));
4263        ControlFlow::Continue(())
4264    }
4265
4266    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4267    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4268        let a = self.state[src].get_u32x4();
4269        let result = a.iter().any(|a| *a != 0);
4270        self.state[dst].set_u32(u32::from(result));
4271        ControlFlow::Continue(())
4272    }
4273
4274    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4275    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4276        let a = self.state[src].get_u64x2();
4277        let result = a.iter().any(|a| *a != 0);
4278        self.state[dst].set_u32(u32::from(result));
4279        ControlFlow::Continue(())
4280    }
4281
4282    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4283    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4284        let a = self.state[src].get_i32x4();
4285        self.state[dst].set_f32x4(a.map(|i| i as f32));
4286        ControlFlow::Continue(())
4287    }
4288
4289    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4290    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4291        let a = self.state[src].get_u32x4();
4292        self.state[dst].set_f32x4(a.map(|i| i as f32));
4293        ControlFlow::Continue(())
4294    }
4295
4296    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4297    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4298        let a = self.state[src].get_i64x2();
4299        self.state[dst].set_f64x2(a.map(|i| i as f64));
4300        ControlFlow::Continue(())
4301    }
4302
4303    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4304    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4305        let a = self.state[src].get_u64x2();
4306        self.state[dst].set_f64x2(a.map(|i| i as f64));
4307        ControlFlow::Continue(())
4308    }
4309
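    /// The float-to-integer conversions below also use `as` casts, which in
    /// Rust saturate at the destination type's bounds and send NaN to zero
    /// (e.g. `f32::NAN as i32 == 0`, `1e10f32 as i32 == i32::MAX`), giving
    /// these opcodes saturating semantics for free.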
4310    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4311    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4312        let a = self.state[src].get_f32x4();
4313        self.state[dst].set_i32x4(a.map(|f| f as i32));
4314        ControlFlow::Continue(())
4315    }
4316
4317    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4318    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4319        let a = self.state[src].get_f32x4();
4320        self.state[dst].set_u32x4(a.map(|f| f as u32));
4321        ControlFlow::Continue(())
4322    }
4323
4324    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4325    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4326        let a = self.state[src].get_f64x2();
4327        self.state[dst].set_i64x2(a.map(|f| f as i64));
4328        ControlFlow::Continue(())
4329    }
4330
4331    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4332    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4333        let a = self.state[src].get_f64x2();
4334        self.state[dst].set_u64x2(a.map(|f| f as u64));
4335        ControlFlow::Continue(())
4336    }
4337
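    /// The `vwidenlow*`/`vwidenhigh*` handlers sign- or zero-extend the low
    /// or high half of the source vector to twice the lane width.
    /// `first_chunk`/`last_chunk` return `Option<&[T; N]>` with `N` inferred
    /// from the destination lane count, and a half-vector chunk always
    /// exists, so the `unwrap()`s cannot fail.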
4338    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4339    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4340        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4341        self.state[dst].set_i16x8(a.map(|i| i.into()));
4342        ControlFlow::Continue(())
4343    }
4344
4345    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4346    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4347        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4348        self.state[dst].set_u16x8(a.map(|i| i.into()));
4349        ControlFlow::Continue(())
4350    }
4351
4352    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4353    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4354        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4355        self.state[dst].set_i32x4(a.map(|i| i.into()));
4356        ControlFlow::Continue(())
4357    }
4358
4359    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4360    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4361        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4362        self.state[dst].set_u32x4(a.map(|i| i.into()));
4363        ControlFlow::Continue(())
4364    }
4365
4366    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4367    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4368        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4369        self.state[dst].set_i64x2(a.map(|i| i.into()));
4370        ControlFlow::Continue(())
4371    }
4372
4373    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4374    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4375        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4376        self.state[dst].set_u64x2(a.map(|i| i.into()));
4377        ControlFlow::Continue(())
4378    }
4379
4380    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4381    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4382        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4383        self.state[dst].set_i16x8(a.map(|i| i.into()));
4384        ControlFlow::Continue(())
4385    }
4386
4387    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4388    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4389        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4390        self.state[dst].set_u16x8(a.map(|i| i.into()));
4391        ControlFlow::Continue(())
4392    }
4393
4394    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4395    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4396        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4397        self.state[dst].set_i32x4(a.map(|i| i.into()));
4398        ControlFlow::Continue(())
4399    }
4400
4401    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4402    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4403        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4404        self.state[dst].set_u32x4(a.map(|i| i.into()));
4405        ControlFlow::Continue(())
4406    }
4407
4408    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4409    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4410        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4411        self.state[dst].set_i64x2(a.map(|i| i.into()));
4412        ControlFlow::Continue(())
4413    }
4414
4415    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4416    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4417        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4418        self.state[dst].set_u64x2(a.map(|i| i.into()));
4419        ControlFlow::Continue(())
4420    }
4421
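    /// The `vnarrow*` handlers concatenate `src1` and `src2` and narrow each
    /// lane to half its width with saturation: `try_into` succeeds when the
    /// value fits, and anything out of range clamps to the destination MIN
    /// or MAX according to its sign. For example, narrowing `-200i16` to a
    /// signed byte yields `i8::MIN` and `300i16` yields `i8::MAX`.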
4422    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4423    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4424        let a = self.state[operands.src1].get_i16x8();
4425        let b = self.state[operands.src2].get_i16x8();
4426        let mut result = [0; 16];
4427        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4428            *d = (*i)
4429                .try_into()
4430                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4431        }
4432        self.state[operands.dst].set_i8x16(result);
4433        ControlFlow::Continue(())
4434    }
4435
4436    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4437    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4438        let a = self.state[operands.src1].get_i16x8();
4439        let b = self.state[operands.src2].get_i16x8();
4440        let mut result = [0; 16];
4441        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4442            *d = (*i)
4443                .try_into()
4444                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4445        }
4446        self.state[operands.dst].set_u8x16(result);
4447        ControlFlow::Continue(())
4448    }
4449
4450    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4451    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4452        let a = self.state[operands.src1].get_i32x4();
4453        let b = self.state[operands.src2].get_i32x4();
4454        let mut result = [0; 8];
4455        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4456            *d = (*i)
4457                .try_into()
4458                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4459        }
4460        self.state[operands.dst].set_i16x8(result);
4461        ControlFlow::Continue(())
4462    }
4463
4464    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4465    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4466        let a = self.state[operands.src1].get_i32x4();
4467        let b = self.state[operands.src2].get_i32x4();
4468        let mut result = [0; 8];
4469        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4470            *d = (*i)
4471                .try_into()
4472                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4473        }
4474        self.state[operands.dst].set_u16x8(result);
4475        ControlFlow::Continue(())
4476    }
4477
4478    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4479    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4480        let a = self.state[operands.src1].get_i64x2();
4481        let b = self.state[operands.src2].get_i64x2();
4482        let mut result = [0; 4];
4483        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4484            *d = (*i)
4485                .try_into()
4486                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4487        }
4488        self.state[operands.dst].set_i32x4(result);
4489        ControlFlow::Continue(())
4490    }
4491
4492    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4493    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4494        let a = self.state[operands.src1].get_i64x2();
4495        let b = self.state[operands.src2].get_i64x2();
4496        let mut result = [0; 4];
4497        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4498            *d = (*i)
4499                .try_into()
4500                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4501        }
4502        self.state[operands.dst].set_u32x4(result);
4503        ControlFlow::Continue(())
4504    }
4505
4506    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4507    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4508        let a = self.state[operands.src1].get_u64x2();
4509        let b = self.state[operands.src2].get_u64x2();
4510        let mut result = [0; 4];
4511        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4512            *d = (*i).try_into().unwrap_or(u32::MAX);
4513        }
4514        self.state[operands.dst].set_u32x4(result);
4515        ControlFlow::Continue(())
4516    }
4517
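    /// Promotes the two low `f32` lanes to `f64`; this direction is always
    /// exact, since every `f32` is representable as an `f64`. The inverse
    /// `vfdemote` below rounds each `f64` to the nearest `f32` and zeroes
    /// the upper two lanes of the result.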
4518    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4519    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4520        let a = self.state[src].get_f32x4();
4521        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4522        ControlFlow::Continue(())
4523    }
4524
4525    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4526    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4527        let a = self.state[src].get_f64x2();
4528        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4529        ControlFlow::Continue(())
4530    }
4531
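    /// Integer subtraction (and multiplication further below) wraps on
    /// overflow via `wrapping_sub`/`wrapping_mul`; the `_sat` variants
    /// saturate to the lane type's bounds instead.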
4532    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4533    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4534        let mut a = self.state[operands.src1].get_i8x16();
4535        let b = self.state[operands.src2].get_i8x16();
4536        for (a, b) in a.iter_mut().zip(b) {
4537            *a = a.wrapping_sub(b);
4538        }
4539        self.state[operands.dst].set_i8x16(a);
4540        ControlFlow::Continue(())
4541    }
4542
4543    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4544    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4545        let mut a = self.state[operands.src1].get_i16x8();
4546        let b = self.state[operands.src2].get_i16x8();
4547        for (a, b) in a.iter_mut().zip(b) {
4548            *a = a.wrapping_sub(b);
4549        }
4550        self.state[operands.dst].set_i16x8(a);
4551        ControlFlow::Continue(())
4552    }
4553
4554    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4555    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4556        let mut a = self.state[operands.src1].get_i32x4();
4557        let b = self.state[operands.src2].get_i32x4();
4558        for (a, b) in a.iter_mut().zip(b) {
4559            *a = a.wrapping_sub(b);
4560        }
4561        self.state[operands.dst].set_i32x4(a);
4562        ControlFlow::Continue(())
4563    }
4564
4565    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4566    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4567        let mut a = self.state[operands.src1].get_i64x2();
4568        let b = self.state[operands.src2].get_i64x2();
4569        for (a, b) in a.iter_mut().zip(b) {
4570            *a = a.wrapping_sub(b);
4571        }
4572        self.state[operands.dst].set_i64x2(a);
4573        ControlFlow::Continue(())
4574    }
4575
4576    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4577    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4578        let mut a = self.state[operands.src1].get_i8x16();
4579        let b = self.state[operands.src2].get_i8x16();
4580        for (a, b) in a.iter_mut().zip(b) {
4581            *a = a.saturating_sub(b);
4582        }
4583        self.state[operands.dst].set_i8x16(a);
4584        ControlFlow::Continue(())
4585    }
4586
4587    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4588    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4589        let mut a = self.state[operands.src1].get_u8x16();
4590        let b = self.state[operands.src2].get_u8x16();
4591        for (a, b) in a.iter_mut().zip(b) {
4592            *a = a.saturating_sub(b);
4593        }
4594        self.state[operands.dst].set_u8x16(a);
4595        ControlFlow::Continue(())
4596    }
4597
4598    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4599    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4600        let mut a = self.state[operands.src1].get_i16x8();
4601        let b = self.state[operands.src2].get_i16x8();
4602        for (a, b) in a.iter_mut().zip(b) {
4603            *a = a.saturating_sub(b);
4604        }
4605        self.state[operands.dst].set_i16x8(a);
4606        ControlFlow::Continue(())
4607    }
4608
4609    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4610    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4611        let mut a = self.state[operands.src1].get_u16x8();
4612        let b = self.state[operands.src2].get_u16x8();
4613        for (a, b) in a.iter_mut().zip(b) {
4614            *a = a.saturating_sub(b);
4615        }
4616        self.state[operands.dst].set_u16x8(a);
4617        ControlFlow::Continue(())
4618    }
4619
4620    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4621    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4622        let mut a = self.state[operands.src1].get_f64x2();
4623        let b = self.state[operands.src2].get_f64x2();
4624        for (a, b) in a.iter_mut().zip(b) {
4625            *a -= b;

4626        }
4627        self.state[operands.dst].set_f64x2(a);
4628        ControlFlow::Continue(())
4629    }
4630
4631    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4632    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4633        let mut a = self.state[operands.src1].get_i8x16();
4634        let b = self.state[operands.src2].get_i8x16();
4635        for (a, b) in a.iter_mut().zip(b) {
4636            *a = a.wrapping_mul(b);
4637        }
4638        self.state[operands.dst].set_i8x16(a);
4639        ControlFlow::Continue(())
4640    }
4641
4642    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4643    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4644        let mut a = self.state[operands.src1].get_i16x8();
4645        let b = self.state[operands.src2].get_i16x8();
4646        for (a, b) in a.iter_mut().zip(b) {
4647            *a = a.wrapping_mul(b);
4648        }
4649        self.state[operands.dst].set_i16x8(a);
4650        ControlFlow::Continue(())
4651    }
4652
4653    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4654    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4655        let mut a = self.state[operands.src1].get_i32x4();
4656        let b = self.state[operands.src2].get_i32x4();
4657        for (a, b) in a.iter_mut().zip(b) {
4658            *a = a.wrapping_mul(b);
4659        }
4660        self.state[operands.dst].set_i32x4(a);
4661        ControlFlow::Continue(())
4662    }
4663
4664    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4665    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4666        let mut a = self.state[operands.src1].get_i64x2();
4667        let b = self.state[operands.src2].get_i64x2();
4668        for (a, b) in a.iter_mut().zip(b) {
4669            *a = a.wrapping_mul(b);
4670        }
4671        self.state[operands.dst].set_i64x2(a);
4672        ControlFlow::Continue(())
4673    }
4674
4675    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4676    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4677        let mut a = self.state[operands.src1].get_f64x2();
4678        let b = self.state[operands.src2].get_f64x2();
4679        for (a, b) in a.iter_mut().zip(b) {
4680            *a *= b;
4681        }
4682        self.state[operands.dst].set_f64x2(a);
4683        ControlFlow::Continue(())
4684    }
4685
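    /// Saturating, rounding Q15 multiplication, by its name presumably the
    /// Pulley counterpart of Wasm's `i16x8.q15mulr_sat_s`. Each product is
    /// computed in 32 bits, the `1 << 14` bias rounds to nearest before the
    /// arithmetic shift by 15, and the result is clamped to the `i16` range.
    /// Only one input pair actually needs the clamp:
    ///
    /// ```text
    /// (-32768 * -32768 + (1 << 14)) >> 15 == 32768 // saturates to 32767
    /// ```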
4686    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4687    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4688        let mut a = self.state[operands.src1].get_i16x8();
4689        let b = self.state[operands.src2].get_i16x8();
4690        const MIN: i32 = i16::MIN as i32;
4691        const MAX: i32 = i16::MAX as i32;
4692        for (a, b) in a.iter_mut().zip(b) {
4693            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4694            *a = r.clamp(MIN, MAX) as i16;
4695        }
4696        self.state[operands.dst].set_i16x8(a);
4697        ControlFlow::Continue(())
4698    }
4699
4700    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4701    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4702        let a = self.state[src].get_u8x16();
4703        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4704        ControlFlow::Continue(())
4705    }
4706
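    /// The extract and insert handlers below index lanes with
    /// `get_unchecked`, so they are only sound when `lane` is in bounds for
    /// the vector shape; presumably the bytecode producer guarantees that.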
4707    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4708    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4709        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4710        self.state[dst].set_u32(u32::from(a));
4711        ControlFlow::Continue(())
4712    }
4713
4714    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4715    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4716        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4717        self.state[dst].set_u32(u32::from(a));
4718        ControlFlow::Continue(())
4719    }
4720
4721    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4722    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4723        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4724        self.state[dst].set_u32(a);
4725        ControlFlow::Continue(())
4726    }
4727
4728    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4729    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4730        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4731        self.state[dst].set_u64(a);
4732        ControlFlow::Continue(())
4733    }
4734
4735    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4736    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4737        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4738        self.state[dst].set_f32(a);
4739        ControlFlow::Continue(())
4740    }
4741
4742    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4743    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4744        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4745        self.state[dst].set_f64(a);
4746        ControlFlow::Continue(())
4747    }
4748
4749    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4750    fn vinsertx8(
4751        &mut self,
4752        operands: BinaryOperands<VReg, VReg, XReg>,
4753        lane: u8,
4754    ) -> ControlFlow<Done> {
4755        let mut a = self.state[operands.src1].get_u8x16();
4756        let b = self.state[operands.src2].get_u32() as u8;
4757        unsafe {
4758            *a.get_unchecked_mut(usize::from(lane)) = b;
4759        }
4760        self.state[operands.dst].set_u8x16(a);
4761        ControlFlow::Continue(())
4762    }
4763
4764    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4765    fn vinsertx16(
4766        &mut self,
4767        operands: BinaryOperands<VReg, VReg, XReg>,
4768        lane: u8,
4769    ) -> ControlFlow<Done> {
4770        let mut a = self.state[operands.src1].get_u16x8();
4771        let b = self.state[operands.src2].get_u32() as u16;
4772        unsafe {
4773            *a.get_unchecked_mut(usize::from(lane)) = b;
4774        }
4775        self.state[operands.dst].set_u16x8(a);
4776        ControlFlow::Continue(())
4777    }
4778
4779    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4780    fn vinsertx32(
4781        &mut self,
4782        operands: BinaryOperands<VReg, VReg, XReg>,
4783        lane: u8,
4784    ) -> ControlFlow<Done> {
4785        let mut a = self.state[operands.src1].get_u32x4();
4786        let b = self.state[operands.src2].get_u32();
4787        unsafe {
4788            *a.get_unchecked_mut(usize::from(lane)) = b;
4789        }
4790        self.state[operands.dst].set_u32x4(a);
4791        ControlFlow::Continue(())
4792    }
4793
4794    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4795    fn vinsertx64(
4796        &mut self,
4797        operands: BinaryOperands<VReg, VReg, XReg>,
4798        lane: u8,
4799    ) -> ControlFlow<Done> {
4800        let mut a = self.state[operands.src1].get_u64x2();
4801        let b = self.state[operands.src2].get_u64();
4802        unsafe {
4803            *a.get_unchecked_mut(usize::from(lane)) = b;
4804        }
4805        self.state[operands.dst].set_u64x2(a);
4806        ControlFlow::Continue(())
4807    }
4808
4809    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4810    fn vinsertf32(
4811        &mut self,
4812        operands: BinaryOperands<VReg, VReg, FReg>,
4813        lane: u8,
4814    ) -> ControlFlow<Done> {
4815        let mut a = self.state[operands.src1].get_f32x4();
4816        let b = self.state[operands.src2].get_f32();
4817        unsafe {
4818            *a.get_unchecked_mut(usize::from(lane)) = b;
4819        }
4820        self.state[operands.dst].set_f32x4(a);
4821        ControlFlow::Continue(())
4822    }
4823
4824    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4825    fn vinsertf64(
4826        &mut self,
4827        operands: BinaryOperands<VReg, VReg, FReg>,
4828        lane: u8,
4829    ) -> ControlFlow<Done> {
4830        let mut a = self.state[operands.src1].get_f64x2();
4831        let b = self.state[operands.src2].get_f64();
4832        unsafe {
4833            *a.get_unchecked_mut(usize::from(lane)) = b;
4834        }
4835        self.state[operands.dst].set_f64x2(a);
4836        ControlFlow::Continue(())
4837    }
4838
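    /// The comparison handlers below build lane masks: each result lane is
    /// all ones when the predicate holds and all zeros otherwise. The
    /// `vslt*`/`vslteq*` variants compare lanes as signed integers and the
    /// `vult*`/`vulteq*` variants as unsigned.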
4839    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4840    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4841        let a = self.state[operands.src1].get_u8x16();
4842        let b = self.state[operands.src2].get_u8x16();
4843        let mut c = [0; 16];
4844        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4845            *c = if a == b { u8::MAX } else { 0 };
4846        }
4847        self.state[operands.dst].set_u8x16(c);
4848        ControlFlow::Continue(())
4849    }
4850
4851    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4852    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4853        let a = self.state[operands.src1].get_u8x16();
4854        let b = self.state[operands.src2].get_u8x16();
4855        let mut c = [0; 16];
4856        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4857            *c = if a != b { u8::MAX } else { 0 };
4858        }
4859        self.state[operands.dst].set_u8x16(c);
4860        ControlFlow::Continue(())
4861    }
4862
4863    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4864    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4865        let a = self.state[operands.src1].get_i8x16();
4866        let b = self.state[operands.src2].get_i8x16();
4867        let mut c = [0; 16];
4868        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4869            *c = if a < b { u8::MAX } else { 0 };
4870        }
4871        self.state[operands.dst].set_u8x16(c);
4872        ControlFlow::Continue(())
4873    }
4874
4875    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4876    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4877        let a = self.state[operands.src1].get_i8x16();
4878        let b = self.state[operands.src2].get_i8x16();
4879        let mut c = [0; 16];
4880        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4881            *c = if a <= b { u8::MAX } else { 0 };
4882        }
4883        self.state[operands.dst].set_u8x16(c);
4884        ControlFlow::Continue(())
4885    }
4886
4887    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4888    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4889        let a = self.state[operands.src1].get_u8x16();
4890        let b = self.state[operands.src2].get_u8x16();
4891        let mut c = [0; 16];
4892        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4893            *c = if a < b { u8::MAX } else { 0 };
4894        }
4895        self.state[operands.dst].set_u8x16(c);
4896        ControlFlow::Continue(())
4897    }
4898
4899    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4900    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4901        let a = self.state[operands.src1].get_u8x16();
4902        let b = self.state[operands.src2].get_u8x16();
4903        let mut c = [0; 16];
4904        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4905            *c = if a <= b { u8::MAX } else { 0 };
4906        }
4907        self.state[operands.dst].set_u8x16(c);
4908        ControlFlow::Continue(())
4909    }
4910
4911    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4912    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4913        let a = self.state[operands.src1].get_u16x8();
4914        let b = self.state[operands.src2].get_u16x8();
4915        let mut c = [0; 8];
4916        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4917            *c = if a == b { u16::MAX } else { 0 };
4918        }
4919        self.state[operands.dst].set_u16x8(c);
4920        ControlFlow::Continue(())
4921    }
4922
4923    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4924    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4925        let a = self.state[operands.src1].get_u16x8();
4926        let b = self.state[operands.src2].get_u16x8();
4927        let mut c = [0; 8];
4928        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4929            *c = if a != b { u16::MAX } else { 0 };
4930        }
4931        self.state[operands.dst].set_u16x8(c);
4932        ControlFlow::Continue(())
4933    }
4934
4935    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4936    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4937        let a = self.state[operands.src1].get_i16x8();
4938        let b = self.state[operands.src2].get_i16x8();
4939        let mut c = [0; 8];
4940        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4941            *c = if a < b { u16::MAX } else { 0 };
4942        }
4943        self.state[operands.dst].set_u16x8(c);
4944        ControlFlow::Continue(())
4945    }
4946
4947    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4948    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4949        let a = self.state[operands.src1].get_i16x8();
4950        let b = self.state[operands.src2].get_i16x8();
4951        let mut c = [0; 8];
4952        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4953            *c = if a <= b { u16::MAX } else { 0 };
4954        }
4955        self.state[operands.dst].set_u16x8(c);
4956        ControlFlow::Continue(())
4957    }
4958
4959    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4960    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4961        let a = self.state[operands.src1].get_u16x8();
4962        let b = self.state[operands.src2].get_u16x8();
4963        let mut c = [0; 8];
4964        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4965            *c = if a < b { u16::MAX } else { 0 };
4966        }
4967        self.state[operands.dst].set_u16x8(c);
4968        ControlFlow::Continue(())
4969    }
4970
4971    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4972    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4973        let a = self.state[operands.src1].get_u16x8();
4974        let b = self.state[operands.src2].get_u16x8();
4975        let mut c = [0; 8];
4976        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4977            *c = if a <= b { u16::MAX } else { 0 };
4978        }
4979        self.state[operands.dst].set_u16x8(c);
4980        ControlFlow::Continue(())
4981    }
4982
4983    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4984    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4985        let a = self.state[operands.src1].get_u32x4();
4986        let b = self.state[operands.src2].get_u32x4();
4987        let mut c = [0; 4];
4988        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4989            *c = if a == b { u32::MAX } else { 0 };
4990        }
4991        self.state[operands.dst].set_u32x4(c);
4992        ControlFlow::Continue(())
4993    }
4994
4995    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4996    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4997        let a = self.state[operands.src1].get_u32x4();
4998        let b = self.state[operands.src2].get_u32x4();
4999        let mut c = [0; 4];
5000        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5001            *c = if a != b { u32::MAX } else { 0 };
5002        }
5003        self.state[operands.dst].set_u32x4(c);
5004        ControlFlow::Continue(())
5005    }
5006
5007    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5008    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5009        let a = self.state[operands.src1].get_i32x4();
5010        let b = self.state[operands.src2].get_i32x4();
5011        let mut c = [0; 4];
5012        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5013            *c = if a < b { u32::MAX } else { 0 };
5014        }
5015        self.state[operands.dst].set_u32x4(c);
5016        ControlFlow::Continue(())
5017    }
5018
5019    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5020    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5021        let a = self.state[operands.src1].get_i32x4();
5022        let b = self.state[operands.src2].get_i32x4();
5023        let mut c = [0; 4];
5024        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5025            *c = if a <= b { u32::MAX } else { 0 };
5026        }
5027        self.state[operands.dst].set_u32x4(c);
5028        ControlFlow::Continue(())
5029    }
5030
5031    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5032    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5033        let a = self.state[operands.src1].get_u32x4();
5034        let b = self.state[operands.src2].get_u32x4();
5035        let mut c = [0; 4];
5036        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5037            *c = if a < b { u32::MAX } else { 0 };
5038        }
5039        self.state[operands.dst].set_u32x4(c);
5040        ControlFlow::Continue(())
5041    }
5042
5043    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5044    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5045        let a = self.state[operands.src1].get_u32x4();
5046        let b = self.state[operands.src2].get_u32x4();
5047        let mut c = [0; 4];
5048        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5049            *c = if a <= b { u32::MAX } else { 0 };
5050        }
5051        self.state[operands.dst].set_u32x4(c);
5052        ControlFlow::Continue(())
5053    }
5054
5055    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5056    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5057        let a = self.state[operands.src1].get_u64x2();
5058        let b = self.state[operands.src2].get_u64x2();
5059        let mut c = [0; 2];
5060        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5061            *c = if a == b { u64::MAX } else { 0 };
5062        }
5063        self.state[operands.dst].set_u64x2(c);
5064        ControlFlow::Continue(())
5065    }
5066
5067    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5068    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5069        let a = self.state[operands.src1].get_u64x2();
5070        let b = self.state[operands.src2].get_u64x2();
5071        let mut c = [0; 2];
5072        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5073            *c = if a != b { u64::MAX } else { 0 };
5074        }
5075        self.state[operands.dst].set_u64x2(c);
5076        ControlFlow::Continue(())
5077    }
5078
5079    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5080    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5081        let a = self.state[operands.src1].get_i64x2();
5082        let b = self.state[operands.src2].get_i64x2();
5083        let mut c = [0; 2];
5084        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5085            *c = if a < b { u64::MAX } else { 0 };
5086        }
5087        self.state[operands.dst].set_u64x2(c);
5088        ControlFlow::Continue(())
5089    }
5090
5091    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5092    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5093        let a = self.state[operands.src1].get_i64x2();
5094        let b = self.state[operands.src2].get_i64x2();
5095        let mut c = [0; 2];
5096        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5097            *c = if a <= b { u64::MAX } else { 0 };
5098        }
5099        self.state[operands.dst].set_u64x2(c);
5100        ControlFlow::Continue(())
5101    }
5102
5103    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5104    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5105        let a = self.state[operands.src1].get_u64x2();
5106        let b = self.state[operands.src2].get_u64x2();
5107        let mut c = [0; 2];
5108        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5109            *c = if a < b { u64::MAX } else { 0 };
5110        }
5111        self.state[operands.dst].set_u64x2(c);
5112        ControlFlow::Continue(())
5113    }
5114
5115    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5116    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5117        let a = self.state[operands.src1].get_u64x2();
5118        let b = self.state[operands.src2].get_u64x2();
5119        let mut c = [0; 2];
5120        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5121            *c = if a <= b { u64::MAX } else { 0 };
5122        }
5123        self.state[operands.dst].set_u64x2(c);
5124        ControlFlow::Continue(())
5125    }
5126
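    /// Lane-wise negation uses `wrapping_neg`, so the one value with no
    /// positive counterpart negates to itself (`i8::MIN` stays `i8::MIN`,
    /// and likewise for the wider lanes), matching two's-complement `0 - x`.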
5127    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5128    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5129        let a = self.state[src].get_i8x16();
5130        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
5131        ControlFlow::Continue(())
5132    }
5133
5134    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5135    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5136        let a = self.state[src].get_i16x8();
5137        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
5138        ControlFlow::Continue(())
5139    }
5140
5141    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5142    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5143        let a = self.state[src].get_i32x4();
5144        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
5145        ControlFlow::Continue(())
5146    }
5147
5148    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5149    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5150        let a = self.state[src].get_i64x2();
5151        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
5152        ControlFlow::Continue(())
5153    }
5154
5155    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5156    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5157        let a = self.state[src].get_f64x2();
5158        self.state[dst].set_f64x2(a.map(|i| -i));
5159        ControlFlow::Continue(())
5160    }
5161
5162    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5163    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5164        let mut a = self.state[operands.src1].get_i8x16();
5165        let b = self.state[operands.src2].get_i8x16();
5166        for (a, b) in a.iter_mut().zip(&b) {
5167            *a = (*a).min(*b);
5168        }
5169        self.state[operands.dst].set_i8x16(a);
5170        ControlFlow::Continue(())
5171    }
5172
5173    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5174    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5175        let mut a = self.state[operands.src1].get_u8x16();
5176        let b = self.state[operands.src2].get_u8x16();
5177        for (a, b) in a.iter_mut().zip(&b) {
5178            *a = (*a).min(*b);
5179        }
5180        self.state[operands.dst].set_u8x16(a);
5181        ControlFlow::Continue(())
5182    }
5183
5184    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5185    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5186        let mut a = self.state[operands.src1].get_i16x8();
5187        let b = self.state[operands.src2].get_i16x8();
5188        for (a, b) in a.iter_mut().zip(&b) {
5189            *a = (*a).min(*b);
5190        }
5191        self.state[operands.dst].set_i16x8(a);
5192        ControlFlow::Continue(())
5193    }
5194
5195    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5196    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5197        let mut a = self.state[operands.src1].get_u16x8();
5198        let b = self.state[operands.src2].get_u16x8();
5199        for (a, b) in a.iter_mut().zip(&b) {
5200            *a = (*a).min(*b);
5201        }
5202        self.state[operands.dst].set_u16x8(a);
5203        ControlFlow::Continue(())
5204    }
5205
5206    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5207    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5208        let mut a = self.state[operands.src1].get_i32x4();
5209        let b = self.state[operands.src2].get_i32x4();
5210        for (a, b) in a.iter_mut().zip(&b) {
5211            *a = (*a).min(*b);
5212        }
5213        self.state[operands.dst].set_i32x4(a);
5214        ControlFlow::Continue(())
5215    }
5216
5217    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5218    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5219        let mut a = self.state[operands.src1].get_u32x4();
5220        let b = self.state[operands.src2].get_u32x4();
5221        for (a, b) in a.iter_mut().zip(&b) {
5222            *a = (*a).min(*b);
5223        }
5224        self.state[operands.dst].set_u32x4(a);
5225        ControlFlow::Continue(())
5226    }
5227
5228    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5229    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5230        let mut a = self.state[operands.src1].get_i8x16();
5231        let b = self.state[operands.src2].get_i8x16();
5232        for (a, b) in a.iter_mut().zip(&b) {
5233            *a = (*a).max(*b);
5234        }
5235        self.state[operands.dst].set_i8x16(a);
5236        ControlFlow::Continue(())
5237    }
5238
5239    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5240    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5241        let mut a = self.state[operands.src1].get_u8x16();
5242        let b = self.state[operands.src2].get_u8x16();
5243        for (a, b) in a.iter_mut().zip(&b) {
5244            *a = (*a).max(*b);
5245        }
5246        self.state[operands.dst].set_u8x16(a);
5247        ControlFlow::Continue(())
5248    }
5249
5250    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5251    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5252        let mut a = self.state[operands.src1].get_i16x8();
5253        let b = self.state[operands.src2].get_i16x8();
5254        for (a, b) in a.iter_mut().zip(&b) {
5255            *a = (*a).max(*b);
5256        }
5257        self.state[operands.dst].set_i16x8(a);
5258        ControlFlow::Continue(())
5259    }
5260
5261    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5262    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5263        let mut a = self.state[operands.src1].get_u16x8();
5264        let b = self.state[operands.src2].get_u16x8();
5265        for (a, b) in a.iter_mut().zip(&b) {
5266            *a = (*a).max(*b);
5267        }
5268        self.state[operands.dst].set_u16x8(a);
5269        ControlFlow::Continue(())
5270    }
5271
5272    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5273    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5274        let mut a = self.state[operands.src1].get_i32x4();
5275        let b = self.state[operands.src2].get_i32x4();
5276        for (a, b) in a.iter_mut().zip(&b) {
5277            *a = (*a).max(*b);
5278        }
5279        self.state[operands.dst].set_i32x4(a);
5280        ControlFlow::Continue(())
5281    }
5282
5283    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5284    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5285        let mut a = self.state[operands.src1].get_u32x4();
5286        let b = self.state[operands.src2].get_u32x4();
5287        for (a, b) in a.iter_mut().zip(&b) {
5288            *a = (*a).max(*b);
5289        }
5290        self.state[operands.dst].set_u32x4(a);
5291        ControlFlow::Continue(())
5292    }
5293
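    /// As with negation, `wrapping_abs` leaves `iN::MIN` unchanged. The
    /// float variants use `wasm_abs`, which clears the sign bit and leaves
    /// the rest of the representation (including NaN payloads) intact.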
5294    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5295    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5296        let a = self.state[src].get_i8x16();
5297        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5298        ControlFlow::Continue(())
5299    }
5300
5301    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5302    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5303        let a = self.state[src].get_i16x8();
5304        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5305        ControlFlow::Continue(())
5306    }
5307
5308    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5309    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5310        let a = self.state[src].get_i32x4();
5311        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5312        ControlFlow::Continue(())
5313    }
5314
5315    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5316    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5317        let a = self.state[src].get_i64x2();
5318        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5319        ControlFlow::Continue(())
5320    }
5321
5322    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5323    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5324        let a = self.state[src].get_f32x4();
5325        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5326        ControlFlow::Continue(())
5327    }
5328
5329    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5330    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5331        let a = self.state[src].get_f64x2();
5332        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5333        ControlFlow::Continue(())
5334    }
5335
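    /// `wasm_maximum`/`wasm_minimum` implement (per their names) Wasm's
    /// NaN-propagating semantics: a NaN in either operand produces NaN and
    /// `-0.0` orders below `+0.0`, unlike `f32::max`, which would prefer the
    /// non-NaN operand.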
5336    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5337    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5338        let mut a = self.state[operands.src1].get_f32x4();
5339        let b = self.state[operands.src2].get_f32x4();
5340        for (a, b) in a.iter_mut().zip(&b) {
5341            *a = a.wasm_maximum(*b);
5342        }
5343        self.state[operands.dst].set_f32x4(a);
5344        ControlFlow::Continue(())
5345    }
5346
5347    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5348    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5349        let mut a = self.state[operands.src1].get_f64x2();
5350        let b = self.state[operands.src2].get_f64x2();
5351        for (a, b) in a.iter_mut().zip(&b) {
5352            *a = a.wasm_maximum(*b);
5353        }
5354        self.state[operands.dst].set_f64x2(a);
5355        ControlFlow::Continue(())
5356    }
5357
5358    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5359    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5360        let mut a = self.state[operands.src1].get_f32x4();
5361        let b = self.state[operands.src2].get_f32x4();
5362        for (a, b) in a.iter_mut().zip(&b) {
5363            *a = a.wasm_minimum(*b);
5364        }
5365        self.state[operands.dst].set_f32x4(a);
5366        ControlFlow::Continue(())
5367    }
5368
5369    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5370    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5371        let mut a = self.state[operands.src1].get_f64x2();
5372        let b = self.state[operands.src2].get_f64x2();
5373        for (a, b) in a.iter_mut().zip(&b) {
5374            *a = a.wasm_minimum(*b);
5375        }
5376        self.state[operands.dst].set_f64x2(a);
5377        ControlFlow::Continue(())
5378    }
5379
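    /// Byte-wise shuffle of the 32-byte concatenation of `src1` and `src2`:
    /// each byte of the little-endian `mask` picks one output byte, values
    /// `0..16` drawing from `a` and `16..32` from `b`. A mask byte of 32 or
    /// more would panic on the `b` index, so presumably the encoder never
    /// emits one (contrast `vswizzlei8x16` below, which zeroes out-of-range
    /// indices).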
5380    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5381    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5382        let a = self.state[src1].get_u8x16();
5383        let b = self.state[src2].get_u8x16();
5384        let result = mask.to_le_bytes().map(|m| {
5385            if m < 16 {
5386                a[m as usize]
5387            } else {
5388                b[m as usize - 16]
5389            }
5390        });
5391        self.state[dst].set_u8x16(result);
5392        ControlFlow::Continue(())
5393    }
5394
5395    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5396    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5397        let src1 = self.state[operands.src1].get_i8x16();
5398        let src2 = self.state[operands.src2].get_i8x16();
5399        let mut dst = [0i8; 16];
5400        for (i, &idx) in src2.iter().enumerate() {
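            // A negative `idx` becomes a huge value after the `usize` cast,
            // so negative and >= 16 indices alike select zero.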
5401            if (idx as usize) < 16 {
5402                dst[i] = src1[idx as usize];
5403            } else {
5404                dst[i] = 0;
5405            }
5406        }
5407        self.state[operands.dst].set_i8x16(dst);
5408        ControlFlow::Continue(())
5409    }
5410
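    /// Rounding average: `(a + b + 1) / 2`, with halves rounding up, e.g.
    /// `avg(1, 2) == 2`.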
5411    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5412    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5413        let mut a = self.state[operands.src1].get_u8x16();
5414        let b = self.state[operands.src2].get_u8x16();
5415        for (a, b) in a.iter_mut().zip(&b) {
5416            // use wider precision to avoid overflow
5417            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5418        }
5419        self.state[operands.dst].set_u8x16(a);
5420        ControlFlow::Continue(())
5421    }
5422
5423    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5424    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5425        let mut a = self.state[operands.src1].get_u16x8();
5426        let b = self.state[operands.src2].get_u16x8();
5427        for (a, b) in a.iter_mut().zip(&b) {
5428            // use wider precision to avoid overflow
5429            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5430        }
5431        self.state[operands.dst].set_u16x8(a);
5432        ControlFlow::Continue(())
5433    }
5434
5435    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
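    /// The float comparisons below inherit IEEE 754 semantics from Rust's
    /// operators: every comparison with NaN is false, so a NaN lane yields a
    /// zero mask for `eq`/`lt`/`lteq` and an all-ones mask for `neq`.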
5436    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5437        let a = self.state[operands.src1].get_f32x4();
5438        let b = self.state[operands.src2].get_f32x4();
5439        let mut c = [0; 4];
5440        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5441            *c = if a == b { u32::MAX } else { 0 };
5442        }
5443        self.state[operands.dst].set_u32x4(c);
5444        ControlFlow::Continue(())
5445    }
5446
5447    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5448    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5449        let a = self.state[operands.src1].get_f32x4();
5450        let b = self.state[operands.src2].get_f32x4();
5451        let mut c = [0; 4];
5452        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5453            *c = if a != b { u32::MAX } else { 0 };
5454        }
5455        self.state[operands.dst].set_u32x4(c);
5456        ControlFlow::Continue(())
5457    }
5458
5459    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5460    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5461        let a = self.state[operands.src1].get_f32x4();
5462        let b = self.state[operands.src2].get_f32x4();
5463        let mut c = [0; 4];
5464        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5465            *c = if a < b { u32::MAX } else { 0 };
5466        }
5467        self.state[operands.dst].set_u32x4(c);
5468        ControlFlow::Continue(())
5469    }
5470
5471    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5472    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5473        let a = self.state[operands.src1].get_f32x4();
5474        let b = self.state[operands.src2].get_f32x4();
5475        let mut c = [0; 4];
5476        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5477            *c = if a <= b { u32::MAX } else { 0 };
5478        }
5479        self.state[operands.dst].set_u32x4(c);
5480        ControlFlow::Continue(())
5481    }
5482
5483    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5484    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5485        let a = self.state[operands.src1].get_f64x2();
5486        let b = self.state[operands.src2].get_f64x2();
5487        let mut c = [0; 2];
5488        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5489            *c = if a == b { u64::MAX } else { 0 };
5490        }
5491        self.state[operands.dst].set_u64x2(c);
5492        ControlFlow::Continue(())
5493    }
5494
5495    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5496    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5497        let a = self.state[operands.src1].get_f64x2();
5498        let b = self.state[operands.src2].get_f64x2();
5499        let mut c = [0; 2];
5500        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5501            *c = if a != b { u64::MAX } else { 0 };
5502        }
5503        self.state[operands.dst].set_u64x2(c);
5504        ControlFlow::Continue(())
5505    }
5506
5507    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5508    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5509        let a = self.state[operands.src1].get_f64x2();
5510        let b = self.state[operands.src2].get_f64x2();
5511        let mut c = [0; 2];
5512        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5513            *c = if a < b { u64::MAX } else { 0 };
5514        }
5515        self.state[operands.dst].set_u64x2(c);
5516        ControlFlow::Continue(())
5517    }
5518
5519    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5520    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5521        let a = self.state[operands.src1].get_f64x2();
5522        let b = self.state[operands.src2].get_f64x2();
5523        let mut c = [0; 2];
5524        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5525            *c = if a <= b { u64::MAX } else { 0 };
5526        }
5527        self.state[operands.dst].set_u64x2(c);
5528        ControlFlow::Continue(())
5529    }
5530
5531    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
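    /// Lane-wise multiply-add via `wasm_mul_add` from the `WasmFloat` trait.
    /// The shape of the loop suggests `a * b + c` per lane; whether the
    /// operation is actually fused is up to that trait, so read the "fma" in
    /// the name as intent rather than a guarantee made here.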
5532    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5533        let mut a = self.state[a].get_f32x4();
5534        let b = self.state[b].get_f32x4();
5535        let c = self.state[c].get_f32x4();
5536        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5537            *a = a.wasm_mul_add(b, c);
5538        }
5539        self.state[dst].set_f32x4(a);
5540        ControlFlow::Continue(())
5541    }
5542
5543    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5544    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5545        let mut a = self.state[a].get_f64x2();
5546        let b = self.state[b].get_f64x2();
5547        let c = self.state[c].get_f64x2();
5548        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5549            *a = a.wasm_mul_add(b, c);
5550        }
5551        self.state[dst].set_f64x2(a);
5552        ControlFlow::Continue(())
5553    }
5554
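    /// Whole-register select: the scalar condition in `cond` picks one of
    /// the two source vectors in its entirety, rather than blending
    /// lane-by-lane.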
5555    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5556    fn vselect(
5557        &mut self,
5558        dst: VReg,
5559        cond: XReg,
5560        if_nonzero: VReg,
5561        if_zero: VReg,
5562    ) -> ControlFlow<Done> {
5563        let result = if self.state[cond].get_u32() != 0 {
5564            self.state[if_nonzero]
5565        } else {
5566            self.state[if_zero]
5567        };
5568        self.state[dst] = result;
5569        ControlFlow::Continue(())
5570    }
5571
5572    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
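    /// The 128-bit operations below act on values split across `(lo, hi)`
    /// pairs of `x` registers, joined and split again by the
    /// `get_i128`/`set_i128` helpers defined elsewhere in this interpreter.
    /// Addition and subtraction wrap on overflow, e.g.
    /// `i128::MAX.wrapping_add(1) == i128::MIN`.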
5573    fn xadd128(
5574        &mut self,
5575        dst_lo: XReg,
5576        dst_hi: XReg,
5577        lhs_lo: XReg,
5578        lhs_hi: XReg,
5579        rhs_lo: XReg,
5580        rhs_hi: XReg,
5581    ) -> ControlFlow<Done> {
5582        let lhs = self.get_i128(lhs_lo, lhs_hi);
5583        let rhs = self.get_i128(rhs_lo, rhs_hi);
5584        let result = lhs.wrapping_add(rhs);
5585        self.set_i128(dst_lo, dst_hi, result);
5586        ControlFlow::Continue(())
5587    }
5588
5589    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5590    fn xsub128(
5591        &mut self,
5592        dst_lo: XReg,
5593        dst_hi: XReg,
5594        lhs_lo: XReg,
5595        lhs_hi: XReg,
5596        rhs_lo: XReg,
5597        rhs_hi: XReg,
5598    ) -> ControlFlow<Done> {
5599        let lhs = self.get_i128(lhs_lo, lhs_hi);
5600        let rhs = self.get_i128(rhs_lo, rhs_hi);
5601        let result = lhs.wrapping_sub(rhs);
5602        self.set_i128(dst_lo, dst_hi, result);
5603        ControlFlow::Continue(())
5604    }
5605
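    /// Widening 64 x 64 -> 128 multiplication. Both operands are extended to
    /// 128 bits first, so the product always fits and `wrapping_mul` can
    /// never actually wrap; it merely skips the overflow check.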
5606    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5607    fn xwidemul64_s(
5608        &mut self,
5609        dst_lo: XReg,
5610        dst_hi: XReg,
5611        lhs: XReg,
5612        rhs: XReg,
5613    ) -> ControlFlow<Done> {
5614        let lhs = self.state[lhs].get_i64();
5615        let rhs = self.state[rhs].get_i64();
5616        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5617        self.set_i128(dst_lo, dst_hi, result);
5618        ControlFlow::Continue(())
5619    }
5620
5621    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5622    fn xwidemul64_u(
5623        &mut self,
5624        dst_lo: XReg,
5625        dst_hi: XReg,
5626        lhs: XReg,
5627        rhs: XReg,
5628    ) -> ControlFlow<Done> {
5629        let lhs = self.state[lhs].get_u64();
5630        let rhs = self.state[rhs].get_u64();
5631        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5632        self.set_i128(dst_lo, dst_hi, result as i128);
5633        ControlFlow::Continue(())
5634    }
5635}