//! Interpretation of pulley bytecode.

use crate::decode::*;
use crate::encode::Encode;
use crate::imms::*;
use crate::profile::{ExecutingPc, ExecutingPcRef};
use crate::regs::*;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::fmt;
use core::mem;
use core::ops::ControlFlow;
use core::ops::{Index, IndexMut};
use core::ptr::NonNull;
use pulley_macros::interp_disable_if_cfg;
use wasmtime_math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};

mod debug;
#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
mod match_loop;
#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
mod tail_loop;

const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB

/// A virtual machine for interpreting Pulley bytecode.
pub struct Vm {
    state: MachineState,
    executing_pc: ExecutingPc,
}

impl Default for Vm {
    fn default() -> Self {
        Vm::new()
    }
}

impl Vm {
    /// Create a new virtual machine with the default stack size.
    pub fn new() -> Self {
        Self::with_stack(DEFAULT_STACK_SIZE)
    }

    /// Create a new virtual machine with the given stack size.
    pub fn with_stack(stack_size: usize) -> Self {
        Self {
            state: MachineState::with_stack(stack_size),
            executing_pc: ExecutingPc::default(),
        }
    }

    /// Get a shared reference to this VM's machine state.
    pub fn state(&self) -> &MachineState {
        &self.state
    }

    /// Get an exclusive reference to this VM's machine state.
    pub fn state_mut(&mut self) -> &mut MachineState {
        &mut self.state
    }

    /// Call a bytecode function.
    ///
    /// The given `func` must point to the beginning of a valid Pulley bytecode
    /// function.
    ///
    /// The given `args` must match the number and type of arguments that
    /// function expects.
    ///
    /// The given `rets` must match the function's actual return types.
    ///
    /// Returns either the resulting values, or the PC at which a trap was
    /// raised.
    pub unsafe fn call<'a, T>(
        &'a mut self,
        func: NonNull<u8>,
        args: &[Val],
        rets: T,
    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
    where
        T: IntoIterator<Item = RegType> + 'a,
    {
        unsafe {
            let lr = self.call_start(args);

            match self.call_run(func) {
                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
                DoneReason::CallIndirectHost { id, resume } => {
                    DoneReason::CallIndirectHost { id, resume }
                }
            }
        }
    }

    /// Performs the initial part of [`Vm::call`] in setting up the `args`
    /// provided in registers according to Pulley's ABI.
    ///
    /// # Return
    ///
    /// Returns the old `lr` register value. The current `lr` value is replaced
    /// with a sentinel that triggers a return to the host when returned-to.
    ///
    /// # Unsafety
    ///
    /// All the same unsafety as `call` and additionally, you must
    /// invoke `call_run` and then `call_end` after calling `call_start`.
    /// If you don't want to wrangle these invocations, use `call` instead
    /// of `call_{start,run,end}`.
    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        for arg in args {
            match arg {
                Val::XReg(val) => match x_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                Val::FReg(val) => match f_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                #[cfg(not(pulley_disable_interp_simd))]
                Val::VReg(val) => match v_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
            }
        }

        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
    }

    /// Performs the internal part of [`Vm::call`] where bytecode is actually
    /// executed.
    ///
    /// # Unsafety
    ///
    /// In addition to all the invariants documented for `call`, you
    /// may only invoke `call_run` after invoking `call_start` to
    /// initialize this call's arguments.
    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
        self.state.debug_assert_done_reason_none();
        let interpreter = Interpreter {
            state: &mut self.state,
            pc: unsafe { UnsafeBytecodeStream::new(pc) },
            executing_pc: self.executing_pc.as_ref(),
        };
        let done = interpreter.run();
        self.state.done_decode(done)
    }

    /// Performs the tail end of [`Vm::call`] by returning the values as
    /// determined by `rets` according to Pulley's ABI.
    ///
    /// The `old_ret` value should have been provided from `call_start`
    /// previously.
    ///
    /// # Unsafety
    ///
    /// In addition to the invariants documented for `call`, this may
    /// only be called after `call_run`.
    pub unsafe fn call_end<'a>(
        &'a mut self,
        old_ret: *mut u8,
        rets: impl IntoIterator<Item = RegType> + 'a,
    ) -> impl Iterator<Item = Val> + 'a {
        self.state.lr = old_ret;
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        rets.into_iter().map(move |ty| match ty {
            RegType::XReg => match x_rets.next() {
                Some(reg) => Val::XReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            RegType::FReg => match f_rets.next() {
                Some(reg) => Val::FReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(not(pulley_disable_interp_simd))]
            RegType::VReg => match v_rets.next() {
                Some(reg) => Val::VReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(pulley_disable_interp_simd)]
            RegType::VReg => panic!("simd support disabled at compile time"),
        })
    }

    /// Returns the current `fp` register value.
    pub fn fp(&self) -> *mut u8 {
        self.state.fp
    }

    /// Returns the current `lr` register value.
    pub fn lr(&self) -> *mut u8 {
        self.state.lr
    }

    /// Sets the current `fp` register value.
    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
        self.state.fp = fp;
    }

    /// Sets the current `lr` register value.
    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
        self.state.lr = lr;
    }

    /// Gets a handle to the currently executing program counter for this
    /// interpreter which can be read from other threads.
    //
    // Note that despite this field still existing with `not(feature =
    // "profile")` it's hidden from the public API in that scenario as it has no
    // methods anyway.
    #[cfg(feature = "profile")]
    pub fn executing_pc(&self) -> &ExecutingPc {
        &self.executing_pc
    }
}
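
// A hedged sketch of how a host embedding might drive `Vm::call` (the
// `bytecode` buffer here is hypothetical; real callers live elsewhere in
// wasmtime):
//
//     let mut vm = Vm::new();
//     let func = NonNull::new(bytecode.as_mut_ptr()).unwrap();
//     match unsafe { vm.call(func, &[Val::from(42_u64)], [RegType::XReg]) } {
//         DoneReason::ReturnToHost(rets) => { /* read the returned `Val`s */ }
//         DoneReason::Trap { pc, kind } => { /* translate into a host trap */ }
//         DoneReason::CallIndirectHost { id, resume } => { /* service host call */ }
//     }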

impl Drop for Vm {
    fn drop(&mut self) {
        self.executing_pc.set_done();
    }
}

/// The type of a register in the Pulley machine state.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    VReg,
}

/// A value that can be stored in a register.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}

impl fmt::LowerHex for Val {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
            #[cfg(not(pulley_disable_interp_simd))]
            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}

impl From<XRegVal> for Val {
    fn from(value: XRegVal) -> Self {
        Val::XReg(value)
    }
}

impl From<u64> for Val {
    fn from(value: u64) -> Self {
        XRegVal::new_u64(value).into()
    }
}

impl From<u32> for Val {
    fn from(value: u32) -> Self {
        XRegVal::new_u32(value).into()
    }
}

impl From<i64> for Val {
    fn from(value: i64) -> Self {
        XRegVal::new_i64(value).into()
    }
}

impl From<i32> for Val {
    fn from(value: i32) -> Self {
        XRegVal::new_i32(value).into()
    }
}

impl<T> From<*mut T> for Val {
    fn from(value: *mut T) -> Self {
        XRegVal::new_ptr(value).into()
    }
}

impl From<FRegVal> for Val {
    fn from(value: FRegVal) -> Self {
        Val::FReg(value)
    }
}

impl From<f64> for Val {
    fn from(value: f64) -> Self {
        FRegVal::new_f64(value).into()
    }
}

impl From<f32> for Val {
    fn from(value: f32) -> Self {
        FRegVal::new_f32(value).into()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl From<VRegVal> for Val {
    fn from(value: VRegVal) -> Self {
        Val::VReg(value)
    }
}
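
// A minimal sketch test (added for illustration, not from the original
// source): the `From` impls above route integers to `x` register values and
// floats to `f` register values.
#[test]
fn val_from_conversions() {
    assert!(matches!(Val::from(42_u64), Val::XReg(_)));
    assert!(matches!(Val::from(1.5_f32), Val::FReg(_)));
}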

/// An `x` register value: integers.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);

impl PartialEq for XRegVal {
    fn eq(&self, other: &Self) -> bool {
        self.get_u64() == other.get_u64()
    }
}

impl Eq for XRegVal {}

impl fmt::Debug for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("XRegVal")
            .field("as_u64", &self.get_u64())
            .finish()
    }
}

impl fmt::LowerHex for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_u64(), f)
    }
}

/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    is that the "upper bits" are now endianness-dependent. That means that
///    the state of the register is now platform-dependent.
/// 2. Sign or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32-bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change this.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them so this in theory doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself as it's
/// interpreted, meaning that severing data-dependencies with previous
/// operations is hypothesized to not be too important. If this is ever a
/// problem though it could increase the likelihood we go for route (2) above
/// instead (or maybe even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}

impl Default for XRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl XRegVal {
    pub fn new_i32(x: i32) -> Self {
        let mut val = XRegVal::default();
        val.set_i32(x);
        val
    }

    pub fn new_u32(x: u32) -> Self {
        let mut val = XRegVal::default();
        val.set_u32(x);
        val
    }

    pub fn new_i64(x: i64) -> Self {
        let mut val = XRegVal::default();
        val.set_i64(x);
        val
    }

    pub fn new_u64(x: u64) -> Self {
        let mut val = XRegVal::default();
        val.set_u64(x);
        val
    }

    pub fn new_ptr<T>(ptr: *mut T) -> Self {
        let mut val = XRegVal::default();
        val.set_ptr(ptr);
        val
    }

    pub fn get_i32(&self) -> i32 {
        let x = unsafe { self.0.i32 };
        i32::from_le(x)
    }

    pub fn get_u32(&self) -> u32 {
        let x = unsafe { self.0.u32 };
        u32::from_le(x)
    }

    pub fn get_i64(&self) -> i64 {
        let x = unsafe { self.0.i64 };
        i64::from_le(x)
    }

    pub fn get_u64(&self) -> u64 {
        let x = unsafe { self.0.u64 };
        u64::from_le(x)
    }

    pub fn get_ptr<T>(&self) -> *mut T {
        let ptr = unsafe { self.0.ptr };
        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
    }

    pub fn set_i32(&mut self, x: i32) {
        self.0.i32 = x.to_le();
    }

    pub fn set_u32(&mut self, x: u32) {
        self.0.u32 = x.to_le();
    }

    pub fn set_i64(&mut self, x: i64) {
        self.0.i64 = x.to_le();
    }

    pub fn set_u64(&mut self, x: u64) {
        self.0.u64 = x.to_le();
    }

    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
        self.0.ptr = ptr.expose_provenance().to_le();
    }
}
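
// A small sketch test (added for illustration, not from the original source):
// storage is little-endian, so writing the low 32 bits of a default (zeroed)
// register leaves the upper 32 bits zero when read back as a `u64`.
#[test]
fn xregval_stores_little_endian() {
    let mut val = XRegVal::default();
    val.set_u32(0xdead_beef);
    assert_eq!(val.get_u32(), 0xdead_beef);
    assert_eq!(val.get_u64(), 0xdead_beef);
}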

/// An `f` register value: floats.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("FRegVal")
            .field("as_f32", &self.get_f32())
            .field("as_f64", &self.get_f64())
            .finish()
    }
}

impl fmt::LowerHex for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
    }
}

// NB: like `XRegUnion` values here are always little-endian, see the
// documentation above for more details.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}

impl Default for FRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
impl FRegVal {
    pub fn new_f32(f: f32) -> Self {
        let mut val = Self::default();
        val.set_f32(f);
        val
    }

    pub fn new_f64(f: f64) -> Self {
        let mut val = Self::default();
        val.set_f64(f);
        val
    }

    pub fn get_f32(&self) -> f32 {
        let val = unsafe { self.0.f32 };
        f32::from_le_bytes(val.to_ne_bytes())
    }

    pub fn get_f64(&self) -> f64 {
        let val = unsafe { self.0.f64 };
        f64::from_le_bytes(val.to_ne_bytes())
    }

    pub fn set_f32(&mut self, val: f32) {
        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
    }

    pub fn set_f64(&mut self, val: f64) {
        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
    }
}
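
// Sketch test (added for illustration, not from the original source): float
// registers round-trip exact bit patterns, including non-canonical NaNs,
// because values are stored as raw little-endian bits.
#[test]
fn fregval_preserves_nan_bits() {
    let nan = f32::from_bits(0x7fc0_1234);
    let val = FRegVal::new_f32(nan);
    assert_eq!(val.get_f32().to_bits(), 0x7fc0_1234);
}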

/// A `v` register value: vectors.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::Debug for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("VRegVal")
            .field("as_u128", &unsafe { self.0.u128 })
            .finish()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::LowerHex for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
    }
}

/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required though so bitcasts between different shapes of
/// vectors work. This union cannot be stored in big-endian form.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the
    // bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}

#[cfg(not(pulley_disable_interp_simd))]
impl Default for VRegVal {
    fn default() -> Self {
        Self(unsafe { mem::zeroed() })
    }
}

#[expect(missing_docs, reason = "self-describing methods")]
#[cfg(not(pulley_disable_interp_simd))]
impl VRegVal {
    pub fn new_u128(i: u128) -> Self {
        let mut val = Self::default();
        val.set_u128(i);
        val
    }

    pub fn get_u128(&self) -> u128 {
        let val = unsafe { self.0.u128 };
        u128::from_le(val)
    }

    pub fn set_u128(&mut self, val: u128) {
        self.0.u128 = val.to_le();
    }

    fn get_i8x16(&self) -> [i8; 16] {
        let val = unsafe { self.0.i8x16 };
        val.map(|e| i8::from_le(e))
    }

    fn set_i8x16(&mut self, val: [i8; 16]) {
        self.0.i8x16 = val.map(|e| e.to_le());
    }

    fn get_u8x16(&self) -> [u8; 16] {
        let val = unsafe { self.0.u8x16 };
        val.map(|e| u8::from_le(e))
    }

    fn set_u8x16(&mut self, val: [u8; 16]) {
        self.0.u8x16 = val.map(|e| e.to_le());
    }

    fn get_i16x8(&self) -> [i16; 8] {
        let val = unsafe { self.0.i16x8 };
        val.map(|e| i16::from_le(e))
    }

    fn set_i16x8(&mut self, val: [i16; 8]) {
        self.0.i16x8 = val.map(|e| e.to_le());
    }

    fn get_u16x8(&self) -> [u16; 8] {
        let val = unsafe { self.0.u16x8 };
        val.map(|e| u16::from_le(e))
    }

    fn set_u16x8(&mut self, val: [u16; 8]) {
        self.0.u16x8 = val.map(|e| e.to_le());
    }

    fn get_i32x4(&self) -> [i32; 4] {
        let val = unsafe { self.0.i32x4 };
        val.map(|e| i32::from_le(e))
    }

    fn set_i32x4(&mut self, val: [i32; 4]) {
        self.0.i32x4 = val.map(|e| e.to_le());
    }

    fn get_u32x4(&self) -> [u32; 4] {
        let val = unsafe { self.0.u32x4 };
        val.map(|e| u32::from_le(e))
    }

    fn set_u32x4(&mut self, val: [u32; 4]) {
        self.0.u32x4 = val.map(|e| e.to_le());
    }

    fn get_i64x2(&self) -> [i64; 2] {
        let val = unsafe { self.0.i64x2 };
        val.map(|e| i64::from_le(e))
    }

    fn set_i64x2(&mut self, val: [i64; 2]) {
        self.0.i64x2 = val.map(|e| e.to_le());
    }

    fn get_u64x2(&self) -> [u64; 2] {
        let val = unsafe { self.0.u64x2 };
        val.map(|e| u64::from_le(e))
    }

    fn set_u64x2(&mut self, val: [u64; 2]) {
        self.0.u64x2 = val.map(|e| e.to_le());
    }

    fn get_f64x2(&self) -> [f64; 2] {
        let val = unsafe { self.0.f64x2 };
        val.map(|e| f64::from_bits(u64::from_le(e)))
    }

    fn set_f64x2(&mut self, val: [f64; 2]) {
        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
    }

    fn get_f32x4(&self) -> [f32; 4] {
        let val = unsafe { self.0.f32x4 };
        val.map(|e| f32::from_bits(u32::from_le(e)))
    }

    fn set_f32x4(&mut self, val: [f32; 4]) {
        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
    }
}
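
// Sketch test (added for illustration, not from the original source): the
// vector union is kept little-endian, so lane 0 of a byte vector is the
// least-significant byte of the corresponding `u128`.
#[cfg(not(pulley_disable_interp_simd))]
#[test]
fn vregval_lane_zero_is_low_byte() {
    let val = VRegVal::new_u128(0x0102_0304_0506_0708_090a_0b0c_0d0e_0f10);
    assert_eq!(val.get_u8x16()[0], 0x10);
}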

/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    fp: *mut u8,
    lr: *mut u8,
    stack: Stack,
    done_reason: Option<DoneReason<()>>,
}

unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}

/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
struct Stack {
    storage: Vec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}

impl Stack {
    /// Creates a new stack which will have a byte size of at least `size`.
    ///
    /// The allocated stack might be slightly larger due to necessary rounding.
    fn new(size: usize) -> Stack {
        Stack {
            // Round up `size` to the nearest multiple of 16. Note that the
            // stack is also allocated here but not initialized, and that's
            // intentional as pulley bytecode should always initialize the stack
            // before use.
            storage: Vec::with_capacity((size + 15) / 16),
        }
    }

    /// Returns a pointer to the top of the stack (the highest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the top.
    fn top(&mut self) -> *mut u8 {
        let len = self.len();
        unsafe { self.base().add(len) }
    }

    /// Returns a pointer to the base of the stack (the lowest address).
    ///
    /// Note that the returned pointer has provenance for the entire stack
    /// allocation, however, not just the base.
    fn base(&mut self) -> *mut u8 {
        self.storage.as_mut_ptr().cast::<u8>()
    }

    /// Returns the length, in bytes, of this stack allocation.
    fn len(&self) -> usize {
        self.storage.capacity() * mem::size_of::<Align16>()
    }
}
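
// Sketch test (added for illustration, not from the original source): the
// stack's byte length is at least the requested size and always a multiple
// of 16.
#[test]
fn stack_len_is_rounded_to_16() {
    let stack = Stack::new(17);
    assert!(stack.len() >= 17);
    assert_eq!(stack.len() % 16, 0);
}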

impl fmt::Debug for MachineState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MachineState {
            x_regs,
            f_regs,
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs,
            stack: _,
            done_reason: _,
            fp: _,
            lr: _,
        } = self;

        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);

        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut f = f.debug_map();
                for (i, r) in self.0.iter().enumerate() {
                    f.entry(&(self.1)(i as u8), r);
                }
                f.finish()
            }
        }

        let mut f = f.debug_struct("MachineState");

        f.field(
            "x_regs",
            &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
        )
        .field(
            "f_regs",
            &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
        );
        #[cfg(not(pulley_disable_interp_simd))]
        f.field(
            "v_regs",
            &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
        );
        f.finish_non_exhaustive()
    }
}

macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);

/// Sentinel return address that signals the end of the call stack.
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;

impl MachineState {
    fn with_stack(stack_size: usize) -> Self {
        let mut state = Self {
            x_regs: [Default::default(); XReg::RANGE.end as usize],
            f_regs: Default::default(),
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs: Default::default(),
            stack: Stack::new(stack_size),
            done_reason: None,
            fp: HOST_RETURN_ADDR,
            lr: HOST_RETURN_ADDR,
        };

        let sp = state.stack.top();
        state[XReg::sp] = XRegVal::new_ptr(sp);

        state
    }
}
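
// Sketch test (added for illustration, not from the original source): a
// fresh machine state points `sp` at the top (highest address) of its stack.
#[test]
fn with_stack_initializes_sp_to_top() {
    let mut state = MachineState::with_stack(64);
    let top = state.stack.top();
    assert_eq!(state[XReg::sp].get_ptr::<u8>(), top);
}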

/// Inner private module to prevent creation of the `Done` structure outside of
/// this module.
mod done {
    use super::{Encode, Interpreter, MachineState};
    use core::ops::ControlFlow;
    use core::ptr::NonNull;

    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        _priv: (),
    }

    /// Reason that the pulley interpreter has ceased execution.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }

    /// Stored within `DoneReason::Trap`.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
    }

    impl MachineState {
        pub(super) fn debug_assert_done_reason_none(&mut self) {
            debug_assert!(self.done_reason.is_none());
        }

        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
            self.done_reason.take().unwrap()
        }
    }

    impl Interpreter<'_> {
        /// Finishes execution by recording `DoneReason::Trap`.
        ///
        /// This method takes an `I` generic parameter indicating which
        /// instruction is executing this function and generating a trap. That's
        /// used to go backwards from the current `pc` which is just beyond the
        /// instruction to point to the instruction itself in the trap metadata
        /// returned from the interpreter.
        #[cold]
        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
            self.done_trap_kind::<I>(None)
        }

        /// Same as `done_trap` but with an explicit `TrapKind`.
        #[cold]
        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
            let pc = self.current_pc::<I>();
            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
        #[cold]
        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::CallIndirectHost {
                id,
                resume: self.pc.as_ptr(),
            });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::ReturnToHost`.
        #[cold]
        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
            ControlFlow::Break(Done { _priv: () })
        }
    }
}

use done::Done;
pub use done::{DoneReason, TrapKind};

struct Interpreter<'a> {
    state: &'a mut MachineState,
    pc: UnsafeBytecodeStream,
    executing_pc: ExecutingPcRef<'a>,
}

impl Interpreter<'_> {
    /// Performs a relative jump of `offset` bytes from the current instruction.
    ///
    /// This will jump from the start of the current instruction, identified by
    /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
    /// function actually points to the instruction after this one so `I` is
    /// necessary to go back to ourselves after which we then go `offset` away.
    #[inline]
    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let offset = isize::try_from(i32::from(offset)).unwrap();
        let my_pc = self.current_pc::<I>();
        self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) };
        ControlFlow::Continue(())
    }

    /// Returns the PC of the current instruction where `I` is the static type
    /// representing the current instruction.
    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
    }

    /// `sp -= size_of::<T>(); *sp = val;`
    ///
    /// Note that `I` is the instruction which is pushing data to use if a trap
    /// is generated.
    #[must_use]
    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
        self.set_sp::<I>(new_sp.cast())?;
        unsafe {
            new_sp.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }

    /// `ret = *sp; sp += size_of::<T>()`
    fn pop<T>(&mut self) -> T {
        let sp = self.state[XReg::sp].get_ptr::<T>();
        let val = unsafe { sp.read_unaligned() };
        self.set_sp_unchecked(sp.wrapping_add(1));
        val
    }

    /// Sets the stack pointer to the `sp` provided.
    ///
    /// Returns a trap if this would result in stack overflow, i.e. if `sp` is
    /// beneath the base pointer of `self.state.stack`.
    ///
    /// The `I` parameter here is the instruction that is setting the stack
    /// pointer and is used to calculate this instruction's own `pc` if this
    /// instruction traps.
    #[must_use]
    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
        let sp_raw = sp as usize;
        let base_raw = self.state.stack.base() as usize;
        if sp_raw < base_raw {
            return self.done_trap::<I>();
        }
        self.set_sp_unchecked(sp);
        ControlFlow::Continue(())
    }

    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
    /// only be used with stack increment operations such as `pop`.
    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
        if cfg!(debug_assertions) {
            let sp_raw = sp as usize;
            let base = self.state.stack.base() as usize;
            let end = base + self.state.stack.len();
            assert!(base <= sp_raw && sp_raw <= end);
        }
        self.state[XReg::sp].set_ptr(sp);
    }

    /// Loads a value of `T` using native-endian byte ordering from the `addr`
    /// specified.
    ///
    /// The `I` type parameter is the instruction issuing this load which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was loaded successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
        unsafe { addr.load_ne::<T, I>(self) }
    }

    /// Stores a `val` to the `addr` specified.
    ///
    /// The `I` type parameter is the instruction issuing this store which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was stored successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(
        &mut self,
        addr: impl AddressingMode,
        val: T,
    ) -> ControlFlow<Done> {
        unsafe { addr.store_ne::<T, I>(self, val) }
    }

    fn check_xnn_from_f32<I: Encode>(
        &mut self,
        val: f32,
        (lo, hi): (f32, f32),
    ) -> ControlFlow<Done> {
        self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
    }

    fn check_xnn_from_f64<I: Encode>(
        &mut self,
        val: f64,
        (lo, hi): (f64, f64),
    ) -> ControlFlow<Done> {
        if val != val {
            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
        }
        let val = val.wasm_trunc();
        if val <= lo || val >= hi {
            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
        }
        ControlFlow::Continue(())
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
        let lo = self.state[lo].get_u64();
        let hi = self.state[hi].get_i64();
        i128::from(lo) | (i128::from(hi) << 64)
    }

    #[cfg(not(pulley_disable_interp_simd))]
    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
        self.state[lo].set_u64(val as u64);
        self.state[hi].set_u64((val >> 64) as u64);
    }

    fn record_executing_pc_for_profiling(&mut self) {
        // Note that this is a no-op if `feature = "profile"` is disabled.
        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
    }
}

/// Helper trait to encompass the various addressing modes of Pulley.
trait AddressingMode: Sized {
    /// Calculates the native host address `*mut T` corresponding to this
    /// addressing mode.
    ///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}

impl AddressingMode for AddrO32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}

impl AddressingMode for AddrZ {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
        let host_addr = i.state[self.addr].get_ptr::<T>();
        if host_addr.is_null() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}

impl AddressingMode for AddrG32 {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}
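
// Worked example (illustrative, not from the original source): with
// `bound = 0x1_0000`, `offset = 8`, and a 4-byte `T`, the check above traps
// for any `wasm_addr > 0xfff4`, i.e. exactly when the access at
// `wasm_addr + 8` of 4 bytes would extend past the 64 KiB bound.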

impl AddressingMode for AddrG32Bne {
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
        let bound = unsafe {
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}

#[test]
fn simple_push_pop() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(i.pop::<i32>(), 0_i32);
        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
        assert_eq!(i.pop::<i32>(), 4_i32);
        assert_eq!(i.pop::<i32>(), 3_i32);
        assert_eq!(i.pop::<i32>(), 2_i32);
        assert_eq!(i.pop::<i32>(), 1_i32);
    }
}

macro_rules! br_if_imm {
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}
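
// A sketch of what one expansion of `br_if_imm!` above produces (simplified
// for illustration):
//
//     fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
//         -> ControlFlow<Done>
//     {
//         let a = self.state[a].get_i32();
//         if a == b.into() {
//             self.pc_rel_jump::<crate::BrIfXeq32I8>(offset)
//         } else {
//             ControlFlow::Continue(())
//         }
//     }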

impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }

    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }

    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.pc_rel_jump::<crate::Call>(offset)
    }

    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        self.state[XReg::x0] = self.state[arg1];
        self.pc_rel_jump::<crate::Call1>(offset)
    }

    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1) = (self.state[arg1], self.state[arg2]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.pc_rel_jump::<crate::Call2>(offset)
    }

    fn call3(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.pc_rel_jump::<crate::Call3>(offset)
    }

    fn call4(
        &mut self,
        arg1: XReg,
        arg2: XReg,
        arg3: XReg,
        arg4: XReg,
        offset: PcRelOffset,
    ) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        let (x0, x1, x2, x3) = (
            self.state[arg1],
            self.state[arg2],
            self.state[arg3],
            self.state[arg4],
        );
        self.state[XReg::x0] = x0;
        self.state[XReg::x1] = x1;
        self.state[XReg::x2] = x2;
        self.state[XReg::x3] = x3;
        self.pc_rel_jump::<crate::Call4>(offset)
    }
    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is that only
        // valid bytecode is interpreted, so the jump destination is part of
        // the validity of the bytecode itself.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }

    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }

    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond != 0 {
            self.pc_rel_jump::<crate::BrIf>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let cond = self.state[cond].get_u32();
        if cond == 0 {
            self.pc_rel_jump::<crate::BrIfNot>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i32();
        let b = self.state[b].get_i32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u32();
        let b = self.state[b].get_u32();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a == b {
            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a != b {
            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_i64();
        let b = self.state[b].get_i64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a < b {
            self.pc_rel_jump::<crate::BrIfXult64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        let a = self.state[a].get_u64();
        let b = self.state[b].get_u64();
        if a <= b {
            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
        } else {
            ControlFlow::Continue(())
        }
    }

    br_if_imm! {
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }

    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1660        let val = self.state[src];
1661        self.state[dst] = val;
1662        ControlFlow::Continue(())
1663    }
1664
1665    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1666        self.state[dst].set_i64(i64::from(imm));
1667        ControlFlow::Continue(())
1668    }
1669
1670    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1671        self.state[dst].set_i64(0);
1672        ControlFlow::Continue(())
1673    }
1674
1675    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1676        self.state[dst].set_i64(1);
1677        ControlFlow::Continue(())
1678    }
1679
1680    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1681        self.state[dst].set_i64(i64::from(imm));
1682        ControlFlow::Continue(())
1683    }
1684
1685    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1686        self.state[dst].set_i64(i64::from(imm));
1687        ControlFlow::Continue(())
1688    }
1689
1690    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1691        self.state[dst].set_i64(imm);
1692        ControlFlow::Continue(())
1693    }
1694
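    // The arithmetic handlers below uniformly use Rust's `wrapping_*`
    // operations: Pulley's plain integer arithmetic wraps on overflow rather
    // than trapping. Trapping variants (e.g. `xadd32_uoverflow_trap`, later
    // in this file) are separate opcodes.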
1695    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1696        let a = self.state[operands.src1].get_u32();
1697        let b = self.state[operands.src2].get_u32();
1698        self.state[operands.dst].set_u32(a.wrapping_add(b));
1699        ControlFlow::Continue(())
1700    }
1701
1702    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1703        self.xadd32_u32(dst, src1, src2.into())
1704    }
1705
1706    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1707        let a = self.state[src1].get_u32();
1708        self.state[dst].set_u32(a.wrapping_add(src2));
1709        ControlFlow::Continue(())
1710    }
1711
1712    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1713        let a = self.state[operands.src1].get_u64();
1714        let b = self.state[operands.src2].get_u64();
1715        self.state[operands.dst].set_u64(a.wrapping_add(b));
1716        ControlFlow::Continue(())
1717    }
1718
1719    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1720        self.xadd64_u32(dst, src1, src2.into())
1721    }
1722
1723    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1724        let a = self.state[src1].get_u64();
1725        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1726        ControlFlow::Continue(())
1727    }
1728
1729    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1730        let a = self.state[src1].get_u32();
1731        let b = self.state[src2].get_u32();
1732        let c = self.state[src3].get_u32();
1733        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1734        ControlFlow::Continue(())
1735    }
1736
1737    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1738        let a = self.state[src1].get_u64();
1739        let b = self.state[src2].get_u64();
1740        let c = self.state[src3].get_u64();
1741        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1742        ControlFlow::Continue(())
1743    }
1744
1745    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1746        let a = self.state[operands.src1].get_u32();
1747        let b = self.state[operands.src2].get_u32();
1748        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1749        ControlFlow::Continue(())
1750    }
1751
1752    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1753        self.xsub32_u32(dst, src1, src2.into())
1754    }
1755
1756    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1757        let a = self.state[src1].get_u32();
1758        self.state[dst].set_u32(a.wrapping_sub(src2));
1759        ControlFlow::Continue(())
1760    }
1761
1762    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1763        let a = self.state[operands.src1].get_u64();
1764        let b = self.state[operands.src2].get_u64();
1765        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1766        ControlFlow::Continue(())
1767    }
1768
1769    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1770        self.xsub64_u32(dst, src1, src2.into())
1771    }
1772
1773    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1774        let a = self.state[src1].get_u64();
1775        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1776        ControlFlow::Continue(())
1777    }
1778
1779    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1780        let a = self.state[operands.src1].get_u32();
1781        let b = self.state[operands.src2].get_u32();
1782        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1783        ControlFlow::Continue(())
1784    }
1785
1786    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1787        self.xmul32_s32(dst, src1, src2.into())
1788    }
1789
1790    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1791        let a = self.state[src1].get_i32();
1792        self.state[dst].set_i32(a.wrapping_mul(src2));
1793        ControlFlow::Continue(())
1794    }
1795
1796    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1797        let a = self.state[operands.src1].get_u64();
1798        let b = self.state[operands.src2].get_u64();
1799        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1800        ControlFlow::Continue(())
1801    }
1802
1803    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1804        self.xmul64_s32(dst, src1, src2.into())
1805    }
1806
1807    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1808        let a = self.state[src1].get_i64();
1809        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1810        ControlFlow::Continue(())
1811    }
1812
1813    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1814        let a = self.state[operands.src1].get_u32();
1815        let b = self.state[operands.src2].get_u32();
1816        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1817        ControlFlow::Continue(())
1818    }
1819
1820    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1821        let a = self.state[operands.src1].get_u32();
1822        let b = self.state[operands.src2].get_u32();
1823        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1824        ControlFlow::Continue(())
1825    }
1826
1827    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1828        let a = self.state[operands.src1].get_i32();
1829        let b = self.state[operands.src2].get_u32();
1830        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1831        ControlFlow::Continue(())
1832    }
1833
1834    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1835        let a = self.state[operands.src1].get_u64();
1836        let b = self.state[operands.src2].get_u32();
1837        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1838        ControlFlow::Continue(())
1839    }
1840
1841    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1842        let a = self.state[operands.src1].get_u64();
1843        let b = self.state[operands.src2].get_u32();
1844        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1845        ControlFlow::Continue(())
1846    }
1847
1848    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1849        let a = self.state[operands.src1].get_i64();
1850        let b = self.state[operands.src2].get_u32();
1851        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1852        ControlFlow::Continue(())
1853    }
1854
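    // The `*_u6` shift variants take their shift amount as a 6-bit immediate
    // (`U6`) encoded directly in the instruction rather than in a register.
    // `u8::from(operands.src2)` extracts that immediate, which is at most 63
    // and therefore always a valid 64-bit shift amount; the 32-bit variants
    // still go through `wrapping_shl`/`wrapping_shr`, which mask the amount.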
1855    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1856        let a = self.state[operands.src1].get_u32();
1857        let b = u32::from(u8::from(operands.src2));
1858        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1859        ControlFlow::Continue(())
1860    }
1861
1862    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1863        let a = self.state[operands.src1].get_u32();
1864        let b = u32::from(u8::from(operands.src2));
1865        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1866        ControlFlow::Continue(())
1867    }
1868
1869    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1870        let a = self.state[operands.src1].get_i32();
1871        let b = u32::from(u8::from(operands.src2));
1872        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1873        ControlFlow::Continue(())
1874    }
1875
1876    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1877        let a = self.state[operands.src1].get_u64();
1878        let b = u32::from(u8::from(operands.src2));
1879        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1880        ControlFlow::Continue(())
1881    }
1882
1883    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1884        let a = self.state[operands.src1].get_u64();
1885        let b = u32::from(u8::from(operands.src2));
1886        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1887        ControlFlow::Continue(())
1888    }
1889
1890    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1891        let a = self.state[operands.src1].get_i64();
1892        let b = u32::from(u8::from(operands.src2));
1893        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1894        ControlFlow::Continue(())
1895    }
1896
1897    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1898        let a = self.state[src].get_i32();
1899        self.state[dst].set_i32(a.wrapping_neg());
1900        ControlFlow::Continue(())
1901    }
1902
1903    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1904        let a = self.state[src].get_i64();
1905        self.state[dst].set_i64(a.wrapping_neg());
1906        ControlFlow::Continue(())
1907    }
1908
1909    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1910        let a = self.state[operands.src1].get_u64();
1911        let b = self.state[operands.src2].get_u64();
1912        self.state[operands.dst].set_u32(u32::from(a == b));
1913        ControlFlow::Continue(())
1914    }
1915
1916    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1917        let a = self.state[operands.src1].get_u64();
1918        let b = self.state[operands.src2].get_u64();
1919        self.state[operands.dst].set_u32(u32::from(a != b));
1920        ControlFlow::Continue(())
1921    }
1922
1923    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1924        let a = self.state[operands.src1].get_i64();
1925        let b = self.state[operands.src2].get_i64();
1926        self.state[operands.dst].set_u32(u32::from(a < b));
1927        ControlFlow::Continue(())
1928    }
1929
1930    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1931        let a = self.state[operands.src1].get_i64();
1932        let b = self.state[operands.src2].get_i64();
1933        self.state[operands.dst].set_u32(u32::from(a <= b));
1934        ControlFlow::Continue(())
1935    }
1936
1937    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1938        let a = self.state[operands.src1].get_u64();
1939        let b = self.state[operands.src2].get_u64();
1940        self.state[operands.dst].set_u32(u32::from(a < b));
1941        ControlFlow::Continue(())
1942    }
1943
1944    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1945        let a = self.state[operands.src1].get_u64();
1946        let b = self.state[operands.src2].get_u64();
1947        self.state[operands.dst].set_u32(u32::from(a <= b));
1948        ControlFlow::Continue(())
1949    }
1950
1951    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1952        let a = self.state[operands.src1].get_u32();
1953        let b = self.state[operands.src2].get_u32();
1954        self.state[operands.dst].set_u32(u32::from(a == b));
1955        ControlFlow::Continue(())
1956    }
1957
1958    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1959        let a = self.state[operands.src1].get_u32();
1960        let b = self.state[operands.src2].get_u32();
1961        self.state[operands.dst].set_u32(u32::from(a != b));
1962        ControlFlow::Continue(())
1963    }
1964
1965    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1966        let a = self.state[operands.src1].get_i32();
1967        let b = self.state[operands.src2].get_i32();
1968        self.state[operands.dst].set_u32(u32::from(a < b));
1969        ControlFlow::Continue(())
1970    }
1971
1972    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1973        let a = self.state[operands.src1].get_i32();
1974        let b = self.state[operands.src2].get_i32();
1975        self.state[operands.dst].set_u32(u32::from(a <= b));
1976        ControlFlow::Continue(())
1977    }
1978
1979    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1980        let a = self.state[operands.src1].get_u32();
1981        let b = self.state[operands.src2].get_u32();
1982        self.state[operands.dst].set_u32(u32::from(a < b));
1983        ControlFlow::Continue(())
1984    }
1985
1986    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1987        let a = self.state[operands.src1].get_u32();
1988        let b = self.state[operands.src2].get_u32();
1989        self.state[operands.dst].set_u32(u32::from(a <= b));
1990        ControlFlow::Continue(())
1991    }
1992
1993    fn push_frame(&mut self) -> ControlFlow<Done> {
1994        self.push::<crate::PushFrame, _>(self.state.lr)?;
1995        self.push::<crate::PushFrame, _>(self.state.fp)?;
1996        self.state.fp = self.state[XReg::sp].get_ptr();
1997        ControlFlow::Continue(())
1998    }
1999
2000    #[inline]
2001    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2002        // Decrement the stack pointer by `amt` bytes, plus space for two
2003        // more pointers to hold fp/lr.
2004        let ptr_size = size_of::<usize>();
2005        let full_amt = usize::from(amt) + 2 * ptr_size;
2006        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
2007        self.set_sp::<crate::PushFrameSave>(new_sp)?;
2008
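        // Sketch of the frame this builds, derived from the stores below
        // (higher addresses at the top; `old sp` is `sp` on entry):
        //
        //     +----------------------+ <- old sp
        //     | saved lr             |
        //     +----------------------+
        //     | saved fp             |
        //     +----------------------+ <- new fp (= new sp + amt)
        //     | regs in `regs`,      |
        //     |   8 bytes each, then |
        //     |   the rest of the    |
        //     |   `amt`-byte frame   |
        //     +----------------------+ <- new sp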
2009        unsafe {
2010            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
2011            // that order, at the top of the allocated area.
2012            self.store_ne::<_, crate::PushFrameSave>(
2013                AddrO32 {
2014                    addr: XReg::sp,
2015                    offset: (full_amt - 1 * ptr_size) as i32,
2016                },
2017                self.state.lr,
2018            )?;
2019            self.store_ne::<_, crate::PushFrameSave>(
2020                AddrO32 {
2021                    addr: XReg::sp,
2022                    offset: (full_amt - 2 * ptr_size) as i32,
2023                },
2024                self.state.fp,
2025            )?;
2026
2027            // Set `fp` to the top of our frame, where the old `fp` was just stored.
2028            let mut offset = i32::from(amt);
2029            self.state.fp = self.state[XReg::sp]
2030                .get_ptr::<u8>()
2031                .byte_offset(offset as isize);
2032
2033            // Next save any registers in `regs` to the stack.
2034            for reg in regs {
2035                offset -= 8;
2036                self.store_ne::<_, crate::PushFrameSave>(
2037                    AddrO32 {
2038                        addr: XReg::sp,
2039                        offset,
2040                    },
2041                    self.state[reg].get_u64(),
2042                )?;
2043            }
2044        }
2045        ControlFlow::Continue(())
2046    }
2047
2048    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2049        // Restore all registers in `regs`, then defer to the normal
2050        // `pop_frame` handler below to restore fp/lr.
2051        unsafe {
2052            let mut offset = i32::from(amt);
2053            for reg in regs {
2054                offset -= 8;
2055                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2056                    addr: XReg::sp,
2057                    offset,
2058                })?;
2059                self.state[reg].set_u64(val);
2060            }
2061        }
2062        self.pop_frame()
2063    }
2064
2065    fn pop_frame(&mut self) -> ControlFlow<Done> {
2066        self.set_sp_unchecked(self.state.fp);
2067        let fp = self.pop();
2068        let lr = self.pop();
2069        self.state.fp = fp;
2070        self.state.lr = lr;
2071        ControlFlow::Continue(())
2072    }
2073
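    // Branch-table dispatch: at this point `self.pc` points just past the
    // `br_table32` operands, at a table of `amt` 4-byte `PcRelOffset`
    // entries. The index is clamped to `amt - 1`, so the last entry doubles
    // as the default target for out-of-range indices.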
2074    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2075        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2076        // SAFETY: part of the contract of the interpreter is only dealing with
2077        // valid bytecode, so this offset should be safe.
2078        self.pc = unsafe { self.pc.offset(idx * 4) };
2079
2080        // Decode the `PcRelOffset` via a temporary copy so that `self.pc`
2081        // still points at the table entry, which the jump is relative to.
2082        let mut tmp = self.pc;
2083        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2084        let offset = isize::try_from(i32::from(rel)).unwrap();
2085        self.pc = unsafe { self.pc.offset(offset) };
2086        ControlFlow::Continue(())
2087    }
2088
2089    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2090        let amt = usize::try_from(amt).unwrap();
2091        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2092        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2093        ControlFlow::Continue(())
2094    }
2095
2096    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2097        let amt = usize::try_from(amt).unwrap();
2098        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2099        self.set_sp_unchecked(new_sp);
2100        ControlFlow::Continue(())
2101    }
2102
2103    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2104        let src = self.state[src].get_u64() as u8;
2105        self.state[dst].set_u64(src.into());
2106        ControlFlow::Continue(())
2107    }
2108
2109    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2110        let src = self.state[src].get_u64() as u16;
2111        self.state[dst].set_u64(src.into());
2112        ControlFlow::Continue(())
2113    }
2114
2115    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2116        let src = self.state[src].get_u64() as u32;
2117        self.state[dst].set_u64(src.into());
2118        ControlFlow::Continue(())
2119    }
2120
2121    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2122        let src = self.state[src].get_i64() as i8;
2123        self.state[dst].set_i64(src.into());
2124        ControlFlow::Continue(())
2125    }
2126
2127    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2128        let src = self.state[src].get_i64() as i16;
2129        self.state[dst].set_i64(src.into());
2130        ControlFlow::Continue(())
2131    }
2132
2133    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2134        let src = self.state[src].get_i64() as i32;
2135        self.state[dst].set_i64(src.into());
2136        ControlFlow::Continue(())
2137    }
2138
2139    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2140        let a = self.state[operands.src1].get_i32();
2141        let b = self.state[operands.src2].get_i32();
2142        match a.checked_div(b) {
2143            Some(result) => {
2144                self.state[operands.dst].set_i32(result);
2145                ControlFlow::Continue(())
2146            }
2147            None => {
2148                let kind = if b == 0 {
2149                    TrapKind::DivideByZero
2150                } else {
2151                    TrapKind::IntegerOverflow
2152                };
2153                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2154            }
2155        }
2156    }
2157
2158    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2159        let a = self.state[operands.src1].get_i64();
2160        let b = self.state[operands.src2].get_i64();
2161        match a.checked_div(b) {
2162            Some(result) => {
2163                self.state[operands.dst].set_i64(result);
2164                ControlFlow::Continue(())
2165            }
2166            None => {
2167                let kind = if b == 0 {
2168                    TrapKind::DivideByZero
2169                } else {
2170                    TrapKind::IntegerOverflow
2171                };
2172                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2173            }
2174        }
2175    }
2176
2177    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2178        let a = self.state[operands.src1].get_u32();
2179        let b = self.state[operands.src2].get_u32();
2180        match a.checked_div(b) {
2181            Some(result) => {
2182                self.state[operands.dst].set_u32(result);
2183                ControlFlow::Continue(())
2184            }
2185            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2186        }
2187    }
2188
2189    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2190        let a = self.state[operands.src1].get_u64();
2191        let b = self.state[operands.src2].get_u64();
2192        match a.checked_div(b) {
2193            Some(result) => {
2194                self.state[operands.dst].set_u64(result);
2195                ControlFlow::Continue(())
2196            }
2197            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2198        }
2199    }
2200
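    // Note the special case in the signed remainder handlers below:
    // `i32::MIN.checked_rem(-1)` (and the 64-bit equivalent) returns `None`
    // because the corresponding division overflows, yet the remainder itself
    // is well-defined as 0, which is what these opcodes produce.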
2201    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2202        let a = self.state[operands.src1].get_i32();
2203        let b = self.state[operands.src2].get_i32();
2204        let result = if a == i32::MIN && b == -1 {
2205            Some(0)
2206        } else {
2207            a.checked_rem(b)
2208        };
2209        match result {
2210            Some(result) => {
2211                self.state[operands.dst].set_i32(result);
2212                ControlFlow::Continue(())
2213            }
2214            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2215        }
2216    }
2217
2218    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2219        let a = self.state[operands.src1].get_i64();
2220        let b = self.state[operands.src2].get_i64();
2221        let result = if a == i64::MIN && b == -1 {
2222            Some(0)
2223        } else {
2224            a.checked_rem(b)
2225        };
2226        match result {
2227            Some(result) => {
2228                self.state[operands.dst].set_i64(result);
2229                ControlFlow::Continue(())
2230            }
2231            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2232        }
2233    }
2234
2235    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2236        let a = self.state[operands.src1].get_u32();
2237        let b = self.state[operands.src2].get_u32();
2238        match a.checked_rem(b) {
2239            Some(result) => {
2240                self.state[operands.dst].set_u32(result);
2241                ControlFlow::Continue(())
2242            }
2243            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2244        }
2245    }
2246
2247    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2248        let a = self.state[operands.src1].get_u64();
2249        let b = self.state[operands.src2].get_u64();
2250        match a.checked_rem(b) {
2251            Some(result) => {
2252                self.state[operands.dst].set_u64(result);
2253                ControlFlow::Continue(())
2254            }
2255            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2256        }
2257    }
2258
2259    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2260        let a = self.state[operands.src1].get_u32();
2261        let b = self.state[operands.src2].get_u32();
2262        self.state[operands.dst].set_u32(a & b);
2263        ControlFlow::Continue(())
2264    }
2265
2266    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2267        self.xband32_s32(dst, src1, src2.into())
2268    }
2269
2270    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2271        let a = self.state[src1].get_i32();
2272        self.state[dst].set_i32(a & src2);
2273        ControlFlow::Continue(())
2274    }
2275
2276    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2277        let a = self.state[operands.src1].get_u64();
2278        let b = self.state[operands.src2].get_u64();
2279        self.state[operands.dst].set_u64(a & b);
2280        ControlFlow::Continue(())
2281    }
2282
2283    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2284        self.xband64_s32(dst, src1, src2.into())
2285    }
2286
2287    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2288        let a = self.state[src1].get_i64();
2289        self.state[dst].set_i64(a & i64::from(src2));
2290        ControlFlow::Continue(())
2291    }
2292
2293    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2294        let a = self.state[operands.src1].get_u32();
2295        let b = self.state[operands.src2].get_u32();
2296        self.state[operands.dst].set_u32(a | b);
2297        ControlFlow::Continue(())
2298    }
2299
2300    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2301        self.xbor32_s32(dst, src1, src2.into())
2302    }
2303
2304    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2305        let a = self.state[src1].get_i32();
2306        self.state[dst].set_i32(a | src2);
2307        ControlFlow::Continue(())
2308    }
2309
2310    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2311        let a = self.state[operands.src1].get_u64();
2312        let b = self.state[operands.src2].get_u64();
2313        self.state[operands.dst].set_u64(a | b);
2314        ControlFlow::Continue(())
2315    }
2316
2317    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2318        self.xbor64_s32(dst, src1, src2.into())
2319    }
2320
2321    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2322        let a = self.state[src1].get_i64();
2323        self.state[dst].set_i64(a | i64::from(src2));
2324        ControlFlow::Continue(())
2325    }
2326
2327    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2328        let a = self.state[operands.src1].get_u32();
2329        let b = self.state[operands.src2].get_u32();
2330        self.state[operands.dst].set_u32(a ^ b);
2331        ControlFlow::Continue(())
2332    }
2333
2334    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2335        self.xbxor32_s32(dst, src1, src2.into())
2336    }
2337
2338    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2339        let a = self.state[src1].get_i32();
2340        self.state[dst].set_i32(a ^ src2);
2341        ControlFlow::Continue(())
2342    }
2343
2344    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2345        let a = self.state[operands.src1].get_u64();
2346        let b = self.state[operands.src2].get_u64();
2347        self.state[operands.dst].set_u64(a ^ b);
2348        ControlFlow::Continue(())
2349    }
2350
2351    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2352        self.xbxor64_s32(dst, src1, src2.into())
2353    }
2354
2355    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2356        let a = self.state[src1].get_i64();
2357        self.state[dst].set_i64(a ^ i64::from(src2));
2358        ControlFlow::Continue(())
2359    }
2360
2361    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2362        let a = self.state[src].get_u32();
2363        self.state[dst].set_u32(!a);
2364        ControlFlow::Continue(())
2365    }
2366
2367    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2368        let a = self.state[src].get_u64();
2369        self.state[dst].set_u64(!a);
2370        ControlFlow::Continue(())
2371    }
2372
2373    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2374        let a = self.state[operands.src1].get_u32();
2375        let b = self.state[operands.src2].get_u32();
2376        self.state[operands.dst].set_u32(a.min(b));
2377        ControlFlow::Continue(())
2378    }
2379
2380    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2381        let a = self.state[operands.src1].get_i32();
2382        let b = self.state[operands.src2].get_i32();
2383        self.state[operands.dst].set_i32(a.min(b));
2384        ControlFlow::Continue(())
2385    }
2386
2387    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2388        let a = self.state[operands.src1].get_u32();
2389        let b = self.state[operands.src2].get_u32();
2390        self.state[operands.dst].set_u32(a.max(b));
2391        ControlFlow::Continue(())
2392    }
2393
2394    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2395        let a = self.state[operands.src1].get_i32();
2396        let b = self.state[operands.src2].get_i32();
2397        self.state[operands.dst].set_i32(a.max(b));
2398        ControlFlow::Continue(())
2399    }
2400
2401    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2402        let a = self.state[operands.src1].get_u64();
2403        let b = self.state[operands.src2].get_u64();
2404        self.state[operands.dst].set_u64(a.min(b));
2405        ControlFlow::Continue(())
2406    }
2407
2408    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2409        let a = self.state[operands.src1].get_i64();
2410        let b = self.state[operands.src2].get_i64();
2411        self.state[operands.dst].set_i64(a.min(b));
2412        ControlFlow::Continue(())
2413    }
2414
2415    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2416        let a = self.state[operands.src1].get_u64();
2417        let b = self.state[operands.src2].get_u64();
2418        self.state[operands.dst].set_u64(a.max(b));
2419        ControlFlow::Continue(())
2420    }
2421
2422    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2423        let a = self.state[operands.src1].get_i64();
2424        let b = self.state[operands.src2].get_i64();
2425        self.state[operands.dst].set_i64(a.max(b));
2426        ControlFlow::Continue(())
2427    }
2428
2429    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2430        let a = self.state[src].get_u32();
2431        self.state[dst].set_u32(a.trailing_zeros());
2432        ControlFlow::Continue(())
2433    }
2434
2435    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2436        let a = self.state[src].get_u64();
2437        self.state[dst].set_u64(a.trailing_zeros().into());
2438        ControlFlow::Continue(())
2439    }
2440
2441    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2442        let a = self.state[src].get_u32();
2443        self.state[dst].set_u32(a.leading_zeros());
2444        ControlFlow::Continue(())
2445    }
2446
2447    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2448        let a = self.state[src].get_u64();
2449        self.state[dst].set_u64(a.leading_zeros().into());
2450        ControlFlow::Continue(())
2451    }
2452
2453    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2454        let a = self.state[src].get_u32();
2455        self.state[dst].set_u32(a.count_ones());
2456        ControlFlow::Continue(())
2457    }
2458
2459    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2460        let a = self.state[src].get_u64();
2461        self.state[dst].set_u64(a.count_ones().into());
2462        ControlFlow::Continue(())
2463    }
2464
2465    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2466        let a = self.state[operands.src1].get_u32();
2467        let b = self.state[operands.src2].get_u32();
2468        self.state[operands.dst].set_u32(a.rotate_left(b));
2469        ControlFlow::Continue(())
2470    }
2471
2472    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2473        let a = self.state[operands.src1].get_u64();
2474        let b = self.state[operands.src2].get_u32();
2475        self.state[operands.dst].set_u64(a.rotate_left(b));
2476        ControlFlow::Continue(())
2477    }
2478
2479    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2480        let a = self.state[operands.src1].get_u32();
2481        let b = self.state[operands.src2].get_u32();
2482        self.state[operands.dst].set_u32(a.rotate_right(b));
2483        ControlFlow::Continue(())
2484    }
2485
2486    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2487        let a = self.state[operands.src1].get_u64();
2488        let b = self.state[operands.src2].get_u32();
2489        self.state[operands.dst].set_u64(a.rotate_right(b));
2490        ControlFlow::Continue(())
2491    }
2492
2493    fn xselect32(
2494        &mut self,
2495        dst: XReg,
2496        cond: XReg,
2497        if_nonzero: XReg,
2498        if_zero: XReg,
2499    ) -> ControlFlow<Done> {
2500        let result = if self.state[cond].get_u32() != 0 {
2501            self.state[if_nonzero].get_u32()
2502        } else {
2503            self.state[if_zero].get_u32()
2504        };
2505        self.state[dst].set_u32(result);
2506        ControlFlow::Continue(())
2507    }
2508
2509    fn xselect64(
2510        &mut self,
2511        dst: XReg,
2512        cond: XReg,
2513        if_nonzero: XReg,
2514        if_zero: XReg,
2515    ) -> ControlFlow<Done> {
2516        let result = if self.state[cond].get_u32() != 0 {
2517            self.state[if_nonzero].get_u64()
2518        } else {
2519            self.state[if_zero].get_u64()
2520        };
2521        self.state[dst].set_u64(result);
2522        ControlFlow::Continue(())
2523    }
2524
2525    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2526        let a = self.state[src].get_i32();
2527        self.state[dst].set_i32(a.wrapping_abs());
2528        ControlFlow::Continue(())
2529    }
2530
2531    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2532        let a = self.state[src].get_i64();
2533        self.state[dst].set_i64(a.wrapping_abs());
2534        ControlFlow::Continue(())
2535    }
2536
2537    // =========================================================================
2538    // o32 addressing modes
2539
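    // A note on the pattern used by all of the load/store handlers below:
    // `load_ne`/`store_ne` move raw, native-endian bytes, and the explicit
    // `from_le`/`to_le` (or `from_be`/`to_be` for the big-endian opcodes)
    // conversions then fix up the byte order, so each opcode's declared
    // endianness is honored regardless of the host's endianness.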
2540    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2541        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2542        self.state[dst].set_u32(result.into());
2543        ControlFlow::Continue(())
2544    }
2545
2546    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2547        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2548        self.state[dst].set_i32(result.into());
2549        ControlFlow::Continue(())
2550    }
2551
2552    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2553        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2554        self.state[dst].set_u32(u16::from_le(result).into());
2555        ControlFlow::Continue(())
2556    }
2557
2558    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2559        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2560        self.state[dst].set_i32(i16::from_le(result).into());
2561        ControlFlow::Continue(())
2562    }
2563
2564    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2565        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2566        self.state[dst].set_i32(i32::from_le(result));
2567        ControlFlow::Continue(())
2568    }
2569
2570    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2571        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2572        self.state[dst].set_i64(i64::from_le(result));
2573        ControlFlow::Continue(())
2574    }
2575
2576    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2577        let val = self.state[val].get_u32() as u8;
2578        unsafe {
2579            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2580        }
2581        ControlFlow::Continue(())
2582    }
2583
2584    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2585        let val = self.state[val].get_u32() as u16;
2586        unsafe {
2587            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2588        }
2589        ControlFlow::Continue(())
2590    }
2591
2592    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2593        let val = self.state[val].get_u32();
2594        unsafe {
2595            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2596        }
2597        ControlFlow::Continue(())
2598    }
2599
2600    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2601        let val = self.state[val].get_u64();
2602        unsafe {
2603            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2604        }
2605        ControlFlow::Continue(())
2606    }
2607
2608    // =========================================================================
2609    // g32 addressing modes
2610
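    // A sketch of the intent of "g32" addressing (the authoritative
    // definition is `AddrG32`'s decoding and `load_ne`/`store_ne` handling
    // elsewhere in this crate): a 32-bit guest address is bounds-checked
    // against a bound and added to a host base, trapping when out of bounds,
    // which lets a wasm heap access fold its bounds check into one opcode.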
2611    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2612        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2613        self.state[dst].set_u32(result.into());
2614        ControlFlow::Continue(())
2615    }
2616
2617    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2618        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2619        self.state[dst].set_i32(result.into());
2620        ControlFlow::Continue(())
2621    }
2622
2623    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2624        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2625        self.state[dst].set_u32(u16::from_le(result).into());
2626        ControlFlow::Continue(())
2627    }
2628
2629    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2630        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2631        self.state[dst].set_i32(i16::from_le(result).into());
2632        ControlFlow::Continue(())
2633    }
2634
2635    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2636        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2637        self.state[dst].set_i32(i32::from_le(result));
2638        ControlFlow::Continue(())
2639    }
2640
2641    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2642        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2643        self.state[dst].set_i64(i64::from_le(result));
2644        ControlFlow::Continue(())
2645    }
2646
2647    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2648        let val = self.state[val].get_u32() as u8;
2649        unsafe {
2650            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2651        }
2652        ControlFlow::Continue(())
2653    }
2654
2655    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2656        let val = self.state[val].get_u32() as u16;
2657        unsafe {
2658            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2659        }
2660        ControlFlow::Continue(())
2661    }
2662
2663    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2664        let val = self.state[val].get_u32();
2665        unsafe {
2666            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2667        }
2668        ControlFlow::Continue(())
2669    }
2670
2671    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2672        let val = self.state[val].get_u64();
2673        unsafe {
2674            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2675        }
2676        ControlFlow::Continue(())
2677    }
2678
2679    // =========================================================================
2680    // z addressing modes
2681
2682    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2683        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2684        self.state[dst].set_u32(result.into());
2685        ControlFlow::Continue(())
2686    }
2687
2688    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2689        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2690        self.state[dst].set_i32(result.into());
2691        ControlFlow::Continue(())
2692    }
2693
2694    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2695        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2696        self.state[dst].set_u32(u16::from_le(result).into());
2697        ControlFlow::Continue(())
2698    }
2699
2700    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2701        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2702        self.state[dst].set_i32(i16::from_le(result).into());
2703        ControlFlow::Continue(())
2704    }
2705
2706    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2707        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2708        self.state[dst].set_i32(i32::from_le(result));
2709        ControlFlow::Continue(())
2710    }
2711
2712    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2713        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2714        self.state[dst].set_i64(i64::from_le(result));
2715        ControlFlow::Continue(())
2716    }
2717
2718    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2719        let val = self.state[val].get_u32() as u8;
2720        unsafe {
2721            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2722        }
2723        ControlFlow::Continue(())
2724    }
2725
2726    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2727        let val = self.state[val].get_u32() as u16;
2728        unsafe {
2729            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2730        }
2731        ControlFlow::Continue(())
2732    }
2733
2734    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2735        let val = self.state[val].get_u32();
2736        unsafe {
2737            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2738        }
2739        ControlFlow::Continue(())
2740    }
2741
2742    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2743        let val = self.state[val].get_u64();
2744        unsafe {
2745            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2746        }
2747        ControlFlow::Continue(())
2748    }
2749
2750    // =========================================================================
2751    // g32bne addressing modes
2752
2753    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2754        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2755        self.state[dst].set_u32(result.into());
2756        ControlFlow::Continue(())
2757    }
2758
2759    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2760        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2761        self.state[dst].set_i32(result.into());
2762        ControlFlow::Continue(())
2763    }
2764
2765    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2766        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2767        self.state[dst].set_u32(u16::from_le(result).into());
2768        ControlFlow::Continue(())
2769    }
2770
2771    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2772        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2773        self.state[dst].set_i32(i16::from_le(result).into());
2774        ControlFlow::Continue(())
2775    }
2776
2777    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2778        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2779        self.state[dst].set_i32(i32::from_le(result));
2780        ControlFlow::Continue(())
2781    }
2782
2783    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2784        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2785        self.state[dst].set_i64(i64::from_le(result));
2786        ControlFlow::Continue(())
2787    }
2788
2789    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2790        let val = self.state[val].get_u32() as u8;
2791        unsafe {
2792            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2793        }
2794        ControlFlow::Continue(())
2795    }
2796
2797    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2798        let val = self.state[val].get_u32() as u16;
2799        unsafe {
2800            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2801        }
2802        ControlFlow::Continue(())
2803    }
2804
2805    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2806        let val = self.state[val].get_u32();
2807        unsafe {
2808            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2809        }
2810        ControlFlow::Continue(())
2811    }
2812
2813    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2814        let val = self.state[val].get_u64();
2815        unsafe {
2816            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2817        }
2818        ControlFlow::Continue(())
2819    }
2820}
2821
2822impl ExtendedOpVisitor for Interpreter<'_> {
2823    fn nop(&mut self) -> ControlFlow<Done> {
2824        ControlFlow::Continue(())
2825    }
2826
2827    fn trap(&mut self) -> ControlFlow<Done> {
2828        self.done_trap::<crate::Trap>()
2829    }
2830
2831    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2832        self.done_call_indirect_host(id)
2833    }
2834
2835    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2836        let src = self.state[src].get_u32();
2837        self.state[dst].set_u32(src.swap_bytes());
2838        ControlFlow::Continue(())
2839    }
2840
2841    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2842        let src = self.state[src].get_u64();
2843        self.state[dst].set_u64(src.swap_bytes());
2844        ControlFlow::Continue(())
2845    }
2846
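    // `xbmask32`/`xbmask64` compute a boolean mask: zero stays all-zeros and
    // any nonzero input becomes all-ones (-1), handy for branchless selects.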
2847    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2848        let a = self.state[src].get_u32();
2849        if a == 0 {
2850            self.state[dst].set_u32(0);
2851        } else {
2852            self.state[dst].set_i32(-1);
2853        }
2854        ControlFlow::Continue(())
2855    }
2856
2857    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2858        let a = self.state[src].get_u64();
2859        if a == 0 {
2860            self.state[dst].set_u64(0);
2861        } else {
2862            self.state[dst].set_i64(-1);
2863        }
2864        ControlFlow::Continue(())
2865    }
2866
2867    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2868        let a = self.state[operands.src1].get_u32();
2869        let b = self.state[operands.src2].get_u32();
2870        match a.checked_add(b) {
2871            Some(c) => {
2872                self.state[operands.dst].set_u32(c);
2873                ControlFlow::Continue(())
2874            }
2875            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2876        }
2877    }
2878
2879    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2880        let a = self.state[operands.src1].get_u64();
2881        let b = self.state[operands.src2].get_u64();
2882        match a.checked_add(b) {
2883            Some(c) => {
2884                self.state[operands.dst].set_u64(c);
2885                ControlFlow::Continue(())
2886            }
2887            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2888        }
2889    }
2890
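    // The `xmulhi*` handlers compute the high half of the full product by
    // widening both operands to 128 bits: the 128-bit multiply cannot
    // overflow, and the top 64 bits of the product are the result.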
2891    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2892        let a = self.state[operands.src1].get_i64();
2893        let b = self.state[operands.src2].get_i64();
2894        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2895        self.state[operands.dst].set_i64(result);
2896        ControlFlow::Continue(())
2897    }
2898
2899    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2900        let a = self.state[operands.src1].get_u64();
2901        let b = self.state[operands.src2].get_u64();
2902        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2903        self.state[operands.dst].set_u64(result);
2904        ControlFlow::Continue(())
2905    }

    // =========================================================================
    // o32 addressing modes for big-endian X-registers

    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
        self.state[dst].set_u32(u16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
        self.state[dst].set_i32(i16::from_be(result).into());
        ControlFlow::Continue(())
    }

    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
        self.state[dst].set_i32(i32::from_be(result));
        ControlFlow::Continue(())
    }

    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
        self.state[dst].set_i64(i64::from_be(result));
        ControlFlow::Continue(())
    }

    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32() as u16;
        unsafe {
            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u32();
        unsafe {
            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
        let val = self.state[val].get_u64();
        unsafe {
            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
        }
        ControlFlow::Continue(())
    }
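
    // The pattern here is: do a native-endian load/store and byte-swap at the
    // register boundary. `T::from_be(load_ne(..))` is equivalent to reading
    // big-endian bytes directly, on both little- and big-endian hosts.
    // Illustrative:
    //
    //     let bytes = [0x12u8, 0x34];
    //     let native = u16::from_ne_bytes(bytes);   // what `load_ne` yields
    //     assert_eq!(u16::from_be(native), 0x1234); // == u16::from_be_bytes(bytes)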

    // =========================================================================
    // o32 addressing modes for little-endian F-registers

    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for big-endian F-registers

    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
        ControlFlow::Continue(())
    }

    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian F-registers

    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian F-registers

    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
        ControlFlow::Continue(())
    }

    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        unsafe {
            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        unsafe {
            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // o32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // z addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    // =========================================================================
    // g32 addressing modes for little-endian V-registers

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
        self.state[dst].set_u128(u128::from_le(val));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u128();
        unsafe {
            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
        }
        ControlFlow::Continue(())
    }

    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
        let fp = self.state.fp;
        self.state[dst].set_ptr(fp);
        ControlFlow::Continue(())
    }

    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
        let lr = self.state.lr;
        self.state[dst].set_ptr(lr);
        ControlFlow::Continue(())
    }

    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }

    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
        self.state[dst].set_f64(f64::from_bits(bits));
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_u32(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_u64(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_f32(f32::from_bits(val));
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_f64(f64::from_bits(val));
        ControlFlow::Continue(())
    }
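
    // Bitcasts are pure bit moves: `to_bits`/`from_bits` round-trip exactly,
    // NaN payloads included. Illustrative:
    //
    //     assert_eq!(1.0f32.to_bits(), 0x3f80_0000);
    //     assert_eq!(f32::from_bits(0x3f80_0000), 1.0);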

    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }
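
    // These comparisons get IEEE 754 semantics for free from Rust's float
    // operators: NaN is unordered, so `==`, `<`, and `<=` are false whenever
    // either operand is NaN, while `!=` is true. Illustrative:
    //
    //     assert!(!(f32::NAN == f32::NAN));
    //     assert!(f32::NAN != f32::NAN);
    //     assert!(!(f32::NAN < 1.0));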

    fn fselect32(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f32()
        } else {
            self.state[if_zero].get_f32()
        };
        self.state[dst].set_f32(result);
        ControlFlow::Continue(())
    }

    fn fselect64(
        &mut self,
        dst: FReg,
        cond: XReg,
        if_nonzero: FReg,
        if_zero: FReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero].get_f64()
        } else {
            self.state[if_zero].get_f64()
        };
        self.state[dst].set_f64(result);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }

    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64();
        self.state[dst].set_f64(a as f64);
        ControlFlow::Continue(())
    }
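
    // Integer-to-float conversions use Rust's `as`, which rounds to nearest
    // (ties to even) when the value is not exactly representable.
    // Illustrative:
    //
    //     assert_eq!(u32::MAX as f32, 4_294_967_296.0); // rounds up to 2^32
    //     assert_eq!(((1u64 << 53) + 1) as f64, 9_007_199_254_740_992.0);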

    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }
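
    // These are wasm's trapping truncations (`i32.trunc_f32_s` and friends):
    // the `check_xnn_from_f32`/`check_xnn_from_f64` helpers reject NaN and
    // inputs outside the bounds from `f32_cvt_to_int_bounds` /
    // `f64_cvt_to_int_bounds` with a trap, so the `as` cast below each check
    // only ever sees in-range values.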

    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i32(a as i32);
        ControlFlow::Continue(())
    }

    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u32(a as u32);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_i64(a as i64);
        ControlFlow::Continue(())
    }

    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_u64(a as u64);
        ControlFlow::Continue(())
    }
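
    // The `_sat` variants lean on Rust's `as` casts, which have exactly the
    // saturating semantics wasm wants: NaN becomes 0 and out-of-range values
    // clamp to the integer type's min/max. Illustrative:
    //
    //     assert_eq!(f32::NAN as i32, 0);
    //     assert_eq!(f32::INFINITY as i32, i32::MAX);
    //     assert_eq!(-1e30_f32 as i32, i32::MIN);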

    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f32(a as f32);
        ControlFlow::Continue(())
    }

    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f64(a.into());
        ControlFlow::Continue(())
    }

    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_copysign(b));
        ControlFlow::Continue(())
    }

    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a + b);
        ControlFlow::Continue(())
    }

    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a - b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a -= b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a * b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a *= b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a / b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut result = [0.0f32; 4];

        for i in 0..4 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut result = [0.0f64; 2];

        for i in 0..2 {
            result[i] = a[i] / b[i];
        }

        self.state[operands.dst].set_f64x2(result);
        ControlFlow::Continue(())
    }

    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32();
        let b = self.state[operands.src2].get_f32();
        self.state[operands.dst].set_f32(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }
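
    // `wasm_maximum`/`wasm_minimum` implement wasm's `max`/`min` semantics
    // rather than Rust's `f32::max`/`f32::min`: a NaN operand produces NaN
    // (instead of being ignored), and zeros are ordered so that -0.0 < +0.0.
    // Illustrative:
    //
    //     // wasm: max(NaN, 1.0) == NaN, but f32::max(f32::NAN, 1.0) == 1.0
    //     // wasm: max(+0.0, -0.0) == +0.0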

    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_trunc();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_floor());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_floor();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_ceil();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_nearest();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }
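
    // `wasm_nearest` is wasm's `nearest`: round to the nearest integral value
    // with ties going to even, unlike Rust's `f32::round`, which rounds ties
    // away from zero. Illustrative:
    //
    //     // nearest(2.5) == 2.0, nearest(3.5) == 4.0, nearest(-0.5) == -0.0
    //     assert_eq!(2.5f32.round(), 3.0); // Rust's `round` differs on ties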

    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f64x2();
        for elem in a.iter_mut() {
            *elem = elem.wasm_sqrt();
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(-a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let mut a = self.state[src].get_f32x4();
        for elem in a.iter_mut() {
            *elem = -*elem;
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32();
        self.state[dst].set_f32(a.wasm_abs());
        ControlFlow::Continue(())
    }

    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a + b);
        ControlFlow::Continue(())
    }

    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a - b);
        ControlFlow::Continue(())
    }

    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a * b);
        ControlFlow::Continue(())
    }

    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a / b);
        ControlFlow::Continue(())
    }

    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_maximum(b));
        ControlFlow::Continue(())
    }

    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64();
        let b = self.state[operands.src2].get_f64();
        self.state[operands.dst].set_f64(a.wasm_minimum(b));
        ControlFlow::Continue(())
    }

    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_trunc());
        ControlFlow::Continue(())
    }

    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_floor());
        ControlFlow::Continue(())
    }

    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_ceil());
        ControlFlow::Continue(())
    }

    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_nearest());
        ControlFlow::Continue(())
    }

    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_sqrt());
        ControlFlow::Continue(())
    }

    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(-a);
        ControlFlow::Continue(())
    }

    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64();
        self.state[dst].set_f64(a.wasm_abs());
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_add(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a += b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = (*a).saturating_add(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0i16; 8];
        let half = result.len() / 2;
        for i in 0..half {
            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0i32; 4];
        result[0] = a[0].wrapping_add(a[1]);
        result[1] = a[2].wrapping_add(a[3]);
        result[2] = b[0].wrapping_add(b[1]);
        result[3] = b[2].wrapping_add(b[3]);
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }
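
    // Pairwise addition halves each input: lane i of the result's low half is
    // a[2i] + a[2i+1], and the high half is the same reduction of b. A worked
    // example for the i32x4 case (illustrative values):
    //
    //     a = [1, 2, 3, 4], b = [10, 20, 30, 40]
    //     result = [1+2, 3+4, 10+20, 30+40] = [3, 7, 30, 70]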

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u32();
        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
        ControlFlow::Continue(())
    }
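
    // `wrapping_shl`/`wrapping_shr` mask the shift amount to the lane width,
    // so shift counts are interpreted modulo the lane size, matching wasm's
    // vector shift semantics. Illustrative:
    //
    //     assert_eq!(1i8.wrapping_shl(9), 2);     // 9 % 8 == 1
    //     assert_eq!((-8i8).wrapping_shr(1), -4); // signed shift preserves sign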

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
        self.state[dst].set_u128(val);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u8;
        self.state[dst].set_u8x16([val; 16]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32() as u16;
        self.state[dst].set_u16x8([val; 8]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_u32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_u64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_f32x4([val; 4]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_f64x2([val; 2]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
        self.state[dst].set_i16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
        self.state[dst].set_u16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
        ControlFlow::Continue(())
    }
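
    // These load a half-width vector and widen every lane in registers, e.g.
    // `vload8x8_s_z` reads 8 bytes and sign-extends each to 16 bits. The
    // widening step is just an infallible lane conversion (illustrative):
    //
    //     let bytes: [i8; 8] = [-1, 2, -3, 4, -5, 6, -7, 8];
    //     let lanes: [i16; 8] = bytes.map(|i| i.into());
    //     assert_eq!(lanes[0], -1i16);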

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a & b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a | b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u128();
        let b = self.state[operands.src2].get_u128();
        self.state[operands.dst].set_u128(a ^ b);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u128();
        self.state[dst].set_u128(!a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
        let c = self.state[c].get_u128();
        let x = self.state[x].get_u128();
        let y = self.state[y].get_u128();
        self.state[dst].set_u128((c & x) | (!c & y));
        ControlFlow::Continue(())
    }
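
    // `(c & x) | (!c & y)` is the classic branchless bit-select: wherever a
    // bit of `c` is 1 the result takes that bit from `x`, otherwise from `y`.
    // Illustrative with 8-bit values:
    //
    //     c = 0b1111_0000, x = 0b1010_1010, y = 0b0101_0101
    //     (c & x) | (!c & y) == 0b1010_0101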

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 7) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 15) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= *item >> 31;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let mut result = 0;
        for item in a.iter().rev() {
            result <<= 1;
            result |= (*item >> 63) as u32;
        }
        self.state[dst].set_u32(result);
        ControlFlow::Continue(())
    }
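
    // Each `vbitmask*` gathers the top (sign) bit of every lane into the low
    // bits of an X register, lane 0 landing in bit 0; iterating the lanes in
    // reverse while shifting left produces that ordering. Illustrative for
    // the 8x16 case:
    //
    //     lanes = [0x80, 0x00, 0xFF, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    //     mask  = 0b0000_0101 // lanes 0 and 2 have their top bit set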
4204
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().all(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u16x8();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        let result = a.iter().any(|a| *a != 0);
        self.state[dst].set_u32(u32::from(result));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u32x4();
        self.state[dst].set_f32x4(a.map(|i| i as f32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u64x2();
        self.state[dst].set_f64x2(a.map(|i| i as f64));
        ControlFlow::Continue(())
    }

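    // Note on the float->int conversions below: Rust's `as` casts from
    // float to integer saturate at the target type's bounds and send NaN to
    // zero, which matches the `trunc_sat`-style lane semantics these opcodes
    // want. A minimal sketch of the scalar behavior relied upon:
    //
    //     assert_eq!(f32::NAN as i32, 0);
    //     assert_eq!(1e10f32 as i32, i32::MAX);
    //     assert_eq!(-1e10f32 as i32, i32::MIN);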
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_i32x4(a.map(|f| f as i32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_u32x4(a.map(|f| f as u32));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_i64x2(a.map(|f| f as i64));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_u64x2(a.map(|f| f as u64));
        ControlFlow::Continue(())
    }

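    // The `vwidenlow*`/`vwidenhigh*` handlers below sign- or zero-extend one
    // half of the source vector into double-width lanes. `first_chunk` and
    // `last_chunk` borrow the low or high half of the lane array as a
    // fixed-size array, so the `unwrap`s are infallible (a 16-lane array
    // always has an 8-lane prefix and suffix). A sketch of the idiom:
    //
    //     let lanes = [0i8; 16];
    //     let low: [i8; 8] = *lanes.first_chunk().unwrap();
    //     let high: [i8; 8] = *lanes.last_chunk().unwrap();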
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
        self.state[dst].set_i16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
        self.state[dst].set_u16x8(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
        self.state[dst].set_i32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
        self.state[dst].set_u32x4(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
        self.state[dst].set_i64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
        self.state[dst].set_u64x2(a.map(|i| i.into()));
        ControlFlow::Continue(())
    }

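    // Saturating-narrow idiom used by the `vnarrow*` handlers below:
    // `try_into` fails exactly when the wide value is out of range for the
    // narrow lane type, and the `unwrap_or` fallback clamps toward the bound
    // matching the value's sign. Note that the `_u` variants still read
    // *signed* inputs and saturate them into the unsigned range, as Wasm's
    // `narrow_..._u` instructions do. E.g. for i16 -> i8:
    //
    //     let x: i16 = 300;
    //     let y: i8 = x.try_into().unwrap_or(if x < 0 { i8::MIN } else { i8::MAX });
    //     assert_eq!(y, i8::MAX); // 300 saturates to 127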
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
        }
        self.state[operands.dst].set_i8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut result = [0; 16];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
        }
        self.state[operands.dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
        }
        self.state[operands.dst].set_i16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut result = [0; 8];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
        }
        self.state[operands.dst].set_u16x8(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
        }
        self.state[operands.dst].set_i32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i)
                .try_into()
                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut result = [0; 4];
        for (i, d) in a.iter().chain(&b).zip(&mut result) {
            *d = (*i).try_into().unwrap_or(u32::MAX);
        }
        self.state[operands.dst].set_u32x4(result);
        ControlFlow::Continue(())
    }

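    // `vfpromotelow` widens the two low f32 lanes to f64, while `vfdemote`
    // rounds the two f64 lanes to f32 and zeroes the upper two result lanes,
    // since a two-lane f32 result only fills half of the 128-bit register.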
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_sub(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.saturating_sub(b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a - b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = a.wrapping_mul(b);
        }
        self.state[operands.dst].set_i64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(b) {
            *a = *a * b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

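    // `vqmulrsi16x8` below is the Q15 rounding saturating multiply (Wasm's
    // `i16x8.q15mulr_sat_s`): multiply in 32-bit precision, add the rounding
    // constant 2^14, shift right by 15, and clamp to the i16 range. The
    // clamp only fires for i16::MIN * i16::MIN:
    //
    //     let r = (i32::from(i16::MIN) * i32::from(i16::MIN) + (1 << 14)) >> 15;
    //     assert_eq!(r, 32768); // one past i16::MAX, so it saturates to 32767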
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        const MIN: i32 = i16::MIN as i32;
        const MAX: i32 = i16::MAX as i32;
        for (a, b) in a.iter_mut().zip(b) {
            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
            *a = r.clamp(MIN, MAX) as i16;
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_u8x16();
        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
        ControlFlow::Continue(())
    }

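    // The lane-extract handlers below index the lane array with
    // `get_unchecked`, on the assumption that the `lane` immediate was
    // validated when the bytecode was produced; an out-of-range lane here
    // would be undefined behavior, so that invariant is load-bearing.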
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u64(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f32(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f64(a);
        ControlFlow::Continue(())
    }

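    // The `vinsert*` handlers below are the write-side counterpart: copy the
    // source vector, overwrite one lane with the scalar operand (truncating
    // it to the lane width in the narrow integer cases), and write the whole
    // vector back. The same in-range-`lane` assumption as the extracts above
    // applies to `get_unchecked_mut`.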
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx8(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32() as u8;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx16(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u32() as u16;
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64();
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

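    // The integer comparisons below follow the usual SIMD mask convention:
    // a result lane is all ones when the predicate holds and all zeros
    // otherwise. The `vs*` variants compare lanes as signed integers and the
    // `vu*` variants as unsigned, so e.g. a 0x80 byte is -128 < 1 signed but
    // 128 > 1 unsigned.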
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        let mut c = [0; 16];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u8::MAX } else { 0 };
        }
        self.state[operands.dst].set_u8x16(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        let mut c = [0; 8];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u16::MAX } else { 0 };
        }
        self.state[operands.dst].set_u16x8(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_i64x2();
        let b = self.state[operands.src2].get_i64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

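    // The integer negations below use `wrapping_neg` because two's
    // complement has no positive counterpart for the minimum value; the same
    // caveat applies to the `wrapping_abs` calls further down. E.g.:
    //
    //     assert_eq!(i8::MIN.wrapping_neg(), i8::MIN);
    //     assert_eq!(i8::MIN.wrapping_abs(), i8::MIN);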
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| -i));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).min(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i8x16();
        let b = self.state[operands.src2].get_i8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i16x8();
        let b = self.state[operands.src2].get_i16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_i32x4();
        let b = self.state[operands.src2].get_i32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_i32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = (*a).max(*b);
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i8x16();
        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i16x8();
        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i32x4();
        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_i64x2();
        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f32x4();
        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
        let a = self.state[src].get_f64x2();
        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
        ControlFlow::Continue(())
    }

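    // `wasm_maximum`/`wasm_minimum` below implement Wasm's NaN-propagating
    // min/max semantics, as opposed to the plain `min`/`max` used for the
    // integer lanes above: if either input is NaN the result is NaN, and
    // -0.0 orders below +0.0.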
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_maximum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        for (a, b) in a.iter_mut().zip(&b) {
            *a = a.wasm_minimum(*b);
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

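    // `vshuffle` treats `mask` as 16 little-endian selector bytes indexing
    // the 32-byte concatenation of `src1` and `src2`: selectors 0..=15 pick
    // from `src1` and 16..=31 from `src2` (selectors are assumed to be below
    // 32; larger ones would panic on the index below). One lane, roughly:
    //
    //     let (a, b) = ([0u8; 16], [1u8; 16]);
    //     let m = 17u8; // selects b[1]
    //     let lane = if m < 16 { a[m as usize] } else { b[m as usize - 16] };
    //     assert_eq!(lane, 1);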
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
        let a = self.state[src1].get_u8x16();
        let b = self.state[src2].get_u8x16();
        let result = mask.to_le_bytes().map(|m| {
            if m < 16 {
                a[m as usize]
            } else {
                b[m as usize - 16]
            }
        });
        self.state[dst].set_u8x16(result);
        ControlFlow::Continue(())
    }

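    // `vswizzlei8x16` note: selector lanes are `i8`, and a negative selector
    // sign-extends to a huge `usize` under `as`, so the single `< 16` test
    // rejects both too-large and negative indices; such lanes produce 0,
    // matching Wasm's `i8x16.swizzle`.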
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let src1 = self.state[operands.src1].get_i8x16();
        let src2 = self.state[operands.src2].get_i8x16();
        let mut dst = [0i8; 16];
        for (i, &idx) in src2.iter().enumerate() {
            if (idx as usize) < 16 {
                dst[i] = src1[idx as usize];
            } else {
                dst[i] = 0;
            }
        }
        self.state[operands.dst].set_i8x16(dst);
        ControlFlow::Continue(())
    }

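    // Rounding average used below: `(a + b + 1) / 2` computed in widened
    // arithmetic, i.e. the average rounded half up. E.g. (1 + 2 + 1) / 2
    // == 2, where a truncating (1 + 2) / 2 would give 1.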
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u8x16();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        let b = self.state[operands.src2].get_u16x8();
        for (a, b) in a.iter_mut().zip(&b) {
            // use wider precision to avoid overflow
            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }

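    // The float comparisons below produce the same all-ones / all-zeros
    // masks as the integer comparisons. IEEE semantics apply: any comparison
    // with NaN is false, so a NaN lane yields 0 for eq/lt/lteq and all ones
    // for neq.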
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32x4();
        let mut c = [0; 4];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u32::MAX } else { 0 };
        }
        self.state[operands.dst].set_u32x4(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a == b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a != b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a < b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
        let a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64x2();
        let mut c = [0; 2];
        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
            *c = if a <= b { u64::MAX } else { 0 };
        }
        self.state[operands.dst].set_u64x2(c);
        ControlFlow::Continue(())
    }

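    // `vfma32x4`/`vfma64x2` compute `a * b + c` per lane via
    // `wasm_mul_add`; the understanding is that this performs a fused
    // multiply-add (a single rounding step) where the host supports it.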
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f32x4();
        let b = self.state[b].get_f32x4();
        let c = self.state[c].get_f32x4();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f32x4(a);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
        let mut a = self.state[a].get_f64x2();
        let b = self.state[b].get_f64x2();
        let c = self.state[c].get_f64x2();
        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
            *a = a.wasm_mul_add(b, c);
        }
        self.state[dst].set_f64x2(a);
        ControlFlow::Continue(())
    }

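    // `vselect` is a whole-register conditional move driven by a scalar
    // condition in an `x` register, not a lanewise bitselect: whichever of
    // `if_nonzero`/`if_zero` is chosen is copied to `dst` in its entirety.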
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vselect(
        &mut self,
        dst: VReg,
        cond: XReg,
        if_nonzero: VReg,
        if_zero: VReg,
    ) -> ControlFlow<Done> {
        let result = if self.state[cond].get_u32() != 0 {
            self.state[if_nonzero]
        } else {
            self.state[if_zero]
        };
        self.state[dst] = result;
        ControlFlow::Continue(())
    }

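    // The 128-bit ops below model an i128 as a lo/hi pair of 64-bit `x`
    // registers through the `get_i128`/`set_i128` helpers, which presumably
    // pack and unpack along these lines:
    //
    //     let (lo, hi) = (u64::MAX, 0u64);
    //     let val = ((hi as i128) << 64) | (lo as i128);
    //     assert_eq!(val, (1i128 << 64) - 1);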
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xadd128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_add(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xsub128(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs_lo: XReg,
        lhs_hi: XReg,
        rhs_lo: XReg,
        rhs_hi: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.get_i128(lhs_lo, lhs_hi);
        let rhs = self.get_i128(rhs_lo, rhs_hi);
        let result = lhs.wrapping_sub(rhs);
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

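    // The widening multiplies below cannot actually wrap: the most extreme
    // i64 product, i64::MIN * i64::MIN, is 2^126, which fits comfortably in
    // an i128, so `wrapping_mul` is merely the overflow-agnostic spelling.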
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_s(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_i64();
        let rhs = self.state[rhs].get_i64();
        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result);
        ControlFlow::Continue(())
    }

    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn xwidemul64_u(
        &mut self,
        dst_lo: XReg,
        dst_hi: XReg,
        lhs: XReg,
        rhs: XReg,
    ) -> ControlFlow<Done> {
        let lhs = self.state[lhs].get_u64();
        let rhs = self.state[rhs].get_u64();
        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
        self.set_i128(dst_lo, dst_hi, result as i128);
        ControlFlow::Continue(())
    }
}