// pulley_interpreter/interp.rs
1//! Interpretation of pulley bytecode.
2
3use crate::decode::*;
4use crate::encode::Encode;
5use crate::imms::*;
6use crate::profile::{ExecutingPc, ExecutingPcRef};
7use crate::regs::*;
8use alloc::string::ToString;
9use core::fmt;
10use core::mem;
11use core::ops::ControlFlow;
12use core::ops::{Index, IndexMut};
13use core::ptr::NonNull;
14use pulley_macros::interp_disable_if_cfg;
15use wasmtime_core::alloc::TryVec;
16use wasmtime_core::error::OutOfMemory;
17use wasmtime_core::math::{WasmFloat, f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
18
19mod debug;
20#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
21mod match_loop;
22#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
23mod tail_loop;
24
/// Default byte size of a `Vm`'s interpreter stack (used by `Vm::new`).
const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
26
/// A virtual machine for interpreting Pulley bytecode.
pub struct Vm {
    // Registers, stack, and halt-reason bookkeeping for the interpreter.
    state: MachineState,
    // Shared handle exposing the currently-executing PC so it can be read
    // from other threads (only surfaced publicly with the `profile` feature).
    executing_pc: ExecutingPc,
}
32
impl Vm {
    /// Create a new virtual machine with the default stack size.
    pub fn new() -> Result<Self, OutOfMemory> {
        Self::with_stack(DEFAULT_STACK_SIZE)
    }

    /// Create a new virtual machine with the given stack.
    pub fn with_stack(stack_size: usize) -> Result<Self, OutOfMemory> {
        Ok(Self {
            state: MachineState::with_stack(stack_size)?,
            executing_pc: ExecutingPc::default(),
        })
    }

    /// Get a shared reference to this VM's machine state.
    pub fn state(&self) -> &MachineState {
        &self.state
    }

    /// Get an exclusive reference to this VM's machine state.
    pub fn state_mut(&mut self) -> &mut MachineState {
        &mut self.state
    }

    /// Call a bytecode function.
    ///
    /// The given `func` must point to the beginning of a valid Pulley bytecode
    /// function.
    ///
    /// The given `args` must match the number and type of arguments that
    /// function expects.
    ///
    /// The given `rets` must match the function's actual return types.
    ///
    /// Returns either the resulting values, or the PC at which a trap was
    /// raised.
    pub unsafe fn call<'a, T>(
        &'a mut self,
        func: NonNull<u8>,
        args: &[Val],
        rets: T,
    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
    where
        T: IntoIterator<Item = RegType> + 'a,
    {
        unsafe {
            // Three-phase protocol: place `args` in registers, execute the
            // bytecode, then (on a normal return) read results back out.
            // Traps and host-calls are forwarded to the caller unchanged;
            // note that in those cases `call_end` is not run here.
            let lr = self.call_start(args);

            match self.call_run(func) {
                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
                DoneReason::CallIndirectHost { id, resume } => {
                    DoneReason::CallIndirectHost { id, resume }
                }
            }
        }
    }

    /// Performs the initial part of [`Vm::call`] in setting up the `args`
    /// provided in registers according to Pulley's ABI.
    ///
    /// # Return
    ///
    /// Returns the old `lr` register value. The current `lr` value is replaced
    /// with a sentinel that triggers a return to the host when returned-to.
    ///
    /// # Unsafety
    ///
    /// All the same unsafety as `call` and additionally, you must
    /// invoke `call_run` and then `call_end` after calling `call_start`.
    /// If you don't want to wrangle these invocations, use `call` instead
    /// of `call_{start,run,end}`.
    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        // Argument registers per bank. Note that only 15 `x` registers are
        // used here versus 16 `f`/`v` registers — presumably the ABI reserves
        // some `x` registers (e.g. `sp`); confirm against `compute_arg_locs`.
        let mut x_args = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        // Assign each argument the next free register of its bank; arguments
        // beyond the register banks would go on the stack (unimplemented).
        for arg in args {
            match arg {
                Val::XReg(val) => match x_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                Val::FReg(val) => match f_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
                #[cfg(not(pulley_disable_interp_simd))]
                Val::VReg(val) => match v_args.next() {
                    Some(reg) => self.state[reg] = *val,
                    None => todo!("stack slots"),
                },
            }
        }

        // Install the "return to host" sentinel in `lr`, handing the previous
        // value back so `call_end` can restore it later.
        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
    }

    /// Performs the internal part of [`Vm::call`] where bytecode is actually
    /// executed.
    ///
    /// # Unsafety
    ///
    /// In addition to all the invariants documented for `call`, you
    /// may only invoke `call_run` after invoking `call_start` to
    /// initialize this call's arguments.
    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
        self.state.debug_assert_done_reason_none();
        // Run a fresh interpreter over our state starting at `pc`; `run`
        // only returns once a `done_*` reason has been recorded.
        let interpreter = Interpreter {
            state: &mut self.state,
            pc: unsafe { UnsafeBytecodeStream::new(pc) },
            executing_pc: self.executing_pc.as_ref(),
        };
        let done = interpreter.run();
        self.state.done_decode(done)
    }

    /// Performs the tail end of [`Vm::call`] by returning the values as
    /// determined by `rets` according to Pulley's ABI.
    ///
    /// The `old_ret` value should have been provided from `call_start`
    /// previously.
    ///
    /// # Unsafety
    ///
    /// In addition to the invariants documented for `call`, this may
    /// only be called after `call_run`.
    pub unsafe fn call_end<'a>(
        &'a mut self,
        old_ret: *mut u8,
        rets: impl IntoIterator<Item = RegType> + 'a,
    ) -> impl Iterator<Item = Val> + 'a {
        // Restore the `lr` value that `call_start` displaced.
        self.state.lr = old_ret;
        // NB: make sure this method stays in sync with
        // `PulleyMachineDeps::compute_arg_locs`!

        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
        #[cfg(not(pulley_disable_interp_simd))]
        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });

        // NB: this iterator is lazy — registers are read only as the caller
        // pulls values out of it.
        rets.into_iter().map(move |ty| match ty {
            RegType::XReg => match x_rets.next() {
                Some(reg) => Val::XReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            RegType::FReg => match f_rets.next() {
                Some(reg) => Val::FReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(not(pulley_disable_interp_simd))]
            RegType::VReg => match v_rets.next() {
                Some(reg) => Val::VReg(self.state[reg]),
                None => todo!("stack slots"),
            },
            #[cfg(pulley_disable_interp_simd)]
            RegType::VReg => panic!("simd support disabled at compile time"),
        })
    }

    /// Returns the current `fp` register value.
    pub fn fp(&self) -> *mut u8 {
        self.state.fp
    }

    /// Returns the current `lr` register value.
    pub fn lr(&self) -> *mut u8 {
        self.state.lr
    }

    /// Sets the current `fp` register value.
    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
        self.state.fp = fp;
    }

    /// Sets the current `lr` register value.
    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
        self.state.lr = lr;
    }

    /// Gets a handle to the currently executing program counter for this
    /// interpreter which can be read from other threads.
    //
    // Note that despite this field still existing with `not(feature =
    // "profile")` it's hidden from the public API in that scenario as it has no
    // methods anyway.
    #[cfg(feature = "profile")]
    pub fn executing_pc(&self) -> &ExecutingPc {
        &self.executing_pc
    }
}
228
impl Drop for Vm {
    fn drop(&mut self) {
        // Mark the shared executing-PC handle as finished so anything still
        // holding a reference (e.g. a profiler thread) knows this VM is gone.
        self.executing_pc.set_done();
    }
}
234
/// The type of a register in the Pulley machine state.
#[derive(Clone, Copy, Debug)]
pub enum RegType {
    /// An `x` register: integers.
    XReg,

    /// An `f` register: floats.
    FReg,

    /// A `v` register: vectors.
    ///
    /// Note that this variant exists even when SIMD support is compiled out;
    /// requesting a `v` return in that configuration panics (see
    /// `Vm::call_end`).
    VReg,
}
247
/// A value that can be stored in a register.
#[derive(Clone, Copy, Debug)]
pub enum Val {
    /// An `x` register value: integers.
    XReg(XRegVal),

    /// An `f` register value: floats.
    FReg(FRegVal),

    /// A `v` register value: vectors.
    ///
    /// Only present when SIMD support is compiled in.
    #[cfg(not(pulley_disable_interp_simd))]
    VReg(VRegVal),
}
261
262impl fmt::LowerHex for Val {
263    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264        match self {
265            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
266            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
267            #[cfg(not(pulley_disable_interp_simd))]
268            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
269        }
270    }
271}
272
273impl From<XRegVal> for Val {
274    fn from(value: XRegVal) -> Self {
275        Val::XReg(value)
276    }
277}
278
279impl From<u64> for Val {
280    fn from(value: u64) -> Self {
281        XRegVal::new_u64(value).into()
282    }
283}
284
285impl From<u32> for Val {
286    fn from(value: u32) -> Self {
287        XRegVal::new_u32(value).into()
288    }
289}
290
291impl From<i64> for Val {
292    fn from(value: i64) -> Self {
293        XRegVal::new_i64(value).into()
294    }
295}
296
297impl From<i32> for Val {
298    fn from(value: i32) -> Self {
299        XRegVal::new_i32(value).into()
300    }
301}
302
303impl<T> From<*mut T> for Val {
304    fn from(value: *mut T) -> Self {
305        XRegVal::new_ptr(value).into()
306    }
307}
308
309impl From<FRegVal> for Val {
310    fn from(value: FRegVal) -> Self {
311        Val::FReg(value)
312    }
313}
314
315impl From<f64> for Val {
316    fn from(value: f64) -> Self {
317        FRegVal::new_f64(value).into()
318    }
319}
320
321impl From<f32> for Val {
322    fn from(value: f32) -> Self {
323        FRegVal::new_f32(value).into()
324    }
325}
326
327#[cfg(not(pulley_disable_interp_simd))]
328impl From<VRegVal> for Val {
329    fn from(value: VRegVal) -> Self {
330        Val::VReg(value)
331    }
332}
333
/// An `x` register value: integers.
///
/// Thin wrapper around `XRegUnion`; all access goes through the
/// little-endian storage discipline documented on that union.
#[derive(Copy, Clone)]
pub struct XRegVal(XRegUnion);
337
impl PartialEq for XRegVal {
    fn eq(&self, other: &Self) -> bool {
        // Compare the full 64-bit contents: every setter writes into these
        // same overlapping bytes, so this is a complete bitwise comparison.
        self.get_u64() == other.get_u64()
    }
}

impl Eq for XRegVal {}
345
impl fmt::Debug for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Render via `get_u64` so all 8 bytes are shown in native order.
        f.debug_struct("XRegVal")
            .field("as_u64", &self.get_u64())
            .finish()
    }
}

impl fmt::LowerHex for XRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::LowerHex::fmt(&self.get_u64(), f)
    }
}
359
/// Contents of an "x" register, or a general-purpose register.
///
/// This is represented as a Rust `union` to make it easier to access typed
/// views of this, notably the `ptr` field which enables preserving a bit of
/// provenance for Rust for values stored as a pointer and read as a pointer.
///
/// Note that the actual in-memory representation of this value is handled
/// carefully at this time. Pulley bytecode exposes the ability to store a
/// 32-bit result into a register and then read the 64-bit contents of the
/// register. This leaves us with the question of what to do with the upper bits
/// of the register when the 32-bit result is generated. Possibilities for
/// handling this are:
///
/// 1. Do nothing, just store the 32-bit value. The problem with this approach
///    means that the "upper bits" are now endianness-dependent. That means that
///    the state of the register is now platform-dependent.
/// 2. Sign or zero-extend. This restores platform-independent behavior but
///    requires an extra store on 32-bit platforms because they can probably
///    only store 32-bits at a time.
/// 3. Always store the values in this union as little-endian. This means that
///    big-endian platforms have to do a byte-swap but otherwise it has
///    platform-independent behavior.
///
/// This union chooses route (3) at this time where the values here are always
/// stored in little-endian form (even the `ptr` field). That guarantees
/// cross-platform behavior while also minimizing the amount of data stored on
/// writes.
///
/// In the future we may wish to benchmark this and possibly change this.
/// Technically Cranelift-generated bytecode should never rely on the upper bits
/// of a register if it didn't previously write them so this in theory doesn't
/// actually matter for Cranelift or wasm semantics. The only cost right now is
/// to big-endian platforms though and it's not certain how crucial performance
/// will be there.
///
/// One final note is that this notably contrasts with native CPUs where
/// native ISAs like RISC-V specifically define the entire register on every
/// instruction, even if only the low half contains a significant result. Pulley
/// is unlikely to become out-of-order within the CPU itself as it's interpreted
/// meaning that severing data-dependencies with previous operations is
/// hypothesized to not be too important. If this is ever a problem though it
/// could increase the likelihood we go for route (2) above instead (or maybe
/// even (1)).
#[derive(Copy, Clone)]
union XRegUnion {
    // The same storage viewed at different integer widths; always
    // little-endian at rest, per the discussion above.
    i32: i32,
    u32: u32,
    i64: i64,
    u64: u64,

    // Note that this is intentionally `usize` and not an actual pointer like
    // `*mut u8`. The reason for this is that provenance is required in Rust for
    // pointers but Cranelift has no pointer type and thus no concept of
    // provenance. That means that at-rest it's not known whether the value has
    // provenance or not and basically means that Pulley is required to use
    // "permissive provenance" in Rust as opposed to strict provenance.
    //
    // That's more-or-less a long-winded way of saying that storage of a pointer
    // in this value is done with `.expose_provenance()` and reading a pointer
    // uses `with_exposed_provenance_mut(..)`.
    ptr: usize,
}
422
423impl Default for XRegVal {
424    fn default() -> Self {
425        Self(unsafe { mem::zeroed() })
426    }
427}
428
429#[expect(missing_docs, reason = "self-describing methods")]
430impl XRegVal {
431    pub fn new_i32(x: i32) -> Self {
432        let mut val = XRegVal::default();
433        val.set_i32(x);
434        val
435    }
436
437    pub fn new_u32(x: u32) -> Self {
438        let mut val = XRegVal::default();
439        val.set_u32(x);
440        val
441    }
442
443    pub fn new_i64(x: i64) -> Self {
444        let mut val = XRegVal::default();
445        val.set_i64(x);
446        val
447    }
448
449    pub fn new_u64(x: u64) -> Self {
450        let mut val = XRegVal::default();
451        val.set_u64(x);
452        val
453    }
454
455    pub fn new_ptr<T>(ptr: *mut T) -> Self {
456        let mut val = XRegVal::default();
457        val.set_ptr(ptr);
458        val
459    }
460
461    pub fn get_i32(&self) -> i32 {
462        let x = unsafe { self.0.i32 };
463        i32::from_le(x)
464    }
465
466    pub fn get_u32(&self) -> u32 {
467        let x = unsafe { self.0.u32 };
468        u32::from_le(x)
469    }
470
471    pub fn get_i64(&self) -> i64 {
472        let x = unsafe { self.0.i64 };
473        i64::from_le(x)
474    }
475
476    pub fn get_u64(&self) -> u64 {
477        let x = unsafe { self.0.u64 };
478        u64::from_le(x)
479    }
480
481    pub fn get_ptr<T>(&self) -> *mut T {
482        let ptr = unsafe { self.0.ptr };
483        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
484    }
485
486    pub fn set_i32(&mut self, x: i32) {
487        self.0.i32 = x.to_le();
488    }
489
490    pub fn set_u32(&mut self, x: u32) {
491        self.0.u32 = x.to_le();
492    }
493
494    pub fn set_i64(&mut self, x: i64) {
495        self.0.i64 = x.to_le();
496    }
497
498    pub fn set_u64(&mut self, x: u64) {
499        self.0.u64 = x.to_le();
500    }
501
502    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
503        self.0.ptr = ptr.expose_provenance().to_le();
504    }
505}
506
/// An `f` register value: floats.
#[derive(Copy, Clone)]
pub struct FRegVal(FRegUnion);

impl fmt::Debug for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The register doesn't track which width was last stored, so show
        // both plausible interpretations.
        f.debug_struct("FRegVal")
            .field("as_f32", &self.get_f32())
            .field("as_f64", &self.get_f64())
            .finish()
    }
}

impl fmt::LowerHex for FRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Hex-format the raw 64-bit pattern of the f64 view.
        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
    }
}
525
// NB: like `XRegUnion` values here are always little-endian, see the
// documentation above for more details.
//
// Fields are raw bit patterns (`u32`/`u64`) rather than `f32`/`f64` since
// the accessors below convert through the integer bit representation.
#[derive(Copy, Clone)]
union FRegUnion {
    f32: u32,
    f64: u64,
}
533
534impl Default for FRegVal {
535    fn default() -> Self {
536        Self(unsafe { mem::zeroed() })
537    }
538}
539
540#[expect(missing_docs, reason = "self-describing methods")]
541impl FRegVal {
542    pub fn new_f32(f: f32) -> Self {
543        let mut val = Self::default();
544        val.set_f32(f);
545        val
546    }
547
548    pub fn new_f64(f: f64) -> Self {
549        let mut val = Self::default();
550        val.set_f64(f);
551        val
552    }
553
554    pub fn get_f32(&self) -> f32 {
555        let val = unsafe { self.0.f32 };
556        f32::from_le_bytes(val.to_ne_bytes())
557    }
558
559    pub fn get_f64(&self) -> f64 {
560        let val = unsafe { self.0.f64 };
561        f64::from_le_bytes(val.to_ne_bytes())
562    }
563
564    pub fn set_f32(&mut self, val: f32) {
565        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
566    }
567
568    pub fn set_f64(&mut self, val: f64) {
569        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
570    }
571}
572
/// A `v` register value: vectors.
#[derive(Copy, Clone)]
#[cfg(not(pulley_disable_interp_simd))]
pub struct VRegVal(VRegUnion);

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::Debug for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): this prints the raw stored `u128` without the
        // `from_le` conversion that `get_u128` performs, so on a big-endian
        // host the displayed value is the byte-swapped representation —
        // confirm whether that's intended for debug output.
        f.debug_struct("VRegVal")
            .field("as_u128", &unsafe { self.0.u128 })
            .finish()
    }
}

#[cfg(not(pulley_disable_interp_simd))]
impl fmt::LowerHex for VRegVal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Same caveat as `Debug`: raw stored bits, no endian conversion.
        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
    }
}
593
/// 128-bit vector registers.
///
/// This register is always stored in little-endian order and has different
/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
/// union are the same width so all bits are always defined. Note that
/// little-endian is required though so bitcasts between different shapes of
/// vectors works. This union cannot be stored in big-endian.
#[derive(Copy, Clone)]
#[repr(align(16))]
#[cfg(not(pulley_disable_interp_simd))]
union VRegUnion {
    u128: u128,
    i8x16: [i8; 16],
    i16x8: [i16; 8],
    i32x4: [i32; 4],
    i64x2: [i64; 2],
    u8x16: [u8; 16],
    u16x8: [u16; 8],
    u32x4: [u32; 4],
    u64x2: [u64; 2],
    // Note that these are `u32` and `u64`, not f32/f64. That's only because
    // f32/f64 don't have `.to_le()` and `::from_le()` so need to go through the
    // bits anyway.
    f32x4: [u32; 4],
    f64x2: [u64; 2],
}
620
621#[cfg(not(pulley_disable_interp_simd))]
622impl Default for VRegVal {
623    fn default() -> Self {
624        Self(unsafe { mem::zeroed() })
625    }
626}
627
628#[expect(missing_docs, reason = "self-describing methods")]
629#[cfg(not(pulley_disable_interp_simd))]
630impl VRegVal {
631    pub fn new_u128(i: u128) -> Self {
632        let mut val = Self::default();
633        val.set_u128(i);
634        val
635    }
636
637    pub fn get_u128(&self) -> u128 {
638        let val = unsafe { self.0.u128 };
639        u128::from_le(val)
640    }
641
642    pub fn set_u128(&mut self, val: u128) {
643        self.0.u128 = val.to_le();
644    }
645
646    fn get_i8x16(&self) -> [i8; 16] {
647        let val = unsafe { self.0.i8x16 };
648        val.map(|e| i8::from_le(e))
649    }
650
651    fn set_i8x16(&mut self, val: [i8; 16]) {
652        self.0.i8x16 = val.map(|e| e.to_le());
653    }
654
655    fn get_u8x16(&self) -> [u8; 16] {
656        let val = unsafe { self.0.u8x16 };
657        val.map(|e| u8::from_le(e))
658    }
659
660    fn set_u8x16(&mut self, val: [u8; 16]) {
661        self.0.u8x16 = val.map(|e| e.to_le());
662    }
663
664    fn get_i16x8(&self) -> [i16; 8] {
665        let val = unsafe { self.0.i16x8 };
666        val.map(|e| i16::from_le(e))
667    }
668
669    fn set_i16x8(&mut self, val: [i16; 8]) {
670        self.0.i16x8 = val.map(|e| e.to_le());
671    }
672
673    fn get_u16x8(&self) -> [u16; 8] {
674        let val = unsafe { self.0.u16x8 };
675        val.map(|e| u16::from_le(e))
676    }
677
678    fn set_u16x8(&mut self, val: [u16; 8]) {
679        self.0.u16x8 = val.map(|e| e.to_le());
680    }
681
682    fn get_i32x4(&self) -> [i32; 4] {
683        let val = unsafe { self.0.i32x4 };
684        val.map(|e| i32::from_le(e))
685    }
686
687    fn set_i32x4(&mut self, val: [i32; 4]) {
688        self.0.i32x4 = val.map(|e| e.to_le());
689    }
690
691    fn get_u32x4(&self) -> [u32; 4] {
692        let val = unsafe { self.0.u32x4 };
693        val.map(|e| u32::from_le(e))
694    }
695
696    fn set_u32x4(&mut self, val: [u32; 4]) {
697        self.0.u32x4 = val.map(|e| e.to_le());
698    }
699
700    fn get_i64x2(&self) -> [i64; 2] {
701        let val = unsafe { self.0.i64x2 };
702        val.map(|e| i64::from_le(e))
703    }
704
705    fn set_i64x2(&mut self, val: [i64; 2]) {
706        self.0.i64x2 = val.map(|e| e.to_le());
707    }
708
709    fn get_u64x2(&self) -> [u64; 2] {
710        let val = unsafe { self.0.u64x2 };
711        val.map(|e| u64::from_le(e))
712    }
713
714    fn set_u64x2(&mut self, val: [u64; 2]) {
715        self.0.u64x2 = val.map(|e| e.to_le());
716    }
717
718    fn get_f64x2(&self) -> [f64; 2] {
719        let val = unsafe { self.0.f64x2 };
720        val.map(|e| f64::from_bits(u64::from_le(e)))
721    }
722
723    fn set_f64x2(&mut self, val: [f64; 2]) {
724        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
725    }
726
727    fn get_f32x4(&self) -> [f32; 4] {
728        let val = unsafe { self.0.f32x4 };
729        val.map(|e| f32::from_bits(u32::from_le(e)))
730    }
731
732    fn set_f32x4(&mut self, val: [f32; 4]) {
733        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
734    }
735}
736
/// The machine state for a Pulley virtual machine: the various registers and
/// stack.
pub struct MachineState {
    // Integer, float, and (when SIMD is enabled) vector register files.
    x_regs: [XRegVal; XReg::RANGE.end as usize],
    f_regs: [FRegVal; FReg::RANGE.end as usize],
    #[cfg(not(pulley_disable_interp_simd))]
    v_regs: [VRegVal; VReg::RANGE.end as usize],
    // Frame pointer and link register; `HOST_RETURN_ADDR` serves as the
    // sentinel value meaning "return control to the host".
    fp: *mut u8,
    lr: *mut u8,
    // Backing storage for the interpreter stack; the `sp` register (stored
    // in `x_regs`) points into this allocation.
    stack: Stack,
    // Recorded by the interpreter's `done_*` methods when execution halts
    // and consumed by `done_decode`.
    done_reason: Option<DoneReason<()>>,
}
749
// SAFETY: `MachineState` is only non-auto-`Send`/`Sync` because of the raw
// `fp`/`lr` pointers (and pointers stored in registers). NOTE(review): these
// impls assert that sharing the state across threads is sound — presumably
// because only the thread driving the interpreter mutates it; confirm
// against the embedder's usage.
unsafe impl Send for MachineState {}
unsafe impl Sync for MachineState {}
752
/// Helper structure to store the state of the Pulley stack.
///
/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
/// done with a custom `Vec<T>` internally where `T` has size and align of 16.
/// This is manually done with a helper `Align16` type below.
///
/// Note that the stack's size is its reserved *capacity* (see `Stack::len`);
/// the vector's length is never used.
struct Stack {
    storage: TryVec<Align16>,
}

/// Helper type used with `Stack` above.
#[derive(Copy, Clone)]
#[repr(align(16))]
struct Align16 {
    // Just here to give the structure a size of 16. The alignment is always 16
    // regardless of what the host platform's alignment of u128 is.
    _unused: u128,
}
771
772impl Stack {
773    /// Creates a new stack which will have a byte size of at least `size`.
774    ///
775    /// The allocated stack might be slightly larger due to rounding necessary.
776    fn new(size: usize) -> Result<Stack, OutOfMemory> {
777        let mut storage = TryVec::new();
778        // Round up `size` to the nearest multiple of 16. Note that the
779        // stack is also allocated here but not initialized, and that's
780        // intentional as pulley bytecode should always initialize the stack
781        // before use.
782        storage.reserve_exact(size.checked_next_multiple_of(16).unwrap_or(usize::MAX) / 16)?;
783        Ok(Stack { storage })
784    }
785
786    /// Returns a pointer to the top of the stack (the highest address).
787    ///
788    /// Note that the returned pointer has provenance for the entire stack
789    /// allocation, however, not just the top.
790    fn top(&mut self) -> *mut u8 {
791        let len = self.len();
792        unsafe { self.base().add(len) }
793    }
794
795    /// Returns a pointer to the base of the stack (the lowest address).
796    ///
797    /// Note that the returned pointer has provenance for the entire stack
798    /// allocation, however, not just the top.
799    fn base(&mut self) -> *mut u8 {
800        self.storage.as_mut_ptr().cast::<u8>()
801    }
802
803    /// Returns the length, in bytes, of this stack allocation.
804    fn len(&self) -> usize {
805        self.storage.capacity() * mem::size_of::<Align16>()
806    }
807}
808
impl fmt::Debug for MachineState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustively destructure so that adding a field to `MachineState`
        // is a compile error here until a decision is made about whether and
        // how to print it.
        let MachineState {
            x_regs,
            f_regs,
            #[cfg(not(pulley_disable_interp_simd))]
            v_regs,
            stack: _,
            done_reason: _,
            fp: _,
            lr: _,
        } = self;

        // Helper that renders a register file as a map keyed by register
        // name; the function pointer converts a register index to its name.
        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);

        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut f = f.debug_map();
                for (i, r) in self.0.iter().enumerate() {
                    f.entry(&(self.1)(i as u8), r);
                }
                f.finish()
            }
        }

        let mut f = f.debug_struct("MachineState");

        f.field(
            "x_regs",
            &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
        )
        .field(
            "f_regs",
            &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
        );
        #[cfg(not(pulley_disable_interp_simd))]
        f.field(
            "v_regs",
            &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
        );
        // `finish_non_exhaustive` because the skipped fields above are not
        // printed.
        f.finish_non_exhaustive()
    }
}
852
// Generates `Index`/`IndexMut` impls over both `Vm` and `MachineState` so
// that a register (e.g. an `XReg`) can be used directly as an index into the
// matching register file.
macro_rules! index_reg {
    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
        // `Vm` indexing simply forwards to the inner `MachineState`.
        impl Index<$reg_ty> for Vm {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.state[reg]
            }
        }

        impl IndexMut<$reg_ty> for Vm {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.state[reg]
            }
        }

        impl Index<$reg_ty> for MachineState {
            type Output = $value_ty;

            fn index(&self, reg: $reg_ty) -> &Self::Output {
                &self.$field[reg.index()]
            }
        }

        impl IndexMut<$reg_ty> for MachineState {
            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
                &mut self.$field[reg.index()]
            }
        }
    };
}

// One instantiation per register bank.
index_reg!(XReg, XRegVal, x_regs);
index_reg!(FReg, FRegVal, f_regs);
#[cfg(not(pulley_disable_interp_simd))]
index_reg!(VReg, VRegVal, v_regs);
889
/// Sentinel return address that signals the end of the call stack.
///
/// When `lr` (or `fp`) holds this value, returning transfers control back to
/// the host rather than to more bytecode (see `Vm::call_start`).
const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
892
893impl MachineState {
894    fn with_stack(stack_size: usize) -> Result<Self, OutOfMemory> {
895        let mut state = Self {
896            x_regs: [Default::default(); XReg::RANGE.end as usize],
897            f_regs: Default::default(),
898            #[cfg(not(pulley_disable_interp_simd))]
899            v_regs: Default::default(),
900            stack: Stack::new(stack_size)?,
901            done_reason: None,
902            fp: HOST_RETURN_ADDR,
903            lr: HOST_RETURN_ADDR,
904        };
905
906        let sp = state.stack.top();
907        state[XReg::sp] = XRegVal::new_ptr(sp);
908
909        Ok(state)
910    }
911}
912
/// Inner private module to prevent creation of the `Done` structure outside of
/// this module.
mod done {
    use super::{Encode, Interpreter, MachineState};
    use core::ops::ControlFlow;
    use core::ptr::NonNull;

    /// Zero-sized sentinel indicating that pulley execution has halted.
    ///
    /// The reason for halting is stored in `MachineState`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct Done {
        // Private field so a `Done` token can only be minted by the `done_*`
        // methods below, each of which records a reason first.
        _priv: (),
    }

    /// Reason that the pulley interpreter has ceased execution.
    pub enum DoneReason<T> {
        /// A trap happened at this bytecode instruction.
        Trap {
            /// Which instruction is raising this trap.
            pc: NonNull<u8>,
            /// The kind of trap being raised, if known.
            kind: Option<TrapKind>,
        },
        /// The `call_indirect_host` instruction was executed.
        CallIndirectHost {
            /// The payload of `call_indirect_host`.
            id: u8,
            /// Where to resume execution after the host has finished.
            resume: NonNull<u8>,
        },
        /// Pulley has finished and the provided value is being returned.
        ReturnToHost(T),
    }

    /// Stored within `DoneReason::Trap`.
    #[expect(missing_docs, reason = "self-describing variants")]
    pub enum TrapKind {
        DivideByZero,
        IntegerOverflow,
        BadConversionToInteger,
        MemoryOutOfBounds,
        DisabledOpcode,
        StackOverflow,
    }

    impl MachineState {
        pub(super) fn debug_assert_done_reason_none(&mut self) {
            debug_assert!(self.done_reason.is_none());
        }

        /// Consumes a `Done` token and extracts the reason recorded for it.
        ///
        /// The `unwrap` is justified because every constructor of `Done`
        /// (the `done_*` methods below) stores a `done_reason` before
        /// handing the token out.
        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
            self.done_reason.take().unwrap()
        }
    }

    impl Interpreter<'_> {
        /// Finishes execution by recording `DoneReason::Trap`.
        ///
        /// This method takes an `I` generic parameter indicating which
        /// instruction is executing this function and generating a trap. That's
        /// used to go backwards from the current `pc` which is just beyond the
        /// instruction to point to the instruction itself in the trap metadata
        /// returned from the interpreter.
        #[cold]
        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
            self.done_trap_kind::<I>(None)
        }

        /// Same as `done_trap` but with an explicit `TrapKind`.
        #[cold]
        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
            let pc = self.current_pc::<I>();
            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
        #[cold]
        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::CallIndirectHost {
                id,
                resume: self.pc.as_ptr(),
            });
            ControlFlow::Break(Done { _priv: () })
        }

        /// Finishes execution by recording `DoneReason::ReturnToHost`.
        #[cold]
        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
            ControlFlow::Break(Done { _priv: () })
        }
    }
}
1008
1009use done::Done;
1010pub use done::{DoneReason, TrapKind};
1011
struct Interpreter<'a> {
    /// Register file, interpreter stack, and other mutable execution state.
    state: &'a mut MachineState,
    /// The bytecode stream being decoded; after decoding an instruction this
    /// points just past that instruction (see `current_pc`).
    pc: UnsafeBytecodeStream,
    /// Side-channel used to report the executing pc for profiling; a no-op
    /// unless the `profile` feature is enabled.
    executing_pc: ExecutingPcRef<'a>,
}
1017
impl Interpreter<'_> {
    /// Calculates the `offset` for the current instruction `I`.
    ///
    /// The returned address is `offset` bytes from the *start* of the current
    /// instruction, not from `self.pc` (which already points past it).
    #[inline]
    fn pc_rel<I: Encode>(&mut self, offset: PcRelOffset) -> NonNull<u8> {
        let offset = isize::try_from(i32::from(offset)).unwrap();
        // SAFETY: part of the interpreter's overall unsafe contract is that
        // only valid bytecode is executed, so encoded offsets stay in-bounds.
        unsafe { self.current_pc::<I>().offset(offset) }
    }

    /// Performs a relative jump of `offset` bytes from the current instruction.
    ///
    /// This will jump from the start of the current instruction, identified by
    /// `I`, `offset` bytes away. Note that the `self.pc` at the start of this
    /// function actually points to the instruction after this one so `I` is
    /// necessary to go back to ourselves after which we then go `offset` away.
    #[inline]
    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        let new_pc = self.pc_rel::<I>(offset);
        self.pc = unsafe { UnsafeBytecodeStream::new(new_pc) };
        ControlFlow::Continue(())
    }

    /// Returns the PC of the current instruction where `I` is the static type
    /// representing the current instruction.
    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
        // `self.pc` points just past the decoded instruction, so rewind by
        // `I`'s encoded width to recover the instruction's own address.
        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
    }

    /// `sp -= size_of::<T>(); *sp = val;`
    ///
    /// Note that `I` is the instruction which is pushing data to use if a trap
    /// is generated.
    #[must_use]
    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
        // `set_sp` breaks with a stack-overflow trap if `new_sp` is below the
        // stack's base, so `?` aborts before anything is written.
        self.set_sp::<I>(new_sp.cast())?;
        unsafe {
            new_sp.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }

    /// `ret = *sp; sp += size_of::<T>()`
    ///
    /// (Popping moves `sp` upwards, back towards the top of the stack; only a
    /// debug assertion guards the new `sp`.)
    fn pop<T>(&mut self) -> T {
        let sp = self.state[XReg::sp].get_ptr::<T>();
        let val = unsafe { sp.read_unaligned() };
        self.set_sp_unchecked(sp.wrapping_add(1));
        val
    }

    /// Sets the stack pointer to the `sp` provided.
    ///
    /// Returns a trap if this would result in stack overflow, or if `sp` is
    /// beneath the base pointer of `self.state.stack`.
    ///
    /// The `I` parameter here is the instruction that is setting the stack
    /// pointer and is used to calculate this instruction's own `pc` if this
    /// instruction traps.
    #[must_use]
    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
        let sp_raw = sp as usize;
        let base_raw = self.state.stack.base() as usize;
        if sp_raw < base_raw {
            return self.done_trap_kind::<I>(Some(TrapKind::StackOverflow));
        }
        self.set_sp_unchecked(sp);
        ControlFlow::Continue(())
    }

    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
    /// only be used with stack increment operations such as `pop`.
    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
        if cfg!(debug_assertions) {
            // Debug builds still catch wild stack pointers: `sp` must lie
            // within `[base, base + len]` of the interpreter stack.
            let sp_raw = sp as usize;
            let base = self.state.stack.base() as usize;
            let end = base + self.state.stack.len();
            assert!(base <= sp_raw && sp_raw <= end);
        }
        self.state[XReg::sp].set_ptr(sp);
    }

    /// Loads a value of `T` using native-endian byte ordering from the `addr`
    /// specified.
    ///
    /// The `I` type parameter is the instruction issuing this load which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was loaded successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
        unsafe { addr.load_ne::<T, I>(self) }
    }

    /// Stores a `val` to the `addr` specified.
    ///
    /// The `I` type parameter is the instruction issuing this store which is
    /// used in case of traps to calculate the trapping pc.
    ///
    /// Returns `ControlFlow::Break` if a trap happens or
    /// `ControlFlow::Continue` if the value was stored successfully.
    ///
    /// # Unsafety
    ///
    /// Safety of this method relies on the safety of the original bytecode
    /// itself and correctly annotating both `T` and `I`.
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(
        &mut self,
        addr: impl AddressingMode,
        val: T,
    ) -> ControlFlow<Done> {
        unsafe { addr.store_ne::<T, I>(self, val) }
    }

    /// Widens to `f64` and defers to [`Self::check_xnn_from_f64`] with the
    /// correspondingly widened bounds.
    fn check_xnn_from_f32<I: Encode>(
        &mut self,
        val: f32,
        (lo, hi): (f32, f32),
    ) -> ControlFlow<Done> {
        self.check_xnn_from_f64::<I>(val.into(), (lo.into(), hi.into()))
    }

    /// Traps unless the truncation of `val` lies strictly between `lo` and
    /// `hi`, i.e. unless the float-to-integer conversion is representable.
    fn check_xnn_from_f64<I: Encode>(
        &mut self,
        val: f64,
        (lo, hi): (f64, f64),
    ) -> ControlFlow<Done> {
        // NaN is the only value that compares unequal to itself.
        if val != val {
            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
        }
        let val = val.wasm_trunc();
        if val <= lo || val >= hi {
            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
        }
        ControlFlow::Continue(())
    }

    /// Reassembles a 128-bit value from its low/high 64-bit register halves.
    #[cfg(not(pulley_disable_interp_simd))]
    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
        let lo = self.state[lo].get_u64();
        let hi = self.state[hi].get_i64();
        i128::from(lo) | (i128::from(hi) << 64)
    }

    /// Splits a 128-bit value into its low/high 64-bit register halves.
    #[cfg(not(pulley_disable_interp_simd))]
    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
        self.state[lo].set_u64(val as u64);
        self.state[hi].set_u64((val >> 64) as u64);
    }

    fn record_executing_pc_for_profiling(&mut self) {
        // Note that this is a no-op if `feature = "profile"` is disabled.
        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
    }
}
1178
/// Helper trait to encompass the various addressing modes of Pulley.
trait AddressingMode: Sized {
    /// Calculates the native host address `*mut T` corresponding to this
    /// addressing mode.
    ///
    /// # Safety
    ///
    /// Relies on the original bytecode being safe to execute as this will
    /// otherwise perform unsafe byte offsets for example which requires the
    /// original bytecode to be correct.
    #[must_use]
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;

    /// Loads a value of `T` from this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::load_ne`].
    #[must_use]
    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
        // `?` propagates any trap raised while computing the address.
        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
        ControlFlow::Continue(ret)
    }

    /// Stores a `val` to this address, using native-endian byte order.
    ///
    /// For more information see [`Interpreter::store_ne`].
    #[must_use]
    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
        unsafe {
            self.addr::<T, I>(i)?.write_unaligned(val);
        }
        ControlFlow::Continue(())
    }
}
1212
impl AddressingMode for AddrO32 {
    // Register base plus signed 32-bit offset; never traps.
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Note that this addressing mode cannot return `ControlFlow::Break`
        // which is intentional. It's expected that LLVM optimizes away any
        // branches callers have.
        unsafe {
            ControlFlow::Continue(
                i.state[self.addr]
                    .get_ptr::<T>()
                    .byte_offset(self.offset as isize),
            )
        }
    }
}
1227
impl AddressingMode for AddrZ {
    // Like `AddrO32` but a null base address raises a trap.
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // This addressing mode defines loading/storing to the null address as
        // a trap, but all other addresses are allowed.
        let host_addr = i.state[self.addr].get_ptr::<T>();
        if host_addr.is_null() {
            // `done_trap_kind` always returns `Break`, so `?` exits here; the
            // `unreachable!()` documents that fact for readers.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = host_addr.byte_offset(self.offset as isize);
            ControlFlow::Continue(addr)
        }
    }
}
1243
impl AddressingMode for AddrG32 {
    // Guest 32-bit address with an explicit bounds check against a
    // register-held heap bound.
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Test if `bound - offset - T` is less than the wasm address to
        // generate a trap. It's a guarantee of this instruction that these
        // subtractions don't overflow.
        let bound = i.state[self.host_heap_bound].get_u64() as usize;
        let offset = usize::from(self.offset);
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        if wasm_addr > bound - offset - size_of::<T>() {
            // `done_trap_kind` always breaks; `?` exits here.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}
1265
impl AddressingMode for AddrG32Bne {
    // Same as `AddrG32` except the heap bound is loaded from memory
    // (`bound_addr + bound_offset`) rather than read from a register.
    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
        // Same as `AddrG32` above except that the bound is loaded from memory.
        let bound = unsafe {
            *i.state[self.host_heap_bound_addr]
                .get_ptr::<usize>()
                .byte_add(usize::from(self.host_heap_bound_offset))
        };
        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
        let offset = usize::from(self.offset);
        if wasm_addr > bound - offset - size_of::<T>() {
            // `done_trap_kind` always breaks; `?` exits here.
            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
            unreachable!();
        }
        unsafe {
            let addr = i.state[self.host_heap_base]
                .get_ptr::<T>()
                .byte_add(wasm_addr)
                .byte_add(offset);
            ControlFlow::Continue(addr)
        }
    }
}
1289
#[test]
fn simple_push_pop() {
    let mut machine = MachineState::with_stack(16).unwrap();
    let executing = ExecutingPc::default();
    unsafe {
        let mut buf = [0; 10];
        let mut interp = Interpreter {
            state: &mut machine,
            // push/pop never read the pc, so any valid pointer suffices here.
            pc: UnsafeBytecodeStream::new(NonNull::new(buf.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: executing.as_ref(),
        };
        // One value in, the same value out.
        assert!(interp.push::<crate::Ret, _>(0_i32).is_continue());
        assert_eq!(interp.pop::<i32>(), 0_i32);
        // The 16-byte stack holds exactly four `i32` values...
        for v in 1..=4_i32 {
            assert!(interp.push::<crate::Ret, _>(v).is_continue());
        }
        // ...so the fifth and sixth pushes must report overflow.
        assert!(interp.push::<crate::Ret, _>(5_i32).is_break());
        assert!(interp.push::<crate::Ret, _>(6_i32).is_break());
        // Values come back in LIFO order, unaffected by the failed pushes.
        for v in (1..=4_i32).rev() {
            assert_eq!(interp.pop::<i32>(), v);
        }
    }
}
1316
// Generates `OpVisitor` methods for the register-vs-immediate conditional
// branches. Each entry names the visitor method, the immediate type, the
// instruction type (`$camel`, used for pc-relative math), the comparison
// operator, and the register accessor; the immediate is widened with `Into`
// before comparing.
macro_rules! br_if_imm {
    ($(
        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
            = $camel:ident / $op:tt / $get:ident;
    )*) => {$(
        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
            let a = self.state[a].$get();
            if a $op b.into() {
                self.pc_rel_jump::<crate::$camel>(offset)
            } else {
                ControlFlow::Continue(())
            }
        }
    )*};
}
1332
1333impl OpVisitor for Interpreter<'_> {
    type BytecodeStream = UnsafeBytecodeStream;
    type Return = ControlFlow<Done>;

    /// Gives the opcode decoder mutable access to the instruction stream.
    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
        &mut self.pc
    }
1340
    /// `nop`: does nothing and falls through to the next instruction.
    fn nop(&mut self) -> ControlFlow<Done> {
        ControlFlow::Continue(())
    }
1344
    /// `ret`: transfers control to the address stored in `lr`.
    ///
    /// The sentinel `HOST_RETURN_ADDR` means the outermost frame is
    /// returning, which finishes interpretation entirely.
    fn ret(&mut self) -> ControlFlow<Done> {
        let lr = self.state.lr;
        if lr == HOST_RETURN_ADDR {
            self.done_return_to_host()
        } else {
            // SAFETY: a non-sentinel `lr` was stored by a prior `call*` from
            // `self.pc.as_ptr()` and is therefore a valid, non-null pc.
            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
            ControlFlow::Continue(())
        }
    }
1354
1355    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1356        let return_addr = self.pc.as_ptr();
1357        self.state.lr = return_addr.as_ptr();
1358        self.pc_rel_jump::<crate::Call>(offset)
1359    }
1360
1361    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1362        let return_addr = self.pc.as_ptr();
1363        self.state.lr = return_addr.as_ptr();
1364        self.state[XReg::x0] = self.state[arg1];
1365        self.pc_rel_jump::<crate::Call1>(offset)
1366    }
1367
1368    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1369        let return_addr = self.pc.as_ptr();
1370        self.state.lr = return_addr.as_ptr();
1371        let (x0, x1) = (self.state[arg1], self.state[arg2]);
1372        self.state[XReg::x0] = x0;
1373        self.state[XReg::x1] = x1;
1374        self.pc_rel_jump::<crate::Call2>(offset)
1375    }
1376
1377    fn call3(
1378        &mut self,
1379        arg1: XReg,
1380        arg2: XReg,
1381        arg3: XReg,
1382        offset: PcRelOffset,
1383    ) -> ControlFlow<Done> {
1384        let return_addr = self.pc.as_ptr();
1385        self.state.lr = return_addr.as_ptr();
1386        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1387        self.state[XReg::x0] = x0;
1388        self.state[XReg::x1] = x1;
1389        self.state[XReg::x2] = x2;
1390        self.pc_rel_jump::<crate::Call3>(offset)
1391    }
1392
1393    fn call4(
1394        &mut self,
1395        arg1: XReg,
1396        arg2: XReg,
1397        arg3: XReg,
1398        arg4: XReg,
1399        offset: PcRelOffset,
1400    ) -> ControlFlow<Done> {
1401        let return_addr = self.pc.as_ptr();
1402        self.state.lr = return_addr.as_ptr();
1403        let (x0, x1, x2, x3) = (
1404            self.state[arg1],
1405            self.state[arg2],
1406            self.state[arg3],
1407            self.state[arg4],
1408        );
1409        self.state[XReg::x0] = x0;
1410        self.state[XReg::x1] = x1;
1411        self.state[XReg::x2] = x2;
1412        self.state[XReg::x3] = x3;
1413        self.pc_rel_jump::<crate::Call4>(offset)
1414    }
1415
    /// `call_indirect`: saves the return address in `lr` and jumps to the
    /// address held in `dst`.
    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
        let return_addr = self.pc.as_ptr();
        self.state.lr = return_addr.as_ptr();
        // SAFETY: part of the unsafe contract of the interpreter is only valid
        // bytecode is interpreted, so the jump destination is part of the validity
        // of the bytecode itself.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
        }
        ControlFlow::Continue(())
    }
1427
    /// `jump`: unconditional pc-relative branch.
    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
        self.pc_rel_jump::<crate::Jump>(offset)
    }
1431
    /// `xjump`: unconditional jump to the address held in `reg`.
    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
        // SAFETY: as with `call_indirect`, valid bytecode guarantees the
        // register holds a valid, non-null jump target.
        unsafe {
            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
        }
        ControlFlow::Continue(())
    }
1438
1439    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1440        let cond = self.state[cond].get_u32();
1441        if cond != 0 {
1442            self.pc_rel_jump::<crate::BrIf>(offset)
1443        } else {
1444            ControlFlow::Continue(())
1445        }
1446    }
1447
1448    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1449        let cond = self.state[cond].get_u32();
1450        if cond == 0 {
1451            self.pc_rel_jump::<crate::BrIfNot>(offset)
1452        } else {
1453            ControlFlow::Continue(())
1454        }
1455    }
1456
1457    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1458        let a = self.state[a].get_u32();
1459        let b = self.state[b].get_u32();
1460        if a == b {
1461            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1462        } else {
1463            ControlFlow::Continue(())
1464        }
1465    }
1466
1467    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1468        let a = self.state[a].get_u32();
1469        let b = self.state[b].get_u32();
1470        if a != b {
1471            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1472        } else {
1473            ControlFlow::Continue(())
1474        }
1475    }
1476
1477    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1478        let a = self.state[a].get_i32();
1479        let b = self.state[b].get_i32();
1480        if a < b {
1481            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1482        } else {
1483            ControlFlow::Continue(())
1484        }
1485    }
1486
1487    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1488        let a = self.state[a].get_i32();
1489        let b = self.state[b].get_i32();
1490        if a <= b {
1491            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1492        } else {
1493            ControlFlow::Continue(())
1494        }
1495    }
1496
1497    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1498        let a = self.state[a].get_u32();
1499        let b = self.state[b].get_u32();
1500        if a < b {
1501            self.pc_rel_jump::<crate::BrIfXult32>(offset)
1502        } else {
1503            ControlFlow::Continue(())
1504        }
1505    }
1506
1507    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1508        let a = self.state[a].get_u32();
1509        let b = self.state[b].get_u32();
1510        if a <= b {
1511            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1512        } else {
1513            ControlFlow::Continue(())
1514        }
1515    }
1516
1517    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1518        let a = self.state[a].get_u64();
1519        let b = self.state[b].get_u64();
1520        if a == b {
1521            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1522        } else {
1523            ControlFlow::Continue(())
1524        }
1525    }
1526
1527    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1528        let a = self.state[a].get_u64();
1529        let b = self.state[b].get_u64();
1530        if a != b {
1531            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1532        } else {
1533            ControlFlow::Continue(())
1534        }
1535    }
1536
1537    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1538        let a = self.state[a].get_i64();
1539        let b = self.state[b].get_i64();
1540        if a < b {
1541            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1542        } else {
1543            ControlFlow::Continue(())
1544        }
1545    }
1546
1547    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1548        let a = self.state[a].get_i64();
1549        let b = self.state[b].get_i64();
1550        if a <= b {
1551            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1552        } else {
1553            ControlFlow::Continue(())
1554        }
1555    }
1556
1557    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1558        let a = self.state[a].get_u64();
1559        let b = self.state[b].get_u64();
1560        if a < b {
1561            self.pc_rel_jump::<crate::BrIfXult64>(offset)
1562        } else {
1563            ControlFlow::Continue(())
1564        }
1565    }
1566
1567    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1568        let a = self.state[a].get_u64();
1569        let b = self.state[b].get_u64();
1570        if a <= b {
1571            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1572        } else {
1573            ControlFlow::Continue(())
1574        }
1575    }
1576
    // Register-vs-immediate conditional branches, expanded by `br_if_imm!`
    // above. Signed forms sign-extend the immediate via `Into`; unsigned
    // forms zero-extend. (Comments inside the invocation are stripped by the
    // lexer before macro matching.)
    br_if_imm! {
        // 32-bit equality comparisons.
        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq32I8 / == / get_i32;
        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq32I32 / == / get_i32;
        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq32I8 / != / get_i32;
        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq32I32 / != / get_i32;

        // 32-bit signed orderings.
        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt32I8 / < / get_i32;
        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt32I32 / < / get_i32;
        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt32I8 / > / get_i32;
        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt32I32 / > / get_i32;
        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq32I8 / <= / get_i32;
        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq32I32 / <= / get_i32;
        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq32I8 / >= / get_i32;
        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq32I32 / >= / get_i32;

        // 32-bit unsigned orderings.
        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult32U8 / < / get_u32;
        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult32U32 / < / get_u32;
        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt32U8 / > / get_u32;
        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt32U32 / > / get_u32;
        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq32U8 / <= / get_u32;
        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq32U32 / <= / get_u32;
        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq32U8 / >= / get_u32;
        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq32U32 / >= / get_u32;

        // 64-bit equality comparisons.
        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXeq64I8 / == / get_i64;
        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXeq64I32 / == / get_i64;
        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXneq64I8 / != / get_i64;
        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXneq64I32 / != / get_i64;

        // 64-bit signed orderings.
        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslt64I8 / < / get_i64;
        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslt64I32 / < / get_i64;
        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgt64I8 / > / get_i64;
        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgt64I32 / > / get_i64;
        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXslteq64I8 / <= / get_i64;
        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXslteq64I32 / <= / get_i64;
        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
            = BrIfXsgteq64I8 / >= / get_i64;
        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
            = BrIfXsgteq64I32 / >= / get_i64;

        // 64-bit unsigned orderings.
        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXult64U8 / < / get_u64;
        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXult64U32 / < / get_u64;
        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugt64U8 / > / get_u64;
        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugt64U32 / > / get_u64;
        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXulteq64U8 / <= / get_u64;
        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXulteq64U32 / <= / get_u64;
        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
            = BrIfXugteq64U8 / >= / get_u64;
        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
            = BrIfXugteq64U32 / >= / get_u64;
    }
1664
    /// `xmov`: copies the full register contents of `src` into `dst`.
    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src];
        self.state[dst] = val;
        ControlFlow::Continue(())
    }
1670
    /// `xconst8`: materializes the sign-extended 8-bit immediate into `dst`.
    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }
1675
    /// `xzero`: sets `dst` to zero.
    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(0);
        ControlFlow::Continue(())
    }
1680
    /// `xone`: sets `dst` to one.
    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
        self.state[dst].set_i64(1);
        ControlFlow::Continue(())
    }
1685
    /// `xconst16`: materializes the sign-extended 16-bit immediate into `dst`.
    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }
1690
    /// `xconst32`: materializes the sign-extended 32-bit immediate into `dst`.
    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
        self.state[dst].set_i64(i64::from(imm));
        ControlFlow::Continue(())
    }
1695
    /// `xconst64`: materializes the full 64-bit immediate into `dst`.
    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
        self.state[dst].set_i64(imm);
        ControlFlow::Continue(())
    }
1700
1701    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1702        let a = self.state[operands.src1].get_u32();
1703        let b = self.state[operands.src2].get_u32();
1704        self.state[operands.dst].set_u32(a.wrapping_add(b));
1705        ControlFlow::Continue(())
1706    }
1707
    /// `xadd32_u8`: delegates to the 32-bit immediate form after
    /// zero-extending the `u8`.
    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd32_u32(dst, src1, src2.into())
    }
1711
1712    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1713        let a = self.state[src1].get_u32();
1714        self.state[dst].set_u32(a.wrapping_add(src2));
1715        ControlFlow::Continue(())
1716    }
1717
1718    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1719        let a = self.state[operands.src1].get_u64();
1720        let b = self.state[operands.src2].get_u64();
1721        self.state[operands.dst].set_u64(a.wrapping_add(b));
1722        ControlFlow::Continue(())
1723    }
1724
    /// `xadd64_u8`: delegates to the 32-bit immediate form after
    /// zero-extending the `u8`.
    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
        self.xadd64_u32(dst, src1, src2.into())
    }
1728
1729    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1730        let a = self.state[src1].get_u64();
1731        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1732        ControlFlow::Continue(())
1733    }
1734
1735    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1736        let a = self.state[src1].get_u32();
1737        let b = self.state[src2].get_u32();
1738        let c = self.state[src3].get_u32();
1739        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1740        ControlFlow::Continue(())
1741    }
1742
1743    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1744        let a = self.state[src1].get_u64();
1745        let b = self.state[src2].get_u64();
1746        let c = self.state[src3].get_u64();
1747        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1748        ControlFlow::Continue(())
1749    }
1750
1751    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1752        let a = self.state[operands.src1].get_u32();
1753        let b = self.state[operands.src2].get_u32();
1754        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1755        ControlFlow::Continue(())
1756    }
1757
1758    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1759        self.xsub32_u32(dst, src1, src2.into())
1760    }
1761
1762    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1763        let a = self.state[src1].get_u32();
1764        self.state[dst].set_u32(a.wrapping_sub(src2));
1765        ControlFlow::Continue(())
1766    }
1767
1768    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1769        let a = self.state[operands.src1].get_u64();
1770        let b = self.state[operands.src2].get_u64();
1771        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1772        ControlFlow::Continue(())
1773    }
1774
1775    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1776        self.xsub64_u32(dst, src1, src2.into())
1777    }
1778
1779    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1780        let a = self.state[src1].get_u64();
1781        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1782        ControlFlow::Continue(())
1783    }
1784
1785    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1786        let a = self.state[operands.src1].get_u32();
1787        let b = self.state[operands.src2].get_u32();
1788        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1789        ControlFlow::Continue(())
1790    }
1791
1792    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1793        self.xmul32_s32(dst, src1, src2.into())
1794    }
1795
1796    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1797        let a = self.state[src1].get_i32();
1798        self.state[dst].set_i32(a.wrapping_mul(src2));
1799        ControlFlow::Continue(())
1800    }
1801
1802    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1803        let a = self.state[operands.src1].get_u64();
1804        let b = self.state[operands.src2].get_u64();
1805        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1806        ControlFlow::Continue(())
1807    }
1808
1809    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1810        self.xmul64_s32(dst, src1, src2.into())
1811    }
1812
1813    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1814        let a = self.state[src1].get_i64();
1815        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1816        ControlFlow::Continue(())
1817    }
1818
1819    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1820        let a = self.state[operands.src1].get_u32();
1821        let b = self.state[operands.src2].get_u32();
1822        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1823        ControlFlow::Continue(())
1824    }
1825
1826    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1827        let a = self.state[operands.src1].get_u32();
1828        let b = self.state[operands.src2].get_u32();
1829        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1830        ControlFlow::Continue(())
1831    }
1832
1833    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1834        let a = self.state[operands.src1].get_i32();
1835        let b = self.state[operands.src2].get_u32();
1836        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1837        ControlFlow::Continue(())
1838    }
1839
1840    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1841        let a = self.state[operands.src1].get_u64();
1842        let b = self.state[operands.src2].get_u32();
1843        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1844        ControlFlow::Continue(())
1845    }
1846
1847    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1848        let a = self.state[operands.src1].get_u64();
1849        let b = self.state[operands.src2].get_u32();
1850        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1851        ControlFlow::Continue(())
1852    }
1853
1854    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1855        let a = self.state[operands.src1].get_i64();
1856        let b = self.state[operands.src2].get_u32();
1857        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1858        ControlFlow::Continue(())
1859    }
1860
1861    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1862        let a = self.state[operands.src1].get_u32();
1863        let b = u32::from(u8::from(operands.src2));
1864        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1865        ControlFlow::Continue(())
1866    }
1867
1868    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1869        let a = self.state[operands.src1].get_u32();
1870        let b = u32::from(u8::from(operands.src2));
1871        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1872        ControlFlow::Continue(())
1873    }
1874
1875    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1876        let a = self.state[operands.src1].get_i32();
1877        let b = u32::from(u8::from(operands.src2));
1878        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1879        ControlFlow::Continue(())
1880    }
1881
1882    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1883        let a = self.state[operands.src1].get_u64();
1884        let b = u32::from(u8::from(operands.src2));
1885        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1886        ControlFlow::Continue(())
1887    }
1888
1889    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1890        let a = self.state[operands.src1].get_u64();
1891        let b = u32::from(u8::from(operands.src2));
1892        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1893        ControlFlow::Continue(())
1894    }
1895
1896    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1897        let a = self.state[operands.src1].get_i64();
1898        let b = u32::from(u8::from(operands.src2));
1899        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1900        ControlFlow::Continue(())
1901    }
1902
1903    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1904        let a = self.state[src].get_i32();
1905        self.state[dst].set_i32(a.wrapping_neg());
1906        ControlFlow::Continue(())
1907    }
1908
1909    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1910        let a = self.state[src].get_i64();
1911        self.state[dst].set_i64(a.wrapping_neg());
1912        ControlFlow::Continue(())
1913    }
1914
1915    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1916        let a = self.state[operands.src1].get_u64();
1917        let b = self.state[operands.src2].get_u64();
1918        self.state[operands.dst].set_u32(u32::from(a == b));
1919        ControlFlow::Continue(())
1920    }
1921
1922    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1923        let a = self.state[operands.src1].get_u64();
1924        let b = self.state[operands.src2].get_u64();
1925        self.state[operands.dst].set_u32(u32::from(a != b));
1926        ControlFlow::Continue(())
1927    }
1928
1929    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1930        let a = self.state[operands.src1].get_i64();
1931        let b = self.state[operands.src2].get_i64();
1932        self.state[operands.dst].set_u32(u32::from(a < b));
1933        ControlFlow::Continue(())
1934    }
1935
1936    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1937        let a = self.state[operands.src1].get_i64();
1938        let b = self.state[operands.src2].get_i64();
1939        self.state[operands.dst].set_u32(u32::from(a <= b));
1940        ControlFlow::Continue(())
1941    }
1942
1943    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1944        let a = self.state[operands.src1].get_u64();
1945        let b = self.state[operands.src2].get_u64();
1946        self.state[operands.dst].set_u32(u32::from(a < b));
1947        ControlFlow::Continue(())
1948    }
1949
1950    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1951        let a = self.state[operands.src1].get_u64();
1952        let b = self.state[operands.src2].get_u64();
1953        self.state[operands.dst].set_u32(u32::from(a <= b));
1954        ControlFlow::Continue(())
1955    }
1956
1957    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1958        let a = self.state[operands.src1].get_u32();
1959        let b = self.state[operands.src2].get_u32();
1960        self.state[operands.dst].set_u32(u32::from(a == b));
1961        ControlFlow::Continue(())
1962    }
1963
1964    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1965        let a = self.state[operands.src1].get_u32();
1966        let b = self.state[operands.src2].get_u32();
1967        self.state[operands.dst].set_u32(u32::from(a != b));
1968        ControlFlow::Continue(())
1969    }
1970
1971    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1972        let a = self.state[operands.src1].get_i32();
1973        let b = self.state[operands.src2].get_i32();
1974        self.state[operands.dst].set_u32(u32::from(a < b));
1975        ControlFlow::Continue(())
1976    }
1977
1978    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1979        let a = self.state[operands.src1].get_i32();
1980        let b = self.state[operands.src2].get_i32();
1981        self.state[operands.dst].set_u32(u32::from(a <= b));
1982        ControlFlow::Continue(())
1983    }
1984
1985    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1986        let a = self.state[operands.src1].get_u32();
1987        let b = self.state[operands.src2].get_u32();
1988        self.state[operands.dst].set_u32(u32::from(a < b));
1989        ControlFlow::Continue(())
1990    }
1991
1992    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1993        let a = self.state[operands.src1].get_u32();
1994        let b = self.state[operands.src2].get_u32();
1995        self.state[operands.dst].set_u32(u32::from(a <= b));
1996        ControlFlow::Continue(())
1997    }
1998
    fn push_frame(&mut self) -> ControlFlow<Done> {
        // Save the return address then the caller's frame pointer on the
        // stack (in that order), then point `fp` at the saved pair. `?`
        // propagates a stack-overflow trap from `push`.
        self.push::<crate::PushFrame, _>(self.state.lr)?;
        self.push::<crate::PushFrame, _>(self.state.fp)?;
        self.state.fp = self.state[XReg::sp].get_ptr();
        ControlFlow::Continue(())
    }
2005
    #[inline]
    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
        // Fused `push_frame` + stack allocation + callee-save spill: a single
        // sp adjustment reserves `amt` bytes of frame space plus room for the
        // fp/lr pair, then lr/fp and each register in `regs` are stored into
        // the reserved area.
        //
        // Decrement the stack pointer `amt` bytes plus 2 pointers more for
        // fp/lr.
        let ptr_size = size_of::<usize>();
        let full_amt = usize::from(amt) + 2 * ptr_size;
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
        // `set_sp` (checked) traps on stack overflow before any stores happen.
        self.set_sp::<crate::PushFrameSave>(new_sp)?;

        unsafe {
            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
            // that order, at the top of the allocated area.
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    offset: (full_amt - 1 * ptr_size) as i32,
                },
                self.state.lr,
            )?;
            self.store_ne::<_, crate::PushFrameSave>(
                AddrO32 {
                    addr: XReg::sp,
                    offset: (full_amt - 2 * ptr_size) as i32,
                },
                self.state.fp,
            )?;

            // Set `fp` to the top of our frame, where `fp` is stored.
            let mut offset = amt as i32;
            self.state.fp = self.state[XReg::sp]
                .get_ptr::<u8>()
                .byte_offset(offset as isize);

            // Next save any registers in `regs` to the stack.
            // Each register occupies 8 bytes, laid out downward from `amt`;
            // `pop_frame_restore` walks the same layout in reverse.
            for reg in regs {
                offset -= 8;
                self.store_ne::<_, crate::PushFrameSave>(
                    AddrO32 {
                        addr: XReg::sp,
                        offset,
                    },
                    self.state[reg].get_u64(),
                )?;
            }
        }
        ControlFlow::Continue(())
    }
2053
    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
        // Inverse of `push_frame_save`: reload each callee-saved register
        // from the same sp-relative slots it was spilled to (8 bytes per
        // register, walking downward from `amt`).
        //
        // Restore all registers in `regs`, followed by the normal `pop_frame`
        // opcode below to restore fp/lr.
        unsafe {
            let mut offset = i32::from(amt);
            for reg in regs {
                offset -= 8;
                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
                    addr: XReg::sp,
                    offset,
                })?;
                self.state[reg].set_u64(val);
            }
        }
        self.pop_frame()
    }
2070
    fn pop_frame(&mut self) -> ControlFlow<Done> {
        // Unwind the current frame: reset sp to the frame base (unchecked —
        // releasing stack space cannot overflow), then pop the saved fp and
        // lr back into the machine state, mirroring `push_frame`'s layout.
        self.set_sp_unchecked(self.state.fp);
        let fp = self.pop();
        let lr = self.pop();
        self.state.fp = fp;
        self.state.lr = lr;
        ControlFlow::Continue(())
    }
2079
    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
        // Jump table of `amt` 4-byte `PcRelOffset` entries following this
        // instruction. The index is clamped to the last entry, which serves
        // as the default target for out-of-range indices.
        // NOTE(review): `amt - 1` would underflow for `amt == 0` — presumably
        // valid bytecode guarantees at least one entry; confirm in the encoder.
        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
        // SAFETY: part of the contract of the interpreter is only dealing with
        // valid bytecode, so this offset should be safe.
        self.pc = unsafe { self.pc.offset(idx * 4) };

        // Decode the `PcRelOffset` without tampering with `self.pc` as the
        // jump is relative to `self.pc`.
        let mut tmp = self.pc;
        let Ok(rel) = PcRelOffset::decode(&mut tmp);
        let offset = isize::try_from(i32::from(rel)).unwrap();
        self.pc = unsafe { self.pc.offset(offset) };
        ControlFlow::Continue(())
    }
2094
    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
        // Grow the stack downward by `amt` bytes; the checked `set_sp` traps
        // if this would overflow the interpreter stack.
        let amt = usize::try_from(amt).unwrap();
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
        self.set_sp::<crate::StackAlloc32>(new_sp)?;
        ControlFlow::Continue(())
    }

    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
        // Release `amt` bytes of stack; moving sp upward cannot overflow the
        // stack, hence the unchecked update.
        let amt = usize::try_from(amt).unwrap();
        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
        self.set_sp_unchecked(new_sp);
        ControlFlow::Continue(())
    }
2108
2109    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2110        let src = self.state[src].get_u64() as u8;
2111        self.state[dst].set_u64(src.into());
2112        ControlFlow::Continue(())
2113    }
2114
2115    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2116        let src = self.state[src].get_u64() as u16;
2117        self.state[dst].set_u64(src.into());
2118        ControlFlow::Continue(())
2119    }
2120
2121    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2122        let src = self.state[src].get_u64() as u32;
2123        self.state[dst].set_u64(src.into());
2124        ControlFlow::Continue(())
2125    }
2126
2127    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2128        let src = self.state[src].get_i64() as i8;
2129        self.state[dst].set_i64(src.into());
2130        ControlFlow::Continue(())
2131    }
2132
2133    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2134        let src = self.state[src].get_i64() as i16;
2135        self.state[dst].set_i64(src.into());
2136        ControlFlow::Continue(())
2137    }
2138
2139    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2140        let src = self.state[src].get_i64() as i32;
2141        self.state[dst].set_i64(src.into());
2142        ControlFlow::Continue(())
2143    }
2144
2145    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2146        let a = self.state[operands.src1].get_i32();
2147        let b = self.state[operands.src2].get_i32();
2148        match a.checked_div(b) {
2149            Some(result) => {
2150                self.state[operands.dst].set_i32(result);
2151                ControlFlow::Continue(())
2152            }
2153            None => {
2154                let kind = if b == 0 {
2155                    TrapKind::DivideByZero
2156                } else {
2157                    TrapKind::IntegerOverflow
2158                };
2159                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2160            }
2161        }
2162    }
2163
2164    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2165        let a = self.state[operands.src1].get_i64();
2166        let b = self.state[operands.src2].get_i64();
2167        match a.checked_div(b) {
2168            Some(result) => {
2169                self.state[operands.dst].set_i64(result);
2170                ControlFlow::Continue(())
2171            }
2172            None => {
2173                let kind = if b == 0 {
2174                    TrapKind::DivideByZero
2175                } else {
2176                    TrapKind::IntegerOverflow
2177                };
2178                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2179            }
2180        }
2181    }
2182
2183    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2184        let a = self.state[operands.src1].get_u32();
2185        let b = self.state[operands.src2].get_u32();
2186        match a.checked_div(b) {
2187            Some(result) => {
2188                self.state[operands.dst].set_u32(result);
2189                ControlFlow::Continue(())
2190            }
2191            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2192        }
2193    }
2194
2195    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2196        let a = self.state[operands.src1].get_u64();
2197        let b = self.state[operands.src2].get_u64();
2198        match a.checked_div(b) {
2199            Some(result) => {
2200                self.state[operands.dst].set_u64(result);
2201                ControlFlow::Continue(())
2202            }
2203            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2204        }
2205    }
2206
2207    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2208        let a = self.state[operands.src1].get_i32();
2209        let b = self.state[operands.src2].get_i32();
2210        let result = if a == i32::MIN && b == -1 {
2211            Some(0)
2212        } else {
2213            a.checked_rem(b)
2214        };
2215        match result {
2216            Some(result) => {
2217                self.state[operands.dst].set_i32(result);
2218                ControlFlow::Continue(())
2219            }
2220            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2221        }
2222    }
2223
2224    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2225        let a = self.state[operands.src1].get_i64();
2226        let b = self.state[operands.src2].get_i64();
2227        let result = if a == i64::MIN && b == -1 {
2228            Some(0)
2229        } else {
2230            a.checked_rem(b)
2231        };
2232        match result {
2233            Some(result) => {
2234                self.state[operands.dst].set_i64(result);
2235                ControlFlow::Continue(())
2236            }
2237            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2238        }
2239    }
2240
2241    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2242        let a = self.state[operands.src1].get_u32();
2243        let b = self.state[operands.src2].get_u32();
2244        match a.checked_rem(b) {
2245            Some(result) => {
2246                self.state[operands.dst].set_u32(result);
2247                ControlFlow::Continue(())
2248            }
2249            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2250        }
2251    }
2252
2253    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2254        let a = self.state[operands.src1].get_u64();
2255        let b = self.state[operands.src2].get_u64();
2256        match a.checked_rem(b) {
2257            Some(result) => {
2258                self.state[operands.dst].set_u64(result);
2259                ControlFlow::Continue(())
2260            }
2261            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2262        }
2263    }
2264
2265    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2266        let a = self.state[operands.src1].get_u32();
2267        let b = self.state[operands.src2].get_u32();
2268        self.state[operands.dst].set_u32(a & b);
2269        ControlFlow::Continue(())
2270    }
2271
2272    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2273        self.xband32_s32(dst, src1, src2.into())
2274    }
2275
2276    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2277        let a = self.state[src1].get_i32();
2278        self.state[dst].set_i32(a & src2);
2279        ControlFlow::Continue(())
2280    }
2281
2282    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2283        let a = self.state[operands.src1].get_u64();
2284        let b = self.state[operands.src2].get_u64();
2285        self.state[operands.dst].set_u64(a & b);
2286        ControlFlow::Continue(())
2287    }
2288
2289    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2290        self.xband64_s32(dst, src1, src2.into())
2291    }
2292
2293    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2294        let a = self.state[src1].get_i64();
2295        self.state[dst].set_i64(a & i64::from(src2));
2296        ControlFlow::Continue(())
2297    }
2298
2299    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2300        let a = self.state[operands.src1].get_u32();
2301        let b = self.state[operands.src2].get_u32();
2302        self.state[operands.dst].set_u32(a | b);
2303        ControlFlow::Continue(())
2304    }
2305
2306    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2307        self.xbor32_s32(dst, src1, src2.into())
2308    }
2309
2310    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2311        let a = self.state[src1].get_i32();
2312        self.state[dst].set_i32(a | src2);
2313        ControlFlow::Continue(())
2314    }
2315
2316    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2317        let a = self.state[operands.src1].get_u64();
2318        let b = self.state[operands.src2].get_u64();
2319        self.state[operands.dst].set_u64(a | b);
2320        ControlFlow::Continue(())
2321    }
2322
2323    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2324        self.xbor64_s32(dst, src1, src2.into())
2325    }
2326
2327    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2328        let a = self.state[src1].get_i64();
2329        self.state[dst].set_i64(a | i64::from(src2));
2330        ControlFlow::Continue(())
2331    }
2332
2333    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2334        let a = self.state[operands.src1].get_u32();
2335        let b = self.state[operands.src2].get_u32();
2336        self.state[operands.dst].set_u32(a ^ b);
2337        ControlFlow::Continue(())
2338    }
2339
2340    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2341        self.xbxor32_s32(dst, src1, src2.into())
2342    }
2343
2344    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2345        let a = self.state[src1].get_i32();
2346        self.state[dst].set_i32(a ^ src2);
2347        ControlFlow::Continue(())
2348    }
2349
2350    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2351        let a = self.state[operands.src1].get_u64();
2352        let b = self.state[operands.src2].get_u64();
2353        self.state[operands.dst].set_u64(a ^ b);
2354        ControlFlow::Continue(())
2355    }
2356
2357    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2358        self.xbxor64_s32(dst, src1, src2.into())
2359    }
2360
2361    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2362        let a = self.state[src1].get_i64();
2363        self.state[dst].set_i64(a ^ i64::from(src2));
2364        ControlFlow::Continue(())
2365    }
2366
2367    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2368        let a = self.state[src].get_u32();
2369        self.state[dst].set_u32(!a);
2370        ControlFlow::Continue(())
2371    }
2372
2373    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2374        let a = self.state[src].get_u64();
2375        self.state[dst].set_u64(!a);
2376        ControlFlow::Continue(())
2377    }
2378
2379    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2380        let a = self.state[operands.src1].get_u32();
2381        let b = self.state[operands.src2].get_u32();
2382        self.state[operands.dst].set_u32(a.min(b));
2383        ControlFlow::Continue(())
2384    }
2385
2386    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2387        let a = self.state[operands.src1].get_i32();
2388        let b = self.state[operands.src2].get_i32();
2389        self.state[operands.dst].set_i32(a.min(b));
2390        ControlFlow::Continue(())
2391    }
2392
2393    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2394        let a = self.state[operands.src1].get_u32();
2395        let b = self.state[operands.src2].get_u32();
2396        self.state[operands.dst].set_u32(a.max(b));
2397        ControlFlow::Continue(())
2398    }
2399
2400    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2401        let a = self.state[operands.src1].get_i32();
2402        let b = self.state[operands.src2].get_i32();
2403        self.state[operands.dst].set_i32(a.max(b));
2404        ControlFlow::Continue(())
2405    }
2406
2407    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2408        let a = self.state[operands.src1].get_u64();
2409        let b = self.state[operands.src2].get_u64();
2410        self.state[operands.dst].set_u64(a.min(b));
2411        ControlFlow::Continue(())
2412    }
2413
2414    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2415        let a = self.state[operands.src1].get_i64();
2416        let b = self.state[operands.src2].get_i64();
2417        self.state[operands.dst].set_i64(a.min(b));
2418        ControlFlow::Continue(())
2419    }
2420
2421    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2422        let a = self.state[operands.src1].get_u64();
2423        let b = self.state[operands.src2].get_u64();
2424        self.state[operands.dst].set_u64(a.max(b));
2425        ControlFlow::Continue(())
2426    }
2427
2428    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2429        let a = self.state[operands.src1].get_i64();
2430        let b = self.state[operands.src2].get_i64();
2431        self.state[operands.dst].set_i64(a.max(b));
2432        ControlFlow::Continue(())
2433    }
2434
2435    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2436        let a = self.state[src].get_u32();
2437        self.state[dst].set_u32(a.trailing_zeros());
2438        ControlFlow::Continue(())
2439    }
2440
2441    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2442        let a = self.state[src].get_u64();
2443        self.state[dst].set_u64(a.trailing_zeros().into());
2444        ControlFlow::Continue(())
2445    }
2446
2447    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2448        let a = self.state[src].get_u32();
2449        self.state[dst].set_u32(a.leading_zeros());
2450        ControlFlow::Continue(())
2451    }
2452
2453    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2454        let a = self.state[src].get_u64();
2455        self.state[dst].set_u64(a.leading_zeros().into());
2456        ControlFlow::Continue(())
2457    }
2458
2459    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2460        let a = self.state[src].get_u32();
2461        self.state[dst].set_u32(a.count_ones());
2462        ControlFlow::Continue(())
2463    }
2464
2465    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2466        let a = self.state[src].get_u64();
2467        self.state[dst].set_u64(a.count_ones().into());
2468        ControlFlow::Continue(())
2469    }
2470
2471    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2472        let a = self.state[operands.src1].get_u32();
2473        let b = self.state[operands.src2].get_u32();
2474        self.state[operands.dst].set_u32(a.rotate_left(b));
2475        ControlFlow::Continue(())
2476    }
2477
2478    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2479        let a = self.state[operands.src1].get_u64();
2480        let b = self.state[operands.src2].get_u32();
2481        self.state[operands.dst].set_u64(a.rotate_left(b));
2482        ControlFlow::Continue(())
2483    }
2484
2485    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2486        let a = self.state[operands.src1].get_u32();
2487        let b = self.state[operands.src2].get_u32();
2488        self.state[operands.dst].set_u32(a.rotate_right(b));
2489        ControlFlow::Continue(())
2490    }
2491
2492    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2493        let a = self.state[operands.src1].get_u64();
2494        let b = self.state[operands.src2].get_u32();
2495        self.state[operands.dst].set_u64(a.rotate_right(b));
2496        ControlFlow::Continue(())
2497    }
2498
2499    fn xselect32(
2500        &mut self,
2501        dst: XReg,
2502        cond: XReg,
2503        if_nonzero: XReg,
2504        if_zero: XReg,
2505    ) -> ControlFlow<Done> {
2506        let result = if self.state[cond].get_u32() != 0 {
2507            self.state[if_nonzero].get_u32()
2508        } else {
2509            self.state[if_zero].get_u32()
2510        };
2511        self.state[dst].set_u32(result);
2512        ControlFlow::Continue(())
2513    }
2514
2515    fn xselect64(
2516        &mut self,
2517        dst: XReg,
2518        cond: XReg,
2519        if_nonzero: XReg,
2520        if_zero: XReg,
2521    ) -> ControlFlow<Done> {
2522        let result = if self.state[cond].get_u32() != 0 {
2523            self.state[if_nonzero].get_u64()
2524        } else {
2525            self.state[if_zero].get_u64()
2526        };
2527        self.state[dst].set_u64(result);
2528        ControlFlow::Continue(())
2529    }
2530
2531    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2532        let a = self.state[src].get_i32();
2533        self.state[dst].set_i32(a.wrapping_abs());
2534        ControlFlow::Continue(())
2535    }
2536
2537    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2538        let a = self.state[src].get_i64();
2539        self.state[dst].set_i64(a.wrapping_abs());
2540        ControlFlow::Continue(())
2541    }
2542
2543    // =========================================================================
2544    // o32 addressing modes
2545
2546    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2547        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2548        self.state[dst].set_u32(result.into());
2549        ControlFlow::Continue(())
2550    }
2551
2552    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2553        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2554        self.state[dst].set_i32(result.into());
2555        ControlFlow::Continue(())
2556    }
2557
2558    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2559        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2560        self.state[dst].set_u32(u16::from_le(result).into());
2561        ControlFlow::Continue(())
2562    }
2563
2564    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2565        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2566        self.state[dst].set_i32(i16::from_le(result).into());
2567        ControlFlow::Continue(())
2568    }
2569
2570    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2571        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2572        self.state[dst].set_i32(i32::from_le(result));
2573        ControlFlow::Continue(())
2574    }
2575
2576    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2577        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2578        self.state[dst].set_i64(i64::from_le(result));
2579        ControlFlow::Continue(())
2580    }
2581
2582    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2583        let val = self.state[val].get_u32() as u8;
2584        unsafe {
2585            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2586        }
2587        ControlFlow::Continue(())
2588    }
2589
2590    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2591        let val = self.state[val].get_u32() as u16;
2592        unsafe {
2593            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2594        }
2595        ControlFlow::Continue(())
2596    }
2597
2598    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2599        let val = self.state[val].get_u32();
2600        unsafe {
2601            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2602        }
2603        ControlFlow::Continue(())
2604    }
2605
2606    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2607        let val = self.state[val].get_u64();
2608        unsafe {
2609            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2610        }
2611        ControlFlow::Continue(())
2612    }
2613
2614    // =========================================================================
2615    // g32 addressing modes
2616
2617    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2618        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2619        self.state[dst].set_u32(result.into());
2620        ControlFlow::Continue(())
2621    }
2622
2623    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2624        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2625        self.state[dst].set_i32(result.into());
2626        ControlFlow::Continue(())
2627    }
2628
2629    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2630        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2631        self.state[dst].set_u32(u16::from_le(result).into());
2632        ControlFlow::Continue(())
2633    }
2634
2635    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2636        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2637        self.state[dst].set_i32(i16::from_le(result).into());
2638        ControlFlow::Continue(())
2639    }
2640
2641    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2642        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2643        self.state[dst].set_i32(i32::from_le(result));
2644        ControlFlow::Continue(())
2645    }
2646
2647    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2648        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2649        self.state[dst].set_i64(i64::from_le(result));
2650        ControlFlow::Continue(())
2651    }
2652
2653    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2654        let val = self.state[val].get_u32() as u8;
2655        unsafe {
2656            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2657        }
2658        ControlFlow::Continue(())
2659    }
2660
2661    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2662        let val = self.state[val].get_u32() as u16;
2663        unsafe {
2664            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2665        }
2666        ControlFlow::Continue(())
2667    }
2668
2669    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2670        let val = self.state[val].get_u32();
2671        unsafe {
2672            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2673        }
2674        ControlFlow::Continue(())
2675    }
2676
2677    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2678        let val = self.state[val].get_u64();
2679        unsafe {
2680            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2681        }
2682        ControlFlow::Continue(())
2683    }
2684
2685    // =========================================================================
2686    // z addressing modes
2687
2688    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2689        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2690        self.state[dst].set_u32(result.into());
2691        ControlFlow::Continue(())
2692    }
2693
2694    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2695        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2696        self.state[dst].set_i32(result.into());
2697        ControlFlow::Continue(())
2698    }
2699
2700    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2701        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2702        self.state[dst].set_u32(u16::from_le(result).into());
2703        ControlFlow::Continue(())
2704    }
2705
2706    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2707        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2708        self.state[dst].set_i32(i16::from_le(result).into());
2709        ControlFlow::Continue(())
2710    }
2711
2712    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2713        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2714        self.state[dst].set_i32(i32::from_le(result));
2715        ControlFlow::Continue(())
2716    }
2717
2718    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2719        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2720        self.state[dst].set_i64(i64::from_le(result));
2721        ControlFlow::Continue(())
2722    }
2723
2724    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2725        let val = self.state[val].get_u32() as u8;
2726        unsafe {
2727            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2728        }
2729        ControlFlow::Continue(())
2730    }
2731
2732    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2733        let val = self.state[val].get_u32() as u16;
2734        unsafe {
2735            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2736        }
2737        ControlFlow::Continue(())
2738    }
2739
2740    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2741        let val = self.state[val].get_u32();
2742        unsafe {
2743            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2744        }
2745        ControlFlow::Continue(())
2746    }
2747
2748    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2749        let val = self.state[val].get_u64();
2750        unsafe {
2751            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2752        }
2753        ControlFlow::Continue(())
2754    }
2755
2756    // =========================================================================
2757    // g32bne addressing modes
2758
2759    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2760        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2761        self.state[dst].set_u32(result.into());
2762        ControlFlow::Continue(())
2763    }
2764
2765    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2766        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2767        self.state[dst].set_i32(result.into());
2768        ControlFlow::Continue(())
2769    }
2770
2771    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2772        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2773        self.state[dst].set_u32(u16::from_le(result).into());
2774        ControlFlow::Continue(())
2775    }
2776
2777    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2778        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2779        self.state[dst].set_i32(i16::from_le(result).into());
2780        ControlFlow::Continue(())
2781    }
2782
2783    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2784        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2785        self.state[dst].set_i32(i32::from_le(result));
2786        ControlFlow::Continue(())
2787    }
2788
2789    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2790        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2791        self.state[dst].set_i64(i64::from_le(result));
2792        ControlFlow::Continue(())
2793    }
2794
2795    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2796        let val = self.state[val].get_u32() as u8;
2797        unsafe {
2798            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2799        }
2800        ControlFlow::Continue(())
2801    }
2802
2803    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2804        let val = self.state[val].get_u32() as u16;
2805        unsafe {
2806            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2807        }
2808        ControlFlow::Continue(())
2809    }
2810
2811    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2812        let val = self.state[val].get_u32();
2813        unsafe {
2814            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2815        }
2816        ControlFlow::Continue(())
2817    }
2818
2819    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2820        let val = self.state[val].get_u64();
2821        unsafe {
2822            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2823        }
2824        ControlFlow::Continue(())
2825    }
2826}
2827
2828impl ExtendedOpVisitor for Interpreter<'_> {
    fn trap(&mut self) -> ControlFlow<Done> {
        // Unconditional trap: stop interpretation, attributing the fault to
        // the `Trap` opcode.
        self.done_trap::<crate::Trap>()
    }
2832
    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
        // Leave the interpreter loop to invoke the host function identified
        // by `id` (dispatch handled by `done_call_indirect_host`).
        self.done_call_indirect_host(id)
    }
2836
    fn xpcadd(&mut self, dst: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
        // Materialize a PC-relative address into `dst`: the offset is
        // resolved against the current instruction via `pc_rel`.
        let pc = self.pc_rel::<crate::Xpcadd>(offset);
        self.state[dst].set_ptr(pc.as_ptr());
        ControlFlow::Continue(())
    }
2842
2843    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2844        let src = self.state[src].get_u32();
2845        self.state[dst].set_u32(src.swap_bytes());
2846        ControlFlow::Continue(())
2847    }
2848
2849    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2850        let src = self.state[src].get_u64();
2851        self.state[dst].set_u64(src.swap_bytes());
2852        ControlFlow::Continue(())
2853    }
2854
2855    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2856        let a = self.state[src].get_u32();
2857        if a == 0 {
2858            self.state[dst].set_u32(0);
2859        } else {
2860            self.state[dst].set_i32(-1);
2861        }
2862        ControlFlow::Continue(())
2863    }
2864
2865    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2866        let a = self.state[src].get_u64();
2867        if a == 0 {
2868            self.state[dst].set_u64(0);
2869        } else {
2870            self.state[dst].set_i64(-1);
2871        }
2872        ControlFlow::Continue(())
2873    }
2874
2875    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2876        let a = self.state[operands.src1].get_u32();
2877        let b = self.state[operands.src2].get_u32();
2878        match a.checked_add(b) {
2879            Some(c) => {
2880                self.state[operands.dst].set_u32(c);
2881                ControlFlow::Continue(())
2882            }
2883            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2884        }
2885    }
2886
2887    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2888        let a = self.state[operands.src1].get_u64();
2889        let b = self.state[operands.src2].get_u64();
2890        match a.checked_add(b) {
2891            Some(c) => {
2892                self.state[operands.dst].set_u64(c);
2893                ControlFlow::Continue(())
2894            }
2895            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2896        }
2897    }
2898
2899    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2900        let a = self.state[operands.src1].get_i64();
2901        let b = self.state[operands.src2].get_i64();
2902        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2903        self.state[operands.dst].set_i64(result);
2904        ControlFlow::Continue(())
2905    }
2906
2907    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2908        let a = self.state[operands.src1].get_u64();
2909        let b = self.state[operands.src2].get_u64();
2910        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2911        self.state[operands.dst].set_u64(result);
2912        ControlFlow::Continue(())
2913    }
2914
2915    // =========================================================================
2916    // o32 addressing modes for big-endian X-registers
2917
2918    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2919        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2920        self.state[dst].set_u32(u16::from_be(result).into());
2921        ControlFlow::Continue(())
2922    }
2923
2924    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2925        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2926        self.state[dst].set_i32(i16::from_be(result).into());
2927        ControlFlow::Continue(())
2928    }
2929
2930    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2931        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2932        self.state[dst].set_i32(i32::from_be(result));
2933        ControlFlow::Continue(())
2934    }
2935
2936    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2937        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2938        self.state[dst].set_i64(i64::from_be(result));
2939        ControlFlow::Continue(())
2940    }
2941
2942    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2943        let val = self.state[val].get_u32() as u16;
2944        unsafe {
2945            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2946        }
2947        ControlFlow::Continue(())
2948    }
2949
2950    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2951        let val = self.state[val].get_u32();
2952        unsafe {
2953            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2954        }
2955        ControlFlow::Continue(())
2956    }
2957
2958    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2959        let val = self.state[val].get_u64();
2960        unsafe {
2961            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2962        }
2963        ControlFlow::Continue(())
2964    }
2965
2966    // =========================================================================
2967    // o32 addressing modes for little-endian F-registers
2968
2969    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2970        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2971        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2972        ControlFlow::Continue(())
2973    }
2974
2975    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2976        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2977        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2978        ControlFlow::Continue(())
2979    }
2980
2981    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2982        let val = self.state[src].get_f32();
2983        unsafe {
2984            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2985        }
2986        ControlFlow::Continue(())
2987    }
2988
2989    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2990        let val = self.state[src].get_f64();
2991        unsafe {
2992            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2993        }
2994        ControlFlow::Continue(())
2995    }
2996
2997    // =========================================================================
2998    // o32 addressing modes for big-endian F-registers
2999
3000    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3001        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
3002        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
3003        ControlFlow::Continue(())
3004    }
3005
3006    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
3007        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
3008        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
3009        ControlFlow::Continue(())
3010    }
3011
3012    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3013        let val = self.state[src].get_f32();
3014        unsafe {
3015            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
3016        }
3017        ControlFlow::Continue(())
3018    }
3019
3020    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
3021        let val = self.state[src].get_f64();
3022        unsafe {
3023            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
3024        }
3025        ControlFlow::Continue(())
3026    }
3027
3028    // =========================================================================
3029    // z addressing modes for little-endian F-registers
3030
3031    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3032        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
3033        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3034        ControlFlow::Continue(())
3035    }
3036
3037    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
3038        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
3039        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3040        ControlFlow::Continue(())
3041    }
3042
3043    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3044        let val = self.state[src].get_f32();
3045        unsafe {
3046            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3047        }
3048        ControlFlow::Continue(())
3049    }
3050
3051    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3052        let val = self.state[src].get_f64();
3053        unsafe {
3054            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3055        }
3056        ControlFlow::Continue(())
3057    }
3058
3059    // =========================================================================
3060    // g32 addressing modes for little-endian F-registers
3061
3062    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3063        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3064        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3065        ControlFlow::Continue(())
3066    }
3067
3068    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3069        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3070        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3071        ControlFlow::Continue(())
3072    }
3073
3074    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3075        let val = self.state[src].get_f32();
3076        unsafe {
3077            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3078        }
3079        ControlFlow::Continue(())
3080    }
3081
3082    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3083        let val = self.state[src].get_f64();
3084        unsafe {
3085            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3086        }
3087        ControlFlow::Continue(())
3088    }
3089
3090    // =========================================================================
3091    // o32 addressing modes for little-endian V-registers
3092
3093    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3094    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3095        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3096        self.state[dst].set_u128(u128::from_le(val));
3097        ControlFlow::Continue(())
3098    }
3099
3100    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3101    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3102        let val = self.state[src].get_u128();
3103        unsafe {
3104            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3105        }
3106        ControlFlow::Continue(())
3107    }
3108
3109    // =========================================================================
3110    // z addressing modes for little-endian V-registers
3111
3112    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3113    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3114        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3115        self.state[dst].set_u128(u128::from_le(val));
3116        ControlFlow::Continue(())
3117    }
3118
3119    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3120    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3121        let val = self.state[src].get_u128();
3122        unsafe {
3123            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3124        }
3125        ControlFlow::Continue(())
3126    }
3127
3128    // =========================================================================
3129    // g32 addressing modes for little-endian V-registers
3130
3131    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3132    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3133        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3134        self.state[dst].set_u128(u128::from_le(val));
3135        ControlFlow::Continue(())
3136    }
3137
3138    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3139    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3140        let val = self.state[src].get_u128();
3141        unsafe {
3142            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3143        }
3144        ControlFlow::Continue(())
3145    }
3146
3147    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3148        let fp = self.state.fp;
3149        self.state[dst].set_ptr(fp);
3150        ControlFlow::Continue(())
3151    }
3152
3153    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3154        let lr = self.state.lr;
3155        self.state[dst].set_ptr(lr);
3156        ControlFlow::Continue(())
3157    }
3158
3159    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3160        let val = self.state[src];
3161        self.state[dst] = val;
3162        ControlFlow::Continue(())
3163    }
3164
3165    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3166    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3167        let val = self.state[src];
3168        self.state[dst] = val;
3169        ControlFlow::Continue(())
3170    }
3171
    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
        // Load an f32 immediate given as its raw IEEE 754 bit pattern.
        self.state[dst].set_f32(f32::from_bits(bits));
        ControlFlow::Continue(())
    }
3176
    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
        // Load an f64 immediate given as its raw IEEE 754 bit pattern.
        self.state[dst].set_f64(f64::from_bits(bits));
        ControlFlow::Continue(())
    }
3181
    // Bitcasts between integer and float registers: the 32/64-bit pattern
    // is moved verbatim via `to_bits`/`from_bits`; no numeric conversion
    // occurs.

    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f32();
        self.state[dst].set_u32(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
        let val = self.state[src].get_f64();
        self.state[dst].set_u64(val.to_bits());
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u32();
        self.state[dst].set_f32(f32::from_bits(val));
        ControlFlow::Continue(())
    }

    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
        let val = self.state[src].get_u64();
        self.state[dst].set_f64(f64::from_bits(val));
        ControlFlow::Continue(())
    }
3205
    // Float comparisons. These use Rust's `PartialEq`/`PartialOrd` on
    // `f32`/`f64`, which follow IEEE 754: when either operand is NaN,
    // `==`, `<`, and `<=` evaluate to false and `!=` evaluates to true.
    // The boolean result is widened to 0/1 in the low 32 bits of `dst`.

    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f32();
        let b = self.state[src2].get_f32();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }

    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a == b));
        ControlFlow::Continue(())
    }

    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a != b));
        ControlFlow::Continue(())
    }

    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a < b));
        ControlFlow::Continue(())
    }

    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
        let a = self.state[src1].get_f64();
        let b = self.state[src2].get_f64();
        self.state[dst].set_u32(u32::from(a <= b));
        ControlFlow::Continue(())
    }
3261
3262    fn fselect32(
3263        &mut self,
3264        dst: FReg,
3265        cond: XReg,
3266        if_nonzero: FReg,
3267        if_zero: FReg,
3268    ) -> ControlFlow<Done> {
3269        let result = if self.state[cond].get_u32() != 0 {
3270            self.state[if_nonzero].get_f32()
3271        } else {
3272            self.state[if_zero].get_f32()
3273        };
3274        self.state[dst].set_f32(result);
3275        ControlFlow::Continue(())
3276    }
3277
3278    fn fselect64(
3279        &mut self,
3280        dst: FReg,
3281        cond: XReg,
3282        if_nonzero: FReg,
3283        if_zero: FReg,
3284    ) -> ControlFlow<Done> {
3285        let result = if self.state[cond].get_u32() != 0 {
3286            self.state[if_nonzero].get_f64()
3287        } else {
3288            self.state[if_zero].get_f64()
3289        };
3290        self.state[dst].set_f64(result);
3291        ControlFlow::Continue(())
3292    }
3293
3294    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3295        let a = self.state[src].get_i32();
3296        self.state[dst].set_f32(a as f32);
3297        ControlFlow::Continue(())
3298    }
3299
3300    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3301        let a = self.state[src].get_u32();
3302        self.state[dst].set_f32(a as f32);
3303        ControlFlow::Continue(())
3304    }
3305
3306    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3307        let a = self.state[src].get_i64();
3308        self.state[dst].set_f32(a as f32);
3309        ControlFlow::Continue(())
3310    }
3311
3312    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3313        let a = self.state[src].get_u64();
3314        self.state[dst].set_f32(a as f32);
3315        ControlFlow::Continue(())
3316    }
3317
3318    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3319        let a = self.state[src].get_i32();
3320        self.state[dst].set_f64(a as f64);
3321        ControlFlow::Continue(())
3322    }
3323
3324    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3325        let a = self.state[src].get_u32();
3326        self.state[dst].set_f64(a as f64);
3327        ControlFlow::Continue(())
3328    }
3329
3330    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3331        let a = self.state[src].get_i64();
3332        self.state[dst].set_f64(a as f64);
3333        ControlFlow::Continue(())
3334    }
3335
3336    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3337        let a = self.state[src].get_u64();
3338        self.state[dst].set_f64(a as f64);
3339        ControlFlow::Continue(())
3340    }
3341
3342    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3343        let a = self.state[src].get_f32();
3344        self.check_xnn_from_f32::<crate::X32FromF32S>(a, f32_cvt_to_int_bounds(true, 32))?;
3345        self.state[dst].set_i32(a as i32);
3346        ControlFlow::Continue(())
3347    }
3348
3349    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3350        let a = self.state[src].get_f32();
3351        self.check_xnn_from_f32::<crate::X32FromF32U>(a, f32_cvt_to_int_bounds(false, 32))?;
3352        self.state[dst].set_u32(a as u32);
3353        ControlFlow::Continue(())
3354    }
3355
3356    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3357        let a = self.state[src].get_f32();
3358        self.check_xnn_from_f32::<crate::X64FromF32S>(a, f32_cvt_to_int_bounds(true, 64))?;
3359        self.state[dst].set_i64(a as i64);
3360        ControlFlow::Continue(())
3361    }
3362
3363    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3364        let a = self.state[src].get_f32();
3365        self.check_xnn_from_f32::<crate::X64FromF32U>(a, f32_cvt_to_int_bounds(false, 64))?;
3366        self.state[dst].set_u64(a as u64);
3367        ControlFlow::Continue(())
3368    }
3369
3370    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3371        let a = self.state[src].get_f64();
3372        self.check_xnn_from_f64::<crate::X32FromF64S>(a, f64_cvt_to_int_bounds(true, 32))?;
3373        self.state[dst].set_i32(a as i32);
3374        ControlFlow::Continue(())
3375    }
3376
3377    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3378        let a = self.state[src].get_f64();
3379        self.check_xnn_from_f64::<crate::X32FromF64U>(a, f64_cvt_to_int_bounds(false, 32))?;
3380        self.state[dst].set_u32(a as u32);
3381        ControlFlow::Continue(())
3382    }
3383
3384    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3385        let a = self.state[src].get_f64();
3386        self.check_xnn_from_f64::<crate::X64FromF64S>(a, f64_cvt_to_int_bounds(true, 64))?;
3387        self.state[dst].set_i64(a as i64);
3388        ControlFlow::Continue(())
3389    }
3390
3391    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3392        let a = self.state[src].get_f64();
3393        self.check_xnn_from_f64::<crate::X64FromF64U>(a, f64_cvt_to_int_bounds(false, 64))?;
3394        self.state[dst].set_u64(a as u64);
3395        ControlFlow::Continue(())
3396    }
3397
3398    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3399        let a = self.state[src].get_f32();
3400        self.state[dst].set_i32(a as i32);
3401        ControlFlow::Continue(())
3402    }
3403
3404    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3405        let a = self.state[src].get_f32();
3406        self.state[dst].set_u32(a as u32);
3407        ControlFlow::Continue(())
3408    }
3409
3410    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3411        let a = self.state[src].get_f32();
3412        self.state[dst].set_i64(a as i64);
3413        ControlFlow::Continue(())
3414    }
3415
3416    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3417        let a = self.state[src].get_f32();
3418        self.state[dst].set_u64(a as u64);
3419        ControlFlow::Continue(())
3420    }
3421
3422    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3423        let a = self.state[src].get_f64();
3424        self.state[dst].set_i32(a as i32);
3425        ControlFlow::Continue(())
3426    }
3427
3428    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3429        let a = self.state[src].get_f64();
3430        self.state[dst].set_u32(a as u32);
3431        ControlFlow::Continue(())
3432    }
3433
3434    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3435        let a = self.state[src].get_f64();
3436        self.state[dst].set_i64(a as i64);
3437        ControlFlow::Continue(())
3438    }
3439
3440    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3441        let a = self.state[src].get_f64();
3442        self.state[dst].set_u64(a as u64);
3443        ControlFlow::Continue(())
3444    }
3445
3446    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3447        let a = self.state[src].get_f64();
3448        self.state[dst].set_f32(a as f32);
3449        ControlFlow::Continue(())
3450    }
3451
3452    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3453        let a = self.state[src].get_f32();
3454        self.state[dst].set_f64(a.into());
3455        ControlFlow::Continue(())
3456    }
3457
3458    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3459        let a = self.state[operands.src1].get_f32();
3460        let b = self.state[operands.src2].get_f32();
3461        self.state[operands.dst].set_f32(a.wasm_copysign(b));
3462        ControlFlow::Continue(())
3463    }
3464
3465    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3466        let a = self.state[operands.src1].get_f64();
3467        let b = self.state[operands.src2].get_f64();
3468        self.state[operands.dst].set_f64(a.wasm_copysign(b));
3469        ControlFlow::Continue(())
3470    }
3471
3472    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3473        let a = self.state[operands.src1].get_f32();
3474        let b = self.state[operands.src2].get_f32();
3475        self.state[operands.dst].set_f32(a + b);
3476        ControlFlow::Continue(())
3477    }
3478
3479    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3480        let a = self.state[operands.src1].get_f32();
3481        let b = self.state[operands.src2].get_f32();
3482        self.state[operands.dst].set_f32(a - b);
3483        ControlFlow::Continue(())
3484    }
3485
3486    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3487    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3488        let mut a = self.state[operands.src1].get_f32x4();
3489        let b = self.state[operands.src2].get_f32x4();
3490        for (a, b) in a.iter_mut().zip(b) {
3491            *a = *a - b;
3492        }
3493        self.state[operands.dst].set_f32x4(a);
3494        ControlFlow::Continue(())
3495    }
3496
3497    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3498        let a = self.state[operands.src1].get_f32();
3499        let b = self.state[operands.src2].get_f32();
3500        self.state[operands.dst].set_f32(a * b);
3501        ControlFlow::Continue(())
3502    }
3503
3504    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3505    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3506        let mut a = self.state[operands.src1].get_f32x4();
3507        let b = self.state[operands.src2].get_f32x4();
3508        for (a, b) in a.iter_mut().zip(b) {
3509            *a = *a * b;
3510        }
3511        self.state[operands.dst].set_f32x4(a);
3512        ControlFlow::Continue(())
3513    }
3514
3515    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3516        let a = self.state[operands.src1].get_f32();
3517        let b = self.state[operands.src2].get_f32();
3518        self.state[operands.dst].set_f32(a / b);
3519        ControlFlow::Continue(())
3520    }
3521
3522    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3523    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3524        let a = self.state[operands.src1].get_f32x4();
3525        let b = self.state[operands.src2].get_f32x4();
3526        let mut result = [0.0f32; 4];
3527
3528        for i in 0..4 {
3529            result[i] = a[i] / b[i];
3530        }
3531
3532        self.state[operands.dst].set_f32x4(result);
3533        ControlFlow::Continue(())
3534    }
3535
3536    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3537    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3538        let a = self.state[operands.src1].get_f64x2();
3539        let b = self.state[operands.src2].get_f64x2();
3540        let mut result = [0.0f64; 2];
3541
3542        for i in 0..2 {
3543            result[i] = a[i] / b[i];
3544        }
3545
3546        self.state[operands.dst].set_f64x2(result);
3547        ControlFlow::Continue(())
3548    }
3549
3550    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3551        let a = self.state[operands.src1].get_f32();
3552        let b = self.state[operands.src2].get_f32();
3553        self.state[operands.dst].set_f32(a.wasm_maximum(b));
3554        ControlFlow::Continue(())
3555    }
3556
3557    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3558        let a = self.state[operands.src1].get_f32();
3559        let b = self.state[operands.src2].get_f32();
3560        self.state[operands.dst].set_f32(a.wasm_minimum(b));
3561        ControlFlow::Continue(())
3562    }
3563
3564    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3565        let a = self.state[src].get_f32();
3566        self.state[dst].set_f32(a.wasm_trunc());
3567        ControlFlow::Continue(())
3568    }
3569
3570    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3571    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3572        let mut a = self.state[src].get_f32x4();
3573        for elem in a.iter_mut() {
3574            *elem = elem.wasm_trunc();
3575        }
3576        self.state[dst].set_f32x4(a);
3577        ControlFlow::Continue(())
3578    }
3579
3580    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3581    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3582        let mut a = self.state[src].get_f64x2();
3583        for elem in a.iter_mut() {
3584            *elem = elem.wasm_trunc();
3585        }
3586        self.state[dst].set_f64x2(a);
3587        ControlFlow::Continue(())
3588    }
3589
3590    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3591        let a = self.state[src].get_f32();
3592        self.state[dst].set_f32(a.wasm_floor());
3593        ControlFlow::Continue(())
3594    }
3595
3596    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3597    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3598        let mut a = self.state[src].get_f32x4();
3599        for elem in a.iter_mut() {
3600            *elem = elem.wasm_floor();
3601        }
3602        self.state[dst].set_f32x4(a);
3603        ControlFlow::Continue(())
3604    }
3605
3606    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3607    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3608        let mut a = self.state[src].get_f64x2();
3609        for elem in a.iter_mut() {
3610            *elem = elem.wasm_floor();
3611        }
3612        self.state[dst].set_f64x2(a);
3613        ControlFlow::Continue(())
3614    }
3615
3616    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3617        let a = self.state[src].get_f32();
3618        self.state[dst].set_f32(a.wasm_ceil());
3619        ControlFlow::Continue(())
3620    }
3621
3622    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3623    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3624        let mut a = self.state[src].get_f32x4();
3625        for elem in a.iter_mut() {
3626            *elem = elem.wasm_ceil();
3627        }
3628        self.state[dst].set_f32x4(a);
3629
3630        ControlFlow::Continue(())
3631    }
3632
3633    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3634    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3635        let mut a = self.state[src].get_f64x2();
3636        for elem in a.iter_mut() {
3637            *elem = elem.wasm_ceil();
3638        }
3639        self.state[dst].set_f64x2(a);
3640
3641        ControlFlow::Continue(())
3642    }
3643
3644    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3645        let a = self.state[src].get_f32();
3646        self.state[dst].set_f32(a.wasm_nearest());
3647        ControlFlow::Continue(())
3648    }
3649
3650    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3651    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3652        let mut a = self.state[src].get_f32x4();
3653        for elem in a.iter_mut() {
3654            *elem = elem.wasm_nearest();
3655        }
3656        self.state[dst].set_f32x4(a);
3657        ControlFlow::Continue(())
3658    }
3659
3660    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3661    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3662        let mut a = self.state[src].get_f64x2();
3663        for elem in a.iter_mut() {
3664            *elem = elem.wasm_nearest();
3665        }
3666        self.state[dst].set_f64x2(a);
3667        ControlFlow::Continue(())
3668    }
3669
3670    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3671        let a = self.state[src].get_f32();
3672        self.state[dst].set_f32(a.wasm_sqrt());
3673        ControlFlow::Continue(())
3674    }
3675
3676    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3677    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3678        let mut a = self.state[src].get_f32x4();
3679        for elem in a.iter_mut() {
3680            *elem = elem.wasm_sqrt();
3681        }
3682        self.state[dst].set_f32x4(a);
3683        ControlFlow::Continue(())
3684    }
3685
3686    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3687    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3688        let mut a = self.state[src].get_f64x2();
3689        for elem in a.iter_mut() {
3690            *elem = elem.wasm_sqrt();
3691        }
3692        self.state[dst].set_f64x2(a);
3693        ControlFlow::Continue(())
3694    }
3695
3696    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3697        let a = self.state[src].get_f32();
3698        self.state[dst].set_f32(-a);
3699        ControlFlow::Continue(())
3700    }
3701
3702    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3703    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3704        let mut a = self.state[src].get_f32x4();
3705        for elem in a.iter_mut() {
3706            *elem = -*elem;
3707        }
3708        self.state[dst].set_f32x4(a);
3709        ControlFlow::Continue(())
3710    }
3711
3712    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3713        let a = self.state[src].get_f32();
3714        self.state[dst].set_f32(a.wasm_abs());
3715        ControlFlow::Continue(())
3716    }
3717
3718    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3719        let a = self.state[operands.src1].get_f64();
3720        let b = self.state[operands.src2].get_f64();
3721        self.state[operands.dst].set_f64(a + b);
3722        ControlFlow::Continue(())
3723    }
3724
3725    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3726        let a = self.state[operands.src1].get_f64();
3727        let b = self.state[operands.src2].get_f64();
3728        self.state[operands.dst].set_f64(a - b);
3729        ControlFlow::Continue(())
3730    }
3731
3732    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3733        let a = self.state[operands.src1].get_f64();
3734        let b = self.state[operands.src2].get_f64();
3735        self.state[operands.dst].set_f64(a * b);
3736        ControlFlow::Continue(())
3737    }
3738
3739    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3740        let a = self.state[operands.src1].get_f64();
3741        let b = self.state[operands.src2].get_f64();
3742        self.state[operands.dst].set_f64(a / b);
3743        ControlFlow::Continue(())
3744    }
3745
3746    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3747        let a = self.state[operands.src1].get_f64();
3748        let b = self.state[operands.src2].get_f64();
3749        self.state[operands.dst].set_f64(a.wasm_maximum(b));
3750        ControlFlow::Continue(())
3751    }
3752
3753    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3754        let a = self.state[operands.src1].get_f64();
3755        let b = self.state[operands.src2].get_f64();
3756        self.state[operands.dst].set_f64(a.wasm_minimum(b));
3757        ControlFlow::Continue(())
3758    }
3759
3760    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3761        let a = self.state[src].get_f64();
3762        self.state[dst].set_f64(a.wasm_trunc());
3763        ControlFlow::Continue(())
3764    }
3765
3766    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3767        let a = self.state[src].get_f64();
3768        self.state[dst].set_f64(a.wasm_floor());
3769        ControlFlow::Continue(())
3770    }
3771
3772    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3773        let a = self.state[src].get_f64();
3774        self.state[dst].set_f64(a.wasm_ceil());
3775        ControlFlow::Continue(())
3776    }
3777
3778    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3779        let a = self.state[src].get_f64();
3780        self.state[dst].set_f64(a.wasm_nearest());
3781        ControlFlow::Continue(())
3782    }
3783
3784    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3785        let a = self.state[src].get_f64();
3786        self.state[dst].set_f64(a.wasm_sqrt());
3787        ControlFlow::Continue(())
3788    }
3789
3790    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3791        let a = self.state[src].get_f64();
3792        self.state[dst].set_f64(-a);
3793        ControlFlow::Continue(())
3794    }
3795
3796    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3797        let a = self.state[src].get_f64();
3798        self.state[dst].set_f64(a.wasm_abs());
3799        ControlFlow::Continue(())
3800    }
3801
3802    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3803    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3804        let mut a = self.state[operands.src1].get_i8x16();
3805        let b = self.state[operands.src2].get_i8x16();
3806        for (a, b) in a.iter_mut().zip(b) {
3807            *a = a.wrapping_add(b);
3808        }
3809        self.state[operands.dst].set_i8x16(a);
3810        ControlFlow::Continue(())
3811    }
3812
3813    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3814    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3815        let mut a = self.state[operands.src1].get_i16x8();
3816        let b = self.state[operands.src2].get_i16x8();
3817        for (a, b) in a.iter_mut().zip(b) {
3818            *a = a.wrapping_add(b);
3819        }
3820        self.state[operands.dst].set_i16x8(a);
3821        ControlFlow::Continue(())
3822    }
3823
3824    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3825    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3826        let mut a = self.state[operands.src1].get_i32x4();
3827        let b = self.state[operands.src2].get_i32x4();
3828        for (a, b) in a.iter_mut().zip(b) {
3829            *a = a.wrapping_add(b);
3830        }
3831        self.state[operands.dst].set_i32x4(a);
3832        ControlFlow::Continue(())
3833    }
3834
3835    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3836    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3837        let mut a = self.state[operands.src1].get_i64x2();
3838        let b = self.state[operands.src2].get_i64x2();
3839        for (a, b) in a.iter_mut().zip(b) {
3840            *a = a.wrapping_add(b);
3841        }
3842        self.state[operands.dst].set_i64x2(a);
3843        ControlFlow::Continue(())
3844    }
3845
3846    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3847    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3848        let mut a = self.state[operands.src1].get_f32x4();
3849        let b = self.state[operands.src2].get_f32x4();
3850        for (a, b) in a.iter_mut().zip(b) {
3851            *a += b;
3852        }
3853        self.state[operands.dst].set_f32x4(a);
3854        ControlFlow::Continue(())
3855    }
3856
3857    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3858    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3859        let mut a = self.state[operands.src1].get_f64x2();
3860        let b = self.state[operands.src2].get_f64x2();
3861        for (a, b) in a.iter_mut().zip(b) {
3862            *a += b;
3863        }
3864        self.state[operands.dst].set_f64x2(a);
3865        ControlFlow::Continue(())
3866    }
3867
3868    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3869    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3870        let mut a = self.state[operands.src1].get_i8x16();
3871        let b = self.state[operands.src2].get_i8x16();
3872        for (a, b) in a.iter_mut().zip(b) {
3873            *a = (*a).saturating_add(b);
3874        }
3875        self.state[operands.dst].set_i8x16(a);
3876        ControlFlow::Continue(())
3877    }
3878
3879    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3880    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3881        let mut a = self.state[operands.src1].get_u8x16();
3882        let b = self.state[operands.src2].get_u8x16();
3883        for (a, b) in a.iter_mut().zip(b) {
3884            *a = (*a).saturating_add(b);
3885        }
3886        self.state[operands.dst].set_u8x16(a);
3887        ControlFlow::Continue(())
3888    }
3889
3890    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3891    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3892        let mut a = self.state[operands.src1].get_i16x8();
3893        let b = self.state[operands.src2].get_i16x8();
3894        for (a, b) in a.iter_mut().zip(b) {
3895            *a = (*a).saturating_add(b);
3896        }
3897        self.state[operands.dst].set_i16x8(a);
3898        ControlFlow::Continue(())
3899    }
3900
3901    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3902    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3903        let mut a = self.state[operands.src1].get_u16x8();
3904        let b = self.state[operands.src2].get_u16x8();
3905        for (a, b) in a.iter_mut().zip(b) {
3906            *a = (*a).saturating_add(b);
3907        }
3908        self.state[operands.dst].set_u16x8(a);
3909        ControlFlow::Continue(())
3910    }
3911
3912    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3913    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3914        let a = self.state[operands.src1].get_i16x8();
3915        let b = self.state[operands.src2].get_i16x8();
3916        let mut result = [0i16; 8];
3917        let half = result.len() / 2;
3918        for i in 0..half {
3919            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3920            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3921        }
3922        self.state[operands.dst].set_i16x8(result);
3923        ControlFlow::Continue(())
3924    }
3925
3926    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3927    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3928        let a = self.state[operands.src1].get_i32x4();
3929        let b = self.state[operands.src2].get_i32x4();
3930        let mut result = [0i32; 4];
3931        result[0] = a[0].wrapping_add(a[1]);
3932        result[1] = a[2].wrapping_add(a[3]);
3933        result[2] = b[0].wrapping_add(b[1]);
3934        result[3] = b[2].wrapping_add(b[3]);
3935        self.state[operands.dst].set_i32x4(result);
3936        ControlFlow::Continue(())
3937    }
3938
3939    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3940    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3941        let a = self.state[operands.src1].get_i8x16();
3942        let b = self.state[operands.src2].get_u32();
3943        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3944        ControlFlow::Continue(())
3945    }
3946
3947    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3948    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3949        let a = self.state[operands.src1].get_i16x8();
3950        let b = self.state[operands.src2].get_u32();
3951        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3952        ControlFlow::Continue(())
3953    }
3954
3955    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3956    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3957        let a = self.state[operands.src1].get_i32x4();
3958        let b = self.state[operands.src2].get_u32();
3959        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3960        ControlFlow::Continue(())
3961    }
3962
3963    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3964    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3965        let a = self.state[operands.src1].get_i64x2();
3966        let b = self.state[operands.src2].get_u32();
3967        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3968        ControlFlow::Continue(())
3969    }
3970
3971    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3972    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3973        let a = self.state[operands.src1].get_i8x16();
3974        let b = self.state[operands.src2].get_u32();
3975        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3976        ControlFlow::Continue(())
3977    }
3978
3979    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3980    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3981        let a = self.state[operands.src1].get_i16x8();
3982        let b = self.state[operands.src2].get_u32();
3983        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3984        ControlFlow::Continue(())
3985    }
3986
3987    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3988    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3989        let a = self.state[operands.src1].get_i32x4();
3990        let b = self.state[operands.src2].get_u32();
3991        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3992        ControlFlow::Continue(())
3993    }
3994
3995    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
3996    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3997        let a = self.state[operands.src1].get_i64x2();
3998        let b = self.state[operands.src2].get_u32();
3999        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
4000        ControlFlow::Continue(())
4001    }
4002
4003    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4004    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4005        let a = self.state[operands.src1].get_u8x16();
4006        let b = self.state[operands.src2].get_u32();
4007        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
4008        ControlFlow::Continue(())
4009    }
4010
4011    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4012    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4013        let a = self.state[operands.src1].get_u16x8();
4014        let b = self.state[operands.src2].get_u32();
4015        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
4016        ControlFlow::Continue(())
4017    }
4018
4019    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4020    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4021        let a = self.state[operands.src1].get_u32x4();
4022        let b = self.state[operands.src2].get_u32();
4023        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
4024        ControlFlow::Continue(())
4025    }
4026
4027    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4028    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
4029        let a = self.state[operands.src1].get_u64x2();
4030        let b = self.state[operands.src2].get_u32();
4031        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
4032        ControlFlow::Continue(())
4033    }
4034
    /// Loads the immediate 128-bit constant `val` into vector register `dst`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
        self.state[dst].set_u128(val);
        ControlFlow::Continue(())
    }
4040
4041    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4042    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4043        let val = self.state[src].get_u32() as u8;
4044        self.state[dst].set_u8x16([val; 16]);
4045        ControlFlow::Continue(())
4046    }
4047
4048    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4049    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4050        let val = self.state[src].get_u32() as u16;
4051        self.state[dst].set_u16x8([val; 8]);
4052        ControlFlow::Continue(())
4053    }
4054
4055    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4056    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4057        let val = self.state[src].get_u32();
4058        self.state[dst].set_u32x4([val; 4]);
4059        ControlFlow::Continue(())
4060    }
4061
4062    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4063    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
4064        let val = self.state[src].get_u64();
4065        self.state[dst].set_u64x2([val; 2]);
4066        ControlFlow::Continue(())
4067    }
4068
4069    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4070    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4071        let val = self.state[src].get_f32();
4072        self.state[dst].set_f32x4([val; 4]);
4073        ControlFlow::Continue(())
4074    }
4075
4076    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4077    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
4078        let val = self.state[src].get_f64();
4079        self.state[dst].set_f64x2([val; 2]);
4080        ControlFlow::Continue(())
4081    }
4082
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load eight i8 values from memory and sign-extend each to an i16 lane.
    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
        // `i16::from(i8)` sign-extends.
        self.state[dst].set_i16x8(val.map(|i| i.into()));
        ControlFlow::Continue(())
    }
4089
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load eight u8 values from memory and zero-extend each to a u16 lane.
    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
        // `u16::from(u8)` zero-extends.
        self.state[dst].set_u16x8(val.map(|i| i.into()))        ;
        ControlFlow::Continue(())
    }
4096
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load four little-endian i16 values and sign-extend each to an i32 lane.
    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
        // Memory is little-endian; `from_le` fixes up byte order on BE hosts.
        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
        ControlFlow::Continue(())
    }
4103
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load four little-endian u16 values and zero-extend each to a u32 lane.
    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
        // Memory is little-endian; `from_le` fixes up byte order on BE hosts.
        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
        ControlFlow::Continue(())
    }
4110
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load two little-endian i32 values and sign-extend each to an i64 lane.
    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
        // Memory is little-endian; `from_le` fixes up byte order on BE hosts.
        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
        ControlFlow::Continue(())
    }
4117
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Load two little-endian u32 values and zero-extend each to a u64 lane.
    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
        // SAFETY: relies on `load_ne`'s contract that `addr` resolves to a
        // valid, readable address — presumably guaranteed by the bytecode
        // producer; confirm against `load_ne`'s documentation.
        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
        // Memory is little-endian; `from_le` fixes up byte order on BE hosts.
        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
        ControlFlow::Continue(())
    }
4124
4125    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4126    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4127        let a = self.state[operands.src1].get_u128();
4128        let b = self.state[operands.src2].get_u128();
4129        self.state[operands.dst].set_u128(a & b);
4130        ControlFlow::Continue(())
4131    }
4132
4133    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4134    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4135        let a = self.state[operands.src1].get_u128();
4136        let b = self.state[operands.src2].get_u128();
4137        self.state[operands.dst].set_u128(a | b);
4138        ControlFlow::Continue(())
4139    }
4140
4141    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4142    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4143        let a = self.state[operands.src1].get_u128();
4144        let b = self.state[operands.src2].get_u128();
4145        self.state[operands.dst].set_u128(a ^ b);
4146        ControlFlow::Continue(())
4147    }
4148
4149    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4150    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4151        let a = self.state[src].get_u128();
4152        self.state[dst].set_u128(!a);
4153        ControlFlow::Continue(())
4154    }
4155
4156    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4157    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4158        let c = self.state[c].get_u128();
4159        let x = self.state[x].get_u128();
4160        let y = self.state[y].get_u128();
4161        self.state[dst].set_u128((c & x) | (!c & y));
4162        ControlFlow::Continue(())
4163    }
4164
4165    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4166    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4167        let a = self.state[src].get_u8x16();
4168        let mut result = 0;
4169        for item in a.iter().rev() {
4170            result <<= 1;
4171            result |= (*item >> 7) as u32;
4172        }
4173        self.state[dst].set_u32(result);
4174        ControlFlow::Continue(())
4175    }
4176
4177    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4178    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4179        let a = self.state[src].get_u16x8();
4180        let mut result = 0;
4181        for item in a.iter().rev() {
4182            result <<= 1;
4183            result |= (*item >> 15) as u32;
4184        }
4185        self.state[dst].set_u32(result);
4186        ControlFlow::Continue(())
4187    }
4188
4189    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4190    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4191        let a = self.state[src].get_u32x4();
4192        let mut result = 0;
4193        for item in a.iter().rev() {
4194            result <<= 1;
4195            result |= *item >> 31;
4196        }
4197        self.state[dst].set_u32(result);
4198        ControlFlow::Continue(())
4199    }
4200
4201    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4202    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4203        let a = self.state[src].get_u64x2();
4204        let mut result = 0;
4205        for item in a.iter().rev() {
4206            result <<= 1;
4207            result |= (*item >> 63) as u32;
4208        }
4209        self.state[dst].set_u32(result);
4210        ControlFlow::Continue(())
4211    }
4212
4213    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4214    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4215        let a = self.state[src].get_u8x16();
4216        let result = a.iter().all(|a| *a != 0);
4217        self.state[dst].set_u32(u32::from(result));
4218        ControlFlow::Continue(())
4219    }
4220
4221    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4222    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4223        let a = self.state[src].get_u16x8();
4224        let result = a.iter().all(|a| *a != 0);
4225        self.state[dst].set_u32(u32::from(result));
4226        ControlFlow::Continue(())
4227    }
4228
4229    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4230    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4231        let a = self.state[src].get_u32x4();
4232        let result = a.iter().all(|a| *a != 0);
4233        self.state[dst].set_u32(u32::from(result));
4234        ControlFlow::Continue(())
4235    }
4236
4237    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4238    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4239        let a = self.state[src].get_u64x2();
4240        let result = a.iter().all(|a| *a != 0);
4241        self.state[dst].set_u32(u32::from(result));
4242        ControlFlow::Continue(())
4243    }
4244
4245    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4246    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4247        let a = self.state[src].get_u8x16();
4248        let result = a.iter().any(|a| *a != 0);
4249        self.state[dst].set_u32(u32::from(result));
4250        ControlFlow::Continue(())
4251    }
4252
4253    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4254    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4255        let a = self.state[src].get_u16x8();
4256        let result = a.iter().any(|a| *a != 0);
4257        self.state[dst].set_u32(u32::from(result));
4258        ControlFlow::Continue(())
4259    }
4260
4261    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4262    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4263        let a = self.state[src].get_u32x4();
4264        let result = a.iter().any(|a| *a != 0);
4265        self.state[dst].set_u32(u32::from(result));
4266        ControlFlow::Continue(())
4267    }
4268
4269    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4270    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4271        let a = self.state[src].get_u64x2();
4272        let result = a.iter().any(|a| *a != 0);
4273        self.state[dst].set_u32(u32::from(result));
4274        ControlFlow::Continue(())
4275    }
4276
4277    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4278    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4279        let a = self.state[src].get_i32x4();
4280        self.state[dst].set_f32x4(a.map(|i| i as f32));
4281        ControlFlow::Continue(())
4282    }
4283
4284    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4285    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4286        let a = self.state[src].get_u32x4();
4287        self.state[dst].set_f32x4(a.map(|i| i as f32));
4288        ControlFlow::Continue(())
4289    }
4290
4291    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4292    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4293        let a = self.state[src].get_i64x2();
4294        self.state[dst].set_f64x2(a.map(|i| i as f64));
4295        ControlFlow::Continue(())
4296    }
4297
4298    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4299    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4300        let a = self.state[src].get_u64x2();
4301        self.state[dst].set_f64x2(a.map(|i| i as f64));
4302        ControlFlow::Continue(())
4303    }
4304
4305    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4306    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4307        let a = self.state[src].get_f32x4();
4308        self.state[dst].set_i32x4(a.map(|f| f as i32));
4309        ControlFlow::Continue(())
4310    }
4311
4312    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4313    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4314        let a = self.state[src].get_f32x4();
4315        self.state[dst].set_u32x4(a.map(|f| f as u32));
4316        ControlFlow::Continue(())
4317    }
4318
4319    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4320    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4321        let a = self.state[src].get_f64x2();
4322        self.state[dst].set_i64x2(a.map(|f| f as i64));
4323        ControlFlow::Continue(())
4324    }
4325
4326    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4327    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4328        let a = self.state[src].get_f64x2();
4329        self.state[dst].set_u64x2(a.map(|f| f as u64));
4330        ControlFlow::Continue(())
4331    }
4332
4333    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4334    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4335        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4336        self.state[dst].set_i16x8(a.map(|i| i.into()));
4337        ControlFlow::Continue(())
4338    }
4339
4340    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4341    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4342        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4343        self.state[dst].set_u16x8(a.map(|i| i.into()));
4344        ControlFlow::Continue(())
4345    }
4346
4347    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4348    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4349        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4350        self.state[dst].set_i32x4(a.map(|i| i.into()));
4351        ControlFlow::Continue(())
4352    }
4353
4354    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4355    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4356        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4357        self.state[dst].set_u32x4(a.map(|i| i.into()));
4358        ControlFlow::Continue(())
4359    }
4360
4361    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4362    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4363        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4364        self.state[dst].set_i64x2(a.map(|i| i.into()));
4365        ControlFlow::Continue(())
4366    }
4367
4368    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4369    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4370        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4371        self.state[dst].set_u64x2(a.map(|i| i.into()));
4372        ControlFlow::Continue(())
4373    }
4374
4375    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4376    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4377        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4378        self.state[dst].set_i16x8(a.map(|i| i.into()));
4379        ControlFlow::Continue(())
4380    }
4381
4382    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4383    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4384        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4385        self.state[dst].set_u16x8(a.map(|i| i.into()));
4386        ControlFlow::Continue(())
4387    }
4388
4389    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4390    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4391        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4392        self.state[dst].set_i32x4(a.map(|i| i.into()));
4393        ControlFlow::Continue(())
4394    }
4395
4396    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4397    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4398        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4399        self.state[dst].set_u32x4(a.map(|i| i.into()));
4400        ControlFlow::Continue(())
4401    }
4402
4403    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4404    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4405        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4406        self.state[dst].set_i64x2(a.map(|i| i.into()));
4407        ControlFlow::Continue(())
4408    }
4409
4410    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4411    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4412        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4413        self.state[dst].set_u64x2(a.map(|i| i.into()));
4414        ControlFlow::Continue(())
4415    }
4416
4417    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4418    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4419        let a = self.state[operands.src1].get_i16x8();
4420        let b = self.state[operands.src2].get_i16x8();
4421        let mut result = [0; 16];
4422        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4423            *d = (*i)
4424                .try_into()
4425                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4426        }
4427        self.state[operands.dst].set_i8x16(result);
4428        ControlFlow::Continue(())
4429    }
4430
4431    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4432    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4433        let a = self.state[operands.src1].get_i16x8();
4434        let b = self.state[operands.src2].get_i16x8();
4435        let mut result = [0; 16];
4436        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4437            *d = (*i)
4438                .try_into()
4439                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4440        }
4441        self.state[operands.dst].set_u8x16(result);
4442        ControlFlow::Continue(())
4443    }
4444
4445    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4446    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4447        let a = self.state[operands.src1].get_i32x4();
4448        let b = self.state[operands.src2].get_i32x4();
4449        let mut result = [0; 8];
4450        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4451            *d = (*i)
4452                .try_into()
4453                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4454        }
4455        self.state[operands.dst].set_i16x8(result);
4456        ControlFlow::Continue(())
4457    }
4458
4459    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4460    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4461        let a = self.state[operands.src1].get_i32x4();
4462        let b = self.state[operands.src2].get_i32x4();
4463        let mut result = [0; 8];
4464        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4465            *d = (*i)
4466                .try_into()
4467                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4468        }
4469        self.state[operands.dst].set_u16x8(result);
4470        ControlFlow::Continue(())
4471    }
4472
4473    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4474    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4475        let a = self.state[operands.src1].get_i64x2();
4476        let b = self.state[operands.src2].get_i64x2();
4477        let mut result = [0; 4];
4478        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4479            *d = (*i)
4480                .try_into()
4481                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4482        }
4483        self.state[operands.dst].set_i32x4(result);
4484        ControlFlow::Continue(())
4485    }
4486
4487    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4488    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4489        let a = self.state[operands.src1].get_i64x2();
4490        let b = self.state[operands.src2].get_i64x2();
4491        let mut result = [0; 4];
4492        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4493            *d = (*i)
4494                .try_into()
4495                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4496        }
4497        self.state[operands.dst].set_u32x4(result);
4498        ControlFlow::Continue(())
4499    }
4500
4501    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4502    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4503        let a = self.state[operands.src1].get_u64x2();
4504        let b = self.state[operands.src2].get_u64x2();
4505        let mut result = [0; 4];
4506        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4507            *d = (*i).try_into().unwrap_or(u32::MAX);
4508        }
4509        self.state[operands.dst].set_u32x4(result);
4510        ControlFlow::Continue(())
4511    }
4512
4513    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4514    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4515        let a = self.state[src].get_f32x4();
4516        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4517        ControlFlow::Continue(())
4518    }
4519
4520    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4521    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4522        let a = self.state[src].get_f64x2();
4523        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4524        ControlFlow::Continue(())
4525    }
4526
4527    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4528    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4529        let mut a = self.state[operands.src1].get_i8x16();
4530        let b = self.state[operands.src2].get_i8x16();
4531        for (a, b) in a.iter_mut().zip(b) {
4532            *a = a.wrapping_sub(b);
4533        }
4534        self.state[operands.dst].set_i8x16(a);
4535        ControlFlow::Continue(())
4536    }
4537
4538    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4539    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4540        let mut a = self.state[operands.src1].get_i16x8();
4541        let b = self.state[operands.src2].get_i16x8();
4542        for (a, b) in a.iter_mut().zip(b) {
4543            *a = a.wrapping_sub(b);
4544        }
4545        self.state[operands.dst].set_i16x8(a);
4546        ControlFlow::Continue(())
4547    }
4548
4549    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4550    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4551        let mut a = self.state[operands.src1].get_i32x4();
4552        let b = self.state[operands.src2].get_i32x4();
4553        for (a, b) in a.iter_mut().zip(b) {
4554            *a = a.wrapping_sub(b);
4555        }
4556        self.state[operands.dst].set_i32x4(a);
4557        ControlFlow::Continue(())
4558    }
4559
4560    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4561    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4562        let mut a = self.state[operands.src1].get_i64x2();
4563        let b = self.state[operands.src2].get_i64x2();
4564        for (a, b) in a.iter_mut().zip(b) {
4565            *a = a.wrapping_sub(b);
4566        }
4567        self.state[operands.dst].set_i64x2(a);
4568        ControlFlow::Continue(())
4569    }
4570
4571    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4572    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4573        let mut a = self.state[operands.src1].get_i8x16();
4574        let b = self.state[operands.src2].get_i8x16();
4575        for (a, b) in a.iter_mut().zip(b) {
4576            *a = a.saturating_sub(b);
4577        }
4578        self.state[operands.dst].set_i8x16(a);
4579        ControlFlow::Continue(())
4580    }
4581
4582    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4583    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4584        let mut a = self.state[operands.src1].get_u8x16();
4585        let b = self.state[operands.src2].get_u8x16();
4586        for (a, b) in a.iter_mut().zip(b) {
4587            *a = a.saturating_sub(b);
4588        }
4589        self.state[operands.dst].set_u8x16(a);
4590        ControlFlow::Continue(())
4591    }
4592
4593    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4594    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4595        let mut a = self.state[operands.src1].get_i16x8();
4596        let b = self.state[operands.src2].get_i16x8();
4597        for (a, b) in a.iter_mut().zip(b) {
4598            *a = a.saturating_sub(b);
4599        }
4600        self.state[operands.dst].set_i16x8(a);
4601        ControlFlow::Continue(())
4602    }
4603
4604    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4605    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4606        let mut a = self.state[operands.src1].get_u16x8();
4607        let b = self.state[operands.src2].get_u16x8();
4608        for (a, b) in a.iter_mut().zip(b) {
4609            *a = a.saturating_sub(b);
4610        }
4611        self.state[operands.dst].set_u16x8(a);
4612        ControlFlow::Continue(())
4613    }
4614
4615    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4616    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4617        let mut a = self.state[operands.src1].get_f64x2();
4618        let b = self.state[operands.src2].get_f64x2();
4619        for (a, b) in a.iter_mut().zip(b) {
4620            *a = *a - b;
4621        }
4622        self.state[operands.dst].set_f64x2(a);
4623        ControlFlow::Continue(())
4624    }
4625
4626    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4627    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4628        let mut a = self.state[operands.src1].get_i8x16();
4629        let b = self.state[operands.src2].get_i8x16();
4630        for (a, b) in a.iter_mut().zip(b) {
4631            *a = a.wrapping_mul(b);
4632        }
4633        self.state[operands.dst].set_i8x16(a);
4634        ControlFlow::Continue(())
4635    }
4636
4637    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4638    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4639        let mut a = self.state[operands.src1].get_i16x8();
4640        let b = self.state[operands.src2].get_i16x8();
4641        for (a, b) in a.iter_mut().zip(b) {
4642            *a = a.wrapping_mul(b);
4643        }
4644        self.state[operands.dst].set_i16x8(a);
4645        ControlFlow::Continue(())
4646    }
4647
4648    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4649    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4650        let mut a = self.state[operands.src1].get_i32x4();
4651        let b = self.state[operands.src2].get_i32x4();
4652        for (a, b) in a.iter_mut().zip(b) {
4653            *a = a.wrapping_mul(b);
4654        }
4655        self.state[operands.dst].set_i32x4(a);
4656        ControlFlow::Continue(())
4657    }
4658
4659    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4660    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4661        let mut a = self.state[operands.src1].get_i64x2();
4662        let b = self.state[operands.src2].get_i64x2();
4663        for (a, b) in a.iter_mut().zip(b) {
4664            *a = a.wrapping_mul(b);
4665        }
4666        self.state[operands.dst].set_i64x2(a);
4667        ControlFlow::Continue(())
4668    }
4669
4670    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4671    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4672        let mut a = self.state[operands.src1].get_f64x2();
4673        let b = self.state[operands.src2].get_f64x2();
4674        for (a, b) in a.iter_mut().zip(b) {
4675            *a = *a * b;
4676        }
4677        self.state[operands.dst].set_f64x2(a);
4678        ControlFlow::Continue(())
4679    }
4680
4681    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4682    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4683        let mut a = self.state[operands.src1].get_i16x8();
4684        let b = self.state[operands.src2].get_i16x8();
4685        const MIN: i32 = i16::MIN as i32;
4686        const MAX: i32 = i16::MAX as i32;
4687        for (a, b) in a.iter_mut().zip(b) {
4688            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4689            *a = r.clamp(MIN, MAX) as i16;
4690        }
4691        self.state[operands.dst].set_i16x8(a);
4692        ControlFlow::Continue(())
4693    }
4694
4695    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4696    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4697        let a = self.state[src].get_u8x16();
4698        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4699        ControlFlow::Continue(())
4700    }
4701
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one u8 lane into an x-register, zero-extended to 32 bits.
    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 16` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }
4708
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one u16 lane into an x-register, zero-extended to 32 bits.
    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 8` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(u32::from(a));
        ControlFlow::Continue(())
    }
4715
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one u32 lane into an x-register.
    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 4` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u32(a);
        ControlFlow::Continue(())
    }
4722
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one u64 lane into an x-register.
    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 2` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_u64(a);
        ControlFlow::Continue(())
    }
4729
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one f32 lane into an f-register.
    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 4` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f32(a);
        ControlFlow::Continue(())
    }
4736
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Extract one f64 lane into an f-register.
    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
        // SAFETY: `lane` is presumably guaranteed `< 2` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
        self.state[dst].set_f64(a);
        ControlFlow::Continue(())
    }
4743
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    // Copy `src1`, replacing lane `lane` with the low byte of x-register `src2`.
    fn vinsertx8(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u8x16();
        let b = self.state[operands.src2].get_u32() as u8;
        // SAFETY: `lane` is presumably guaranteed `< 16` by the bytecode
        // producer/encoder — an out-of-range immediate here would be UB;
        // confirm the encoder's invariant for this opcode.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u8x16(a);
        ControlFlow::Continue(())
    }
4758
    /// `dst = src1` with 16-bit lane `lane` replaced by the low 16 bits of `src2`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx16(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u16x8();
        // Only the low 16 bits of the integer register are inserted.
        let b = self.state[operands.src2].get_u32() as u16;
        // SAFETY(review): assumes `lane < 8`; no runtime bounds check here,
        // presumably the bytecode producer guarantees this — confirm.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u16x8(a);
        ControlFlow::Continue(())
    }
4773
    /// `dst = src1` with 32-bit lane `lane` replaced by the low 32 bits of `src2`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u32x4();
        let b = self.state[operands.src2].get_u32();
        // SAFETY(review): assumes `lane < 4`; no runtime bounds check here,
        // presumably the bytecode producer guarantees this — confirm.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u32x4(a);
        ControlFlow::Continue(())
    }
4788
    /// `dst = src1` with 64-bit lane `lane` replaced by the value of `src2`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertx64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, XReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_u64x2();
        let b = self.state[operands.src2].get_u64();
        // SAFETY(review): assumes `lane < 2`; no runtime bounds check here,
        // presumably the bytecode producer guarantees this — confirm.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_u64x2(a);
        ControlFlow::Continue(())
    }
4803
    /// `dst = src1` with f32 lane `lane` replaced by the value of float register `src2`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf32(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f32x4();
        let b = self.state[operands.src2].get_f32();
        // SAFETY(review): assumes `lane < 4`; no runtime bounds check here,
        // presumably the bytecode producer guarantees this — confirm.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f32x4(a);
        ControlFlow::Continue(())
    }
4818
    /// `dst = src1` with f64 lane `lane` replaced by the value of float register `src2`.
    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
    fn vinsertf64(
        &mut self,
        operands: BinaryOperands<VReg, VReg, FReg>,
        lane: u8,
    ) -> ControlFlow<Done> {
        let mut a = self.state[operands.src1].get_f64x2();
        let b = self.state[operands.src2].get_f64();
        // SAFETY(review): assumes `lane < 2`; no runtime bounds check here,
        // presumably the bytecode producer guarantees this — confirm.
        unsafe {
            *a.get_unchecked_mut(usize::from(lane)) = b;
        }
        self.state[operands.dst].set_f64x2(a);
        ControlFlow::Continue(())
    }
4833
4834    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4835    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4836        let a = self.state[operands.src1].get_u8x16();
4837        let b = self.state[operands.src2].get_u8x16();
4838        let mut c = [0; 16];
4839        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4840            *c = if a == b { u8::MAX } else { 0 };
4841        }
4842        self.state[operands.dst].set_u8x16(c);
4843        ControlFlow::Continue(())
4844    }
4845
4846    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4847    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4848        let a = self.state[operands.src1].get_u8x16();
4849        let b = self.state[operands.src2].get_u8x16();
4850        let mut c = [0; 16];
4851        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4852            *c = if a != b { u8::MAX } else { 0 };
4853        }
4854        self.state[operands.dst].set_u8x16(c);
4855        ControlFlow::Continue(())
4856    }
4857
4858    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4859    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4860        let a = self.state[operands.src1].get_i8x16();
4861        let b = self.state[operands.src2].get_i8x16();
4862        let mut c = [0; 16];
4863        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4864            *c = if a < b { u8::MAX } else { 0 };
4865        }
4866        self.state[operands.dst].set_u8x16(c);
4867        ControlFlow::Continue(())
4868    }
4869
4870    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4871    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4872        let a = self.state[operands.src1].get_i8x16();
4873        let b = self.state[operands.src2].get_i8x16();
4874        let mut c = [0; 16];
4875        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4876            *c = if a <= b { u8::MAX } else { 0 };
4877        }
4878        self.state[operands.dst].set_u8x16(c);
4879        ControlFlow::Continue(())
4880    }
4881
4882    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4883    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4884        let a = self.state[operands.src1].get_u8x16();
4885        let b = self.state[operands.src2].get_u8x16();
4886        let mut c = [0; 16];
4887        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4888            *c = if a < b { u8::MAX } else { 0 };
4889        }
4890        self.state[operands.dst].set_u8x16(c);
4891        ControlFlow::Continue(())
4892    }
4893
4894    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4895    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4896        let a = self.state[operands.src1].get_u8x16();
4897        let b = self.state[operands.src2].get_u8x16();
4898        let mut c = [0; 16];
4899        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4900            *c = if a <= b { u8::MAX } else { 0 };
4901        }
4902        self.state[operands.dst].set_u8x16(c);
4903        ControlFlow::Continue(())
4904    }
4905
4906    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4907    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4908        let a = self.state[operands.src1].get_u16x8();
4909        let b = self.state[operands.src2].get_u16x8();
4910        let mut c = [0; 8];
4911        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4912            *c = if a == b { u16::MAX } else { 0 };
4913        }
4914        self.state[operands.dst].set_u16x8(c);
4915        ControlFlow::Continue(())
4916    }
4917
4918    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4919    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4920        let a = self.state[operands.src1].get_u16x8();
4921        let b = self.state[operands.src2].get_u16x8();
4922        let mut c = [0; 8];
4923        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4924            *c = if a != b { u16::MAX } else { 0 };
4925        }
4926        self.state[operands.dst].set_u16x8(c);
4927        ControlFlow::Continue(())
4928    }
4929
4930    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4931    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4932        let a = self.state[operands.src1].get_i16x8();
4933        let b = self.state[operands.src2].get_i16x8();
4934        let mut c = [0; 8];
4935        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4936            *c = if a < b { u16::MAX } else { 0 };
4937        }
4938        self.state[operands.dst].set_u16x8(c);
4939        ControlFlow::Continue(())
4940    }
4941
4942    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4943    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4944        let a = self.state[operands.src1].get_i16x8();
4945        let b = self.state[operands.src2].get_i16x8();
4946        let mut c = [0; 8];
4947        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4948            *c = if a <= b { u16::MAX } else { 0 };
4949        }
4950        self.state[operands.dst].set_u16x8(c);
4951        ControlFlow::Continue(())
4952    }
4953
4954    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4955    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4956        let a = self.state[operands.src1].get_u16x8();
4957        let b = self.state[operands.src2].get_u16x8();
4958        let mut c = [0; 8];
4959        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4960            *c = if a < b { u16::MAX } else { 0 };
4961        }
4962        self.state[operands.dst].set_u16x8(c);
4963        ControlFlow::Continue(())
4964    }
4965
4966    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4967    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4968        let a = self.state[operands.src1].get_u16x8();
4969        let b = self.state[operands.src2].get_u16x8();
4970        let mut c = [0; 8];
4971        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4972            *c = if a <= b { u16::MAX } else { 0 };
4973        }
4974        self.state[operands.dst].set_u16x8(c);
4975        ControlFlow::Continue(())
4976    }
4977
4978    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4979    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4980        let a = self.state[operands.src1].get_u32x4();
4981        let b = self.state[operands.src2].get_u32x4();
4982        let mut c = [0; 4];
4983        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4984            *c = if a == b { u32::MAX } else { 0 };
4985        }
4986        self.state[operands.dst].set_u32x4(c);
4987        ControlFlow::Continue(())
4988    }
4989
4990    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
4991    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4992        let a = self.state[operands.src1].get_u32x4();
4993        let b = self.state[operands.src2].get_u32x4();
4994        let mut c = [0; 4];
4995        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4996            *c = if a != b { u32::MAX } else { 0 };
4997        }
4998        self.state[operands.dst].set_u32x4(c);
4999        ControlFlow::Continue(())
5000    }
5001
5002    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5003    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5004        let a = self.state[operands.src1].get_i32x4();
5005        let b = self.state[operands.src2].get_i32x4();
5006        let mut c = [0; 4];
5007        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5008            *c = if a < b { u32::MAX } else { 0 };
5009        }
5010        self.state[operands.dst].set_u32x4(c);
5011        ControlFlow::Continue(())
5012    }
5013
5014    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5015    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5016        let a = self.state[operands.src1].get_i32x4();
5017        let b = self.state[operands.src2].get_i32x4();
5018        let mut c = [0; 4];
5019        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5020            *c = if a <= b { u32::MAX } else { 0 };
5021        }
5022        self.state[operands.dst].set_u32x4(c);
5023        ControlFlow::Continue(())
5024    }
5025
5026    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5027    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5028        let a = self.state[operands.src1].get_u32x4();
5029        let b = self.state[operands.src2].get_u32x4();
5030        let mut c = [0; 4];
5031        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5032            *c = if a < b { u32::MAX } else { 0 };
5033        }
5034        self.state[operands.dst].set_u32x4(c);
5035        ControlFlow::Continue(())
5036    }
5037
5038    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5039    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5040        let a = self.state[operands.src1].get_u32x4();
5041        let b = self.state[operands.src2].get_u32x4();
5042        let mut c = [0; 4];
5043        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5044            *c = if a <= b { u32::MAX } else { 0 };
5045        }
5046        self.state[operands.dst].set_u32x4(c);
5047        ControlFlow::Continue(())
5048    }
5049
5050    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5051    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5052        let a = self.state[operands.src1].get_u64x2();
5053        let b = self.state[operands.src2].get_u64x2();
5054        let mut c = [0; 2];
5055        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5056            *c = if a == b { u64::MAX } else { 0 };
5057        }
5058        self.state[operands.dst].set_u64x2(c);
5059        ControlFlow::Continue(())
5060    }
5061
5062    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5063    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5064        let a = self.state[operands.src1].get_u64x2();
5065        let b = self.state[operands.src2].get_u64x2();
5066        let mut c = [0; 2];
5067        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5068            *c = if a != b { u64::MAX } else { 0 };
5069        }
5070        self.state[operands.dst].set_u64x2(c);
5071        ControlFlow::Continue(())
5072    }
5073
5074    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5075    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5076        let a = self.state[operands.src1].get_i64x2();
5077        let b = self.state[operands.src2].get_i64x2();
5078        let mut c = [0; 2];
5079        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5080            *c = if a < b { u64::MAX } else { 0 };
5081        }
5082        self.state[operands.dst].set_u64x2(c);
5083        ControlFlow::Continue(())
5084    }
5085
5086    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5087    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5088        let a = self.state[operands.src1].get_i64x2();
5089        let b = self.state[operands.src2].get_i64x2();
5090        let mut c = [0; 2];
5091        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5092            *c = if a <= b { u64::MAX } else { 0 };
5093        }
5094        self.state[operands.dst].set_u64x2(c);
5095        ControlFlow::Continue(())
5096    }
5097
5098    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5099    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5100        let a = self.state[operands.src1].get_u64x2();
5101        let b = self.state[operands.src2].get_u64x2();
5102        let mut c = [0; 2];
5103        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5104            *c = if a < b { u64::MAX } else { 0 };
5105        }
5106        self.state[operands.dst].set_u64x2(c);
5107        ControlFlow::Continue(())
5108    }
5109
5110    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5111    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5112        let a = self.state[operands.src1].get_u64x2();
5113        let b = self.state[operands.src2].get_u64x2();
5114        let mut c = [0; 2];
5115        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5116            *c = if a <= b { u64::MAX } else { 0 };
5117        }
5118        self.state[operands.dst].set_u64x2(c);
5119        ControlFlow::Continue(())
5120    }
5121
5122    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5123    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5124        let a = self.state[src].get_i8x16();
5125        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
5126        ControlFlow::Continue(())
5127    }
5128
5129    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5130    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5131        let a = self.state[src].get_i16x8();
5132        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
5133        ControlFlow::Continue(())
5134    }
5135
5136    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5137    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5138        let a = self.state[src].get_i32x4();
5139        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
5140        ControlFlow::Continue(())
5141    }
5142
5143    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5144    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5145        let a = self.state[src].get_i64x2();
5146        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
5147        ControlFlow::Continue(())
5148    }
5149
5150    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5151    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5152        let a = self.state[src].get_f64x2();
5153        self.state[dst].set_f64x2(a.map(|i| -i));
5154        ControlFlow::Continue(())
5155    }
5156
5157    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5158    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5159        let mut a = self.state[operands.src1].get_i8x16();
5160        let b = self.state[operands.src2].get_i8x16();
5161        for (a, b) in a.iter_mut().zip(&b) {
5162            *a = (*a).min(*b);
5163        }
5164        self.state[operands.dst].set_i8x16(a);
5165        ControlFlow::Continue(())
5166    }
5167
5168    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5169    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5170        let mut a = self.state[operands.src1].get_u8x16();
5171        let b = self.state[operands.src2].get_u8x16();
5172        for (a, b) in a.iter_mut().zip(&b) {
5173            *a = (*a).min(*b);
5174        }
5175        self.state[operands.dst].set_u8x16(a);
5176        ControlFlow::Continue(())
5177    }
5178
5179    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5180    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5181        let mut a = self.state[operands.src1].get_i16x8();
5182        let b = self.state[operands.src2].get_i16x8();
5183        for (a, b) in a.iter_mut().zip(&b) {
5184            *a = (*a).min(*b);
5185        }
5186        self.state[operands.dst].set_i16x8(a);
5187        ControlFlow::Continue(())
5188    }
5189
5190    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5191    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5192        let mut a = self.state[operands.src1].get_u16x8();
5193        let b = self.state[operands.src2].get_u16x8();
5194        for (a, b) in a.iter_mut().zip(&b) {
5195            *a = (*a).min(*b);
5196        }
5197        self.state[operands.dst].set_u16x8(a);
5198        ControlFlow::Continue(())
5199    }
5200
5201    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5202    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5203        let mut a = self.state[operands.src1].get_i32x4();
5204        let b = self.state[operands.src2].get_i32x4();
5205        for (a, b) in a.iter_mut().zip(&b) {
5206            *a = (*a).min(*b);
5207        }
5208        self.state[operands.dst].set_i32x4(a);
5209        ControlFlow::Continue(())
5210    }
5211
5212    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5213    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5214        let mut a = self.state[operands.src1].get_u32x4();
5215        let b = self.state[operands.src2].get_u32x4();
5216        for (a, b) in a.iter_mut().zip(&b) {
5217            *a = (*a).min(*b);
5218        }
5219        self.state[operands.dst].set_u32x4(a);
5220        ControlFlow::Continue(())
5221    }
5222
5223    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5224    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5225        let mut a = self.state[operands.src1].get_i8x16();
5226        let b = self.state[operands.src2].get_i8x16();
5227        for (a, b) in a.iter_mut().zip(&b) {
5228            *a = (*a).max(*b);
5229        }
5230        self.state[operands.dst].set_i8x16(a);
5231        ControlFlow::Continue(())
5232    }
5233
5234    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5235    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5236        let mut a = self.state[operands.src1].get_u8x16();
5237        let b = self.state[operands.src2].get_u8x16();
5238        for (a, b) in a.iter_mut().zip(&b) {
5239            *a = (*a).max(*b);
5240        }
5241        self.state[operands.dst].set_u8x16(a);
5242        ControlFlow::Continue(())
5243    }
5244
5245    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5246    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5247        let mut a = self.state[operands.src1].get_i16x8();
5248        let b = self.state[operands.src2].get_i16x8();
5249        for (a, b) in a.iter_mut().zip(&b) {
5250            *a = (*a).max(*b);
5251        }
5252        self.state[operands.dst].set_i16x8(a);
5253        ControlFlow::Continue(())
5254    }
5255
5256    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5257    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5258        let mut a = self.state[operands.src1].get_u16x8();
5259        let b = self.state[operands.src2].get_u16x8();
5260        for (a, b) in a.iter_mut().zip(&b) {
5261            *a = (*a).max(*b);
5262        }
5263        self.state[operands.dst].set_u16x8(a);
5264        ControlFlow::Continue(())
5265    }
5266
5267    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5268    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5269        let mut a = self.state[operands.src1].get_i32x4();
5270        let b = self.state[operands.src2].get_i32x4();
5271        for (a, b) in a.iter_mut().zip(&b) {
5272            *a = (*a).max(*b);
5273        }
5274        self.state[operands.dst].set_i32x4(a);
5275        ControlFlow::Continue(())
5276    }
5277
5278    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5279    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5280        let mut a = self.state[operands.src1].get_u32x4();
5281        let b = self.state[operands.src2].get_u32x4();
5282        for (a, b) in a.iter_mut().zip(&b) {
5283            *a = (*a).max(*b);
5284        }
5285        self.state[operands.dst].set_u32x4(a);
5286        ControlFlow::Continue(())
5287    }
5288
5289    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5290    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5291        let a = self.state[src].get_i8x16();
5292        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5293        ControlFlow::Continue(())
5294    }
5295
5296    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5297    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5298        let a = self.state[src].get_i16x8();
5299        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5300        ControlFlow::Continue(())
5301    }
5302
5303    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5304    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5305        let a = self.state[src].get_i32x4();
5306        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5307        ControlFlow::Continue(())
5308    }
5309
5310    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5311    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5312        let a = self.state[src].get_i64x2();
5313        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5314        ControlFlow::Continue(())
5315    }
5316
5317    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5318    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5319        let a = self.state[src].get_f32x4();
5320        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5321        ControlFlow::Continue(())
5322    }
5323
5324    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5325    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5326        let a = self.state[src].get_f64x2();
5327        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5328        ControlFlow::Continue(())
5329    }
5330
5331    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5332    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5333        let mut a = self.state[operands.src1].get_f32x4();
5334        let b = self.state[operands.src2].get_f32x4();
5335        for (a, b) in a.iter_mut().zip(&b) {
5336            *a = a.wasm_maximum(*b);
5337        }
5338        self.state[operands.dst].set_f32x4(a);
5339        ControlFlow::Continue(())
5340    }
5341
5342    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5343    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5344        let mut a = self.state[operands.src1].get_f64x2();
5345        let b = self.state[operands.src2].get_f64x2();
5346        for (a, b) in a.iter_mut().zip(&b) {
5347            *a = a.wasm_maximum(*b);
5348        }
5349        self.state[operands.dst].set_f64x2(a);
5350        ControlFlow::Continue(())
5351    }
5352
5353    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5354    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5355        let mut a = self.state[operands.src1].get_f32x4();
5356        let b = self.state[operands.src2].get_f32x4();
5357        for (a, b) in a.iter_mut().zip(&b) {
5358            *a = a.wasm_minimum(*b);
5359        }
5360        self.state[operands.dst].set_f32x4(a);
5361        ControlFlow::Continue(())
5362    }
5363
5364    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5365    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5366        let mut a = self.state[operands.src1].get_f64x2();
5367        let b = self.state[operands.src2].get_f64x2();
5368        for (a, b) in a.iter_mut().zip(&b) {
5369            *a = a.wasm_minimum(*b);
5370        }
5371        self.state[operands.dst].set_f64x2(a);
5372        ControlFlow::Continue(())
5373    }
5374
5375    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5376    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5377        let a = self.state[src1].get_u8x16();
5378        let b = self.state[src2].get_u8x16();
5379        let result = mask.to_le_bytes().map(|m| {
5380            if m < 16 {
5381                a[m as usize]
5382            } else {
5383                b[m as usize - 16]
5384            }
5385        });
5386        self.state[dst].set_u8x16(result);
5387        ControlFlow::Continue(())
5388    }
5389
5390    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5391    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5392        let src1 = self.state[operands.src1].get_i8x16();
5393        let src2 = self.state[operands.src2].get_i8x16();
5394        let mut dst = [0i8; 16];
5395        for (i, &idx) in src2.iter().enumerate() {
5396            if (idx as usize) < 16 {
5397                dst[i] = src1[idx as usize];
5398            } else {
5399                dst[i] = 0
5400            }
5401        }
5402        self.state[operands.dst].set_i8x16(dst);
5403        ControlFlow::Continue(())
5404    }
5405
5406    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5407    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5408        let mut a = self.state[operands.src1].get_u8x16();
5409        let b = self.state[operands.src2].get_u8x16();
5410        for (a, b) in a.iter_mut().zip(&b) {
5411            // use wider precision to avoid overflow
5412            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5413        }
5414        self.state[operands.dst].set_u8x16(a);
5415        ControlFlow::Continue(())
5416    }
5417
5418    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5419    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5420        let mut a = self.state[operands.src1].get_u16x8();
5421        let b = self.state[operands.src2].get_u16x8();
5422        for (a, b) in a.iter_mut().zip(&b) {
5423            // use wider precision to avoid overflow
5424            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5425        }
5426        self.state[operands.dst].set_u16x8(a);
5427        ControlFlow::Continue(())
5428    }
5429
5430    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5431    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5432        let a = self.state[operands.src1].get_f32x4();
5433        let b = self.state[operands.src2].get_f32x4();
5434        let mut c = [0; 4];
5435        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5436            *c = if a == b { u32::MAX } else { 0 };
5437        }
5438        self.state[operands.dst].set_u32x4(c);
5439        ControlFlow::Continue(())
5440    }
5441
5442    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5443    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5444        let a = self.state[operands.src1].get_f32x4();
5445        let b = self.state[operands.src2].get_f32x4();
5446        let mut c = [0; 4];
5447        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5448            *c = if a != b { u32::MAX } else { 0 };
5449        }
5450        self.state[operands.dst].set_u32x4(c);
5451        ControlFlow::Continue(())
5452    }
5453
5454    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5455    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5456        let a = self.state[operands.src1].get_f32x4();
5457        let b = self.state[operands.src2].get_f32x4();
5458        let mut c = [0; 4];
5459        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5460            *c = if a < b { u32::MAX } else { 0 };
5461        }
5462        self.state[operands.dst].set_u32x4(c);
5463        ControlFlow::Continue(())
5464    }
5465
5466    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5467    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5468        let a = self.state[operands.src1].get_f32x4();
5469        let b = self.state[operands.src2].get_f32x4();
5470        let mut c = [0; 4];
5471        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5472            *c = if a <= b { u32::MAX } else { 0 };
5473        }
5474        self.state[operands.dst].set_u32x4(c);
5475        ControlFlow::Continue(())
5476    }
5477
5478    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5479    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5480        let a = self.state[operands.src1].get_f64x2();
5481        let b = self.state[operands.src2].get_f64x2();
5482        let mut c = [0; 2];
5483        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5484            *c = if a == b { u64::MAX } else { 0 };
5485        }
5486        self.state[operands.dst].set_u64x2(c);
5487        ControlFlow::Continue(())
5488    }
5489
5490    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5491    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5492        let a = self.state[operands.src1].get_f64x2();
5493        let b = self.state[operands.src2].get_f64x2();
5494        let mut c = [0; 2];
5495        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5496            *c = if a != b { u64::MAX } else { 0 };
5497        }
5498        self.state[operands.dst].set_u64x2(c);
5499        ControlFlow::Continue(())
5500    }
5501
5502    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5503    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5504        let a = self.state[operands.src1].get_f64x2();
5505        let b = self.state[operands.src2].get_f64x2();
5506        let mut c = [0; 2];
5507        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5508            *c = if a < b { u64::MAX } else { 0 };
5509        }
5510        self.state[operands.dst].set_u64x2(c);
5511        ControlFlow::Continue(())
5512    }
5513
5514    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5515    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5516        let a = self.state[operands.src1].get_f64x2();
5517        let b = self.state[operands.src2].get_f64x2();
5518        let mut c = [0; 2];
5519        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5520            *c = if a <= b { u64::MAX } else { 0 };
5521        }
5522        self.state[operands.dst].set_u64x2(c);
5523        ControlFlow::Continue(())
5524    }
5525
5526    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5527    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5528        let mut a = self.state[a].get_f32x4();
5529        let b = self.state[b].get_f32x4();
5530        let c = self.state[c].get_f32x4();
5531        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5532            *a = a.wasm_mul_add(b, c);
5533        }
5534        self.state[dst].set_f32x4(a);
5535        ControlFlow::Continue(())
5536    }
5537
5538    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5539    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5540        let mut a = self.state[a].get_f64x2();
5541        let b = self.state[b].get_f64x2();
5542        let c = self.state[c].get_f64x2();
5543        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5544            *a = a.wasm_mul_add(b, c);
5545        }
5546        self.state[dst].set_f64x2(a);
5547        ControlFlow::Continue(())
5548    }
5549
5550    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5551    fn vselect(
5552        &mut self,
5553        dst: VReg,
5554        cond: XReg,
5555        if_nonzero: VReg,
5556        if_zero: VReg,
5557    ) -> ControlFlow<Done> {
5558        let result = if self.state[cond].get_u32() != 0 {
5559            self.state[if_nonzero]
5560        } else {
5561            self.state[if_zero]
5562        };
5563        self.state[dst] = result;
5564        ControlFlow::Continue(())
5565    }
5566
5567    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5568    fn xadd128(
5569        &mut self,
5570        dst_lo: XReg,
5571        dst_hi: XReg,
5572        lhs_lo: XReg,
5573        lhs_hi: XReg,
5574        rhs_lo: XReg,
5575        rhs_hi: XReg,
5576    ) -> ControlFlow<Done> {
5577        let lhs = self.get_i128(lhs_lo, lhs_hi);
5578        let rhs = self.get_i128(rhs_lo, rhs_hi);
5579        let result = lhs.wrapping_add(rhs);
5580        self.set_i128(dst_lo, dst_hi, result);
5581        ControlFlow::Continue(())
5582    }
5583
5584    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5585    fn xsub128(
5586        &mut self,
5587        dst_lo: XReg,
5588        dst_hi: XReg,
5589        lhs_lo: XReg,
5590        lhs_hi: XReg,
5591        rhs_lo: XReg,
5592        rhs_hi: XReg,
5593    ) -> ControlFlow<Done> {
5594        let lhs = self.get_i128(lhs_lo, lhs_hi);
5595        let rhs = self.get_i128(rhs_lo, rhs_hi);
5596        let result = lhs.wrapping_sub(rhs);
5597        self.set_i128(dst_lo, dst_hi, result);
5598        ControlFlow::Continue(())
5599    }
5600
5601    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5602    fn xwidemul64_s(
5603        &mut self,
5604        dst_lo: XReg,
5605        dst_hi: XReg,
5606        lhs: XReg,
5607        rhs: XReg,
5608    ) -> ControlFlow<Done> {
5609        let lhs = self.state[lhs].get_i64();
5610        let rhs = self.state[rhs].get_i64();
5611        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5612        self.set_i128(dst_lo, dst_hi, result);
5613        ControlFlow::Continue(())
5614    }
5615
5616    #[interp_disable_if_cfg(pulley_disable_interp_simd)]
5617    fn xwidemul64_u(
5618        &mut self,
5619        dst_lo: XReg,
5620        dst_hi: XReg,
5621        lhs: XReg,
5622        rhs: XReg,
5623    ) -> ControlFlow<Done> {
5624        let lhs = self.state[lhs].get_u64();
5625        let rhs = self.state[rhs].get_u64();
5626        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5627        self.set_i128(dst_lo, dst_hi, result as i128);
5628        ControlFlow::Continue(())
5629    }
5630}