pulley_interpreter/interp.rs

1//! Interpretation of pulley bytecode.
2
3use crate::decode::*;
4use crate::encode::Encode;
5use crate::imms::*;
6use crate::profile::{ExecutingPc, ExecutingPcRef};
7use crate::regs::*;
8use alloc::string::ToString;
9use alloc::vec::Vec;
10use core::fmt;
11use core::mem;
12use core::ops::ControlFlow;
13use core::ops::{Index, IndexMut};
14use core::ptr::NonNull;
15use wasmtime_math::WasmFloat;
16mod debug;
17#[cfg(all(not(pulley_tail_calls), not(pulley_assume_llvm_makes_tail_calls)))]
18mod match_loop;
19#[cfg(any(pulley_tail_calls, pulley_assume_llvm_makes_tail_calls))]
20mod tail_loop;
21
22const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB
23
24/// A virtual machine for interpreting Pulley bytecode.
25pub struct Vm {
26    state: MachineState,
27    executing_pc: ExecutingPc,
28}
29
30impl Default for Vm {
31    fn default() -> Self {
32        Vm::new()
33    }
34}
35
36impl Vm {
37    /// Create a new virtual machine with the default stack size.
38    pub fn new() -> Self {
39        Self::with_stack(DEFAULT_STACK_SIZE)
40    }
41
42    /// Create a new virtual machine with the given stack.
43    pub fn with_stack(stack_size: usize) -> Self {
44        Self {
45            state: MachineState::with_stack(stack_size),
46            executing_pc: ExecutingPc::default(),
47        }
48    }
49
50    /// Get a shared reference to this VM's machine state.
51    pub fn state(&self) -> &MachineState {
52        &self.state
53    }
54
55    /// Get an exclusive reference to this VM's machine state.
56    pub fn state_mut(&mut self) -> &mut MachineState {
57        &mut self.state
58    }
59
60    /// Call a bytecode function.
61    ///
62    /// The given `func` must point to the beginning of a valid Pulley bytecode
63    /// function.
64    ///
65    /// The given `args` must match the number and type of arguments that
66    /// function expects.
67    ///
68    /// The given `rets` must match the function's actual return types.
69    ///
70    /// Returns either the resulting values, or the PC at which a trap was
71    /// raised.
72    pub unsafe fn call<'a, T>(
73        &'a mut self,
74        func: NonNull<u8>,
75        args: &[Val],
76        rets: T,
77    ) -> DoneReason<impl Iterator<Item = Val> + use<'a, T>>
78    where
79        T: IntoIterator<Item = RegType> + 'a,
80    {
81        unsafe {
82            let lr = self.call_start(args);
83
84            match self.call_run(func) {
85                DoneReason::ReturnToHost(()) => DoneReason::ReturnToHost(self.call_end(lr, rets)),
86                DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
87                DoneReason::CallIndirectHost { id, resume } => {
88                    DoneReason::CallIndirectHost { id, resume }
89                }
90            }
91        }
92    }
93
94    /// Performs the initial part of [`Vm::call`] by setting up the `args`
95    /// provided in registers according to Pulley's ABI.
96    ///
97    /// # Return
98    ///
99    /// Returns the old `lr` register value. The current `lr` value is replaced
100    /// with a sentinel that triggers a return to the host when returned-to.
101    ///
102    /// # Unsafety
103    ///
104    /// All the same unsafety as `call` applies here; additionally, you must
105    /// invoke `call_run` and then `call_end` after calling `call_start`.
106    /// If you don't want to wrangle these invocations, use `call` instead
107    /// of `call_{start,run,end}`.
108    pub unsafe fn call_start<'a>(&'a mut self, args: &[Val]) -> *mut u8 {
109        // NB: make sure this method stays in sync with
110        // `PulleyMachineDeps::compute_arg_locs`!
111
112        let mut x_args = (0..16).map(|x| unsafe { XReg::new_unchecked(x) });
113        let mut f_args = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
114        let mut v_args = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
115
116        for arg in args {
117            match arg {
118                Val::XReg(val) => match x_args.next() {
119                    Some(reg) => self.state[reg] = *val,
120                    None => todo!("stack slots"),
121                },
122                Val::FReg(val) => match f_args.next() {
123                    Some(reg) => self.state[reg] = *val,
124                    None => todo!("stack slots"),
125                },
126                Val::VReg(val) => match v_args.next() {
127                    Some(reg) => self.state[reg] = *val,
128                    None => todo!("stack slots"),
129                },
130            }
131        }
132
133        mem::replace(&mut self.state.lr, HOST_RETURN_ADDR)
134    }
135
136    /// Performs the internal part of [`Vm::call`] where bytecode is actually
137    /// executed.
138    ///
139    /// # Unsafety
140    ///
141    /// In addition to all the invariants documented for `call`, you
142    /// may only invoke `call_run` after invoking `call_start` to
143    /// initialize this call's arguments.
144    pub unsafe fn call_run(&mut self, pc: NonNull<u8>) -> DoneReason<()> {
145        self.state.debug_assert_done_reason_none();
146        let interpreter = Interpreter {
147            state: &mut self.state,
148            pc: unsafe { UnsafeBytecodeStream::new(pc) },
149            executing_pc: self.executing_pc.as_ref(),
150        };
151        let done = interpreter.run();
152        self.state.done_decode(done)
153    }
154
155    /// Performs the tail end of [`Vm::call`] by returning the values as
156    /// determined by `rets` according to Pulley's ABI.
157    ///
158    /// The `old_ret` value should have been provided from `call_start`
159    /// previously.
160    ///
161    /// # Unsafety
162    ///
163    /// In addition to the invariants documented for `call`, this may
164    /// only be called after `call_run`.
165    pub unsafe fn call_end<'a>(
166        &'a mut self,
167        old_ret: *mut u8,
168        rets: impl IntoIterator<Item = RegType> + 'a,
169    ) -> impl Iterator<Item = Val> + 'a {
170        self.state.lr = old_ret;
171        // NB: make sure this method stays in sync with
172        // `PulleyMachineDeps::compute_arg_locs`!
173
174        let mut x_rets = (0..15).map(|x| unsafe { XReg::new_unchecked(x) });
175        let mut f_rets = (0..16).map(|f| unsafe { FReg::new_unchecked(f) });
176        let mut v_rets = (0..16).map(|v| unsafe { VReg::new_unchecked(v) });
177
178        rets.into_iter().map(move |ty| match ty {
179            RegType::XReg => match x_rets.next() {
180                Some(reg) => Val::XReg(self.state[reg]),
181                None => todo!("stack slots"),
182            },
183            RegType::FReg => match f_rets.next() {
184                Some(reg) => Val::FReg(self.state[reg]),
185                None => todo!("stack slots"),
186            },
187            RegType::VReg => match v_rets.next() {
188                Some(reg) => Val::VReg(self.state[reg]),
189                None => todo!("stack slots"),
190            },
191        })
192    }
193
194    /// Returns the current `fp` register value.
195    pub fn fp(&self) -> *mut u8 {
196        self.state.fp
197    }
198
199    /// Returns the current `lr` register value.
200    pub fn lr(&self) -> *mut u8 {
201        self.state.lr
202    }
203
204    /// Sets the current `fp` register value.
205    pub unsafe fn set_fp(&mut self, fp: *mut u8) {
206        self.state.fp = fp;
207    }
208
209    /// Sets the current `lr` register value.
210    pub unsafe fn set_lr(&mut self, lr: *mut u8) {
211        self.state.lr = lr;
212    }
213
214    /// Gets a handle to the currently executing program counter for this
215    /// interpreter which can be read from other threads.
216    //
217    // Note that despite this field still existing with `not(feature =
218    // "profile")` it's hidden from the public API in that scenario as it has no
219    // methods anyway.
220    #[cfg(feature = "profile")]
221    pub fn executing_pc(&self) -> &ExecutingPc {
222        &self.executing_pc
223    }
224}
225
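// An added illustrative sketch (not part of the upstream API) showing how the
// split entry points above compose; `Vm::call` performs exactly these three
// steps. The `[RegType::XReg]` return signature here is an assumed example:
// real callers must pass the callee's actual return types, and `func` must
// satisfy the same validity requirements documented on `Vm::call`.
#[allow(dead_code)]
unsafe fn call_in_three_steps(
    vm: &mut Vm,
    func: NonNull<u8>,
    args: &[Val],
) -> DoneReason<Vec<Val>> {
    unsafe {
        // Move `args` into registers and stash the host's return address.
        let lr = vm.call_start(args);
        // Interpret bytecode until it returns, traps, or calls back to the host.
        match vm.call_run(func) {
            DoneReason::ReturnToHost(()) => {
                // Read results out of registers and restore the old `lr`.
                let vals = vm.call_end(lr, [RegType::XReg]).collect::<Vec<_>>();
                DoneReason::ReturnToHost(vals)
            }
            DoneReason::Trap { pc, kind } => DoneReason::Trap { pc, kind },
            DoneReason::CallIndirectHost { id, resume } => {
                DoneReason::CallIndirectHost { id, resume }
            }
        }
    }
}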
226impl Drop for Vm {
227    fn drop(&mut self) {
228        self.executing_pc.set_done();
229    }
230}
231
232/// The type of a register in the Pulley machine state.
233#[derive(Clone, Copy, Debug)]
234pub enum RegType {
235    /// An `x` register: integers.
236    XReg,
237
238    /// An `f` register: floats.
239    FReg,
240
241    /// A `v` register: vectors.
242    VReg,
243}
244
245/// A value that can be stored in a register.
246#[derive(Clone, Copy, Debug)]
247pub enum Val {
248    /// An `x` register value: integers.
249    XReg(XRegVal),
250
251    /// An `f` register value: floats.
252    FReg(FRegVal),
253
254    /// A `v` register value: vectors.
255    VReg(VRegVal),
256}
257
258impl fmt::LowerHex for Val {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        match self {
261            Val::XReg(v) => fmt::LowerHex::fmt(v, f),
262            Val::FReg(v) => fmt::LowerHex::fmt(v, f),
263            Val::VReg(v) => fmt::LowerHex::fmt(v, f),
264        }
265    }
266}
267
268impl From<XRegVal> for Val {
269    fn from(value: XRegVal) -> Self {
270        Val::XReg(value)
271    }
272}
273
274impl From<u64> for Val {
275    fn from(value: u64) -> Self {
276        XRegVal::new_u64(value).into()
277    }
278}
279
280impl From<u32> for Val {
281    fn from(value: u32) -> Self {
282        XRegVal::new_u32(value).into()
283    }
284}
285
286impl From<i64> for Val {
287    fn from(value: i64) -> Self {
288        XRegVal::new_i64(value).into()
289    }
290}
291
292impl From<i32> for Val {
293    fn from(value: i32) -> Self {
294        XRegVal::new_i32(value).into()
295    }
296}
297
298impl<T> From<*mut T> for Val {
299    fn from(value: *mut T) -> Self {
300        XRegVal::new_ptr(value).into()
301    }
302}
303
304impl From<FRegVal> for Val {
305    fn from(value: FRegVal) -> Self {
306        Val::FReg(value)
307    }
308}
309
310impl From<f64> for Val {
311    fn from(value: f64) -> Self {
312        FRegVal::new_f64(value).into()
313    }
314}
315
316impl From<f32> for Val {
317    fn from(value: f32) -> Self {
318        FRegVal::new_f32(value).into()
319    }
320}
321
322impl From<VRegVal> for Val {
323    fn from(value: VRegVal) -> Self {
324        Val::VReg(value)
325    }
326}
327
328/// An `x` register value: integers.
329#[derive(Copy, Clone)]
330pub struct XRegVal(XRegUnion);
331
332impl PartialEq for XRegVal {
333    fn eq(&self, other: &Self) -> bool {
334        self.get_u64() == other.get_u64()
335    }
336}
337
338impl Eq for XRegVal {}
339
340impl fmt::Debug for XRegVal {
341    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
342        f.debug_struct("XRegVal")
343            .field("as_u64", &self.get_u64())
344            .finish()
345    }
346}
347
348impl fmt::LowerHex for XRegVal {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        fmt::LowerHex::fmt(&self.get_u64(), f)
351    }
352}
353
354/// Contents of an "x" register, or a general-purpose register.
355///
356/// This is represented as a Rust `union` to make it easier to access typed
357/// views of this, notably the `ptr` field which preserves pointer provenance
358/// in Rust for values stored as a pointer and later read back as a pointer.
359///
360/// Note that the actual in-memory representation of this value is handled
361/// carefully at this time. Pulley bytecode exposes the ability to store a
362/// 32-bit result into a register and then read the 64-bit contents of the
363/// register. This leaves us with the question of what to do with the upper bits
364/// of the register when the 32-bit result is generated. Possibilities for
365/// handling this are:
366///
367/// 1. Do nothing, just store the 32-bit value. The problem with this approach
368///    is that the "upper bits" become endianness-dependent, which means that
369///    the state of the register is now platform-dependent.
370/// 2. Sign or zero-extend. This restores platform-independent behavior but
371///    requires an extra store on 32-bit platforms because they can probably
372///    only store 32 bits at a time.
373/// 3. Always store the values in this union as little-endian. This means that
374///    big-endian platforms have to do a byte-swap but otherwise it has
375///    platform-independent behavior.
376///
377/// This union chooses route (3) at this time where the values here are always
378/// stored in little-endian form (even the `ptr` field). That guarantees
379/// cross-platform behavior while also minimizing the amount of data stored on
380/// writes.
381///
382/// In the future we may wish to benchmark this and possibly change this.
383/// Technically Cranelift-generated bytecode should never rely on the upper bits
384/// of a register if it didn't previously write them so this in theory doesn't
385/// actually matter for Cranelift or wasm semantics. The only cost right now is
386/// to big-endian platforms though and it's not certain how crucial performance
387/// will be there.
388///
389/// One final note is that this notably contrasts with native CPUs where
390/// native ISAs like RISC-V specifically define the entire register on every
391/// instruction, even if only the low half contains a significant result. Pulley
392/// is unlikely to become out-of-order within the CPU itself as it's interpreted,
393/// meaning that severing data-dependencies with previous operations is
394/// hypothesized to not be too important. If this is ever a problem, though, it
395/// could increase the likelihood we go for route (2) above instead (or maybe
396/// even (1)).
397#[derive(Copy, Clone)]
398union XRegUnion {
399    i32: i32,
400    u32: u32,
401    i64: i64,
402    u64: u64,
403
404    // Note that this is intentionally `usize` and not an actual pointer like
405    // `*mut u8`. The reason for this is that provenance is required in Rust for
406    // pointers but Cranelift has no pointer type and thus no concept of
407    // provenance. That means that at-rest it's not known whether the value has
408    // provenance or not and basically means that Pulley is required to use
409    // "permissive provenance" in Rust as opposed to strict provenance.
410    //
411    // That's more-or-less a long-winded way of saying that storage of a pointer
412    // in this value is done with `.expose_provenance()` and reading a pointer
413    // uses `with_exposed_provenance_mut(..)`.
414    ptr: usize,
415}
416
417impl Default for XRegVal {
418    fn default() -> Self {
419        Self(unsafe { mem::zeroed() })
420    }
421}
422
423#[expect(missing_docs, reason = "self-describing methods")]
424impl XRegVal {
425    pub fn new_i32(x: i32) -> Self {
426        let mut val = XRegVal::default();
427        val.set_i32(x);
428        val
429    }
430
431    pub fn new_u32(x: u32) -> Self {
432        let mut val = XRegVal::default();
433        val.set_u32(x);
434        val
435    }
436
437    pub fn new_i64(x: i64) -> Self {
438        let mut val = XRegVal::default();
439        val.set_i64(x);
440        val
441    }
442
443    pub fn new_u64(x: u64) -> Self {
444        let mut val = XRegVal::default();
445        val.set_u64(x);
446        val
447    }
448
449    pub fn new_ptr<T>(ptr: *mut T) -> Self {
450        let mut val = XRegVal::default();
451        val.set_ptr(ptr);
452        val
453    }
454
455    pub fn get_i32(&self) -> i32 {
456        let x = unsafe { self.0.i32 };
457        i32::from_le(x)
458    }
459
460    pub fn get_u32(&self) -> u32 {
461        let x = unsafe { self.0.u32 };
462        u32::from_le(x)
463    }
464
465    pub fn get_i64(&self) -> i64 {
466        let x = unsafe { self.0.i64 };
467        i64::from_le(x)
468    }
469
470    pub fn get_u64(&self) -> u64 {
471        let x = unsafe { self.0.u64 };
472        u64::from_le(x)
473    }
474
475    pub fn get_ptr<T>(&self) -> *mut T {
476        let ptr = unsafe { self.0.ptr };
477        core::ptr::with_exposed_provenance_mut(usize::from_le(ptr))
478    }
479
480    pub fn set_i32(&mut self, x: i32) {
481        self.0.i32 = x.to_le();
482    }
483
484    pub fn set_u32(&mut self, x: u32) {
485        self.0.u32 = x.to_le();
486    }
487
488    pub fn set_i64(&mut self, x: i64) {
489        self.0.i64 = x.to_le();
490    }
491
492    pub fn set_u64(&mut self, x: u64) {
493        self.0.u64 = x.to_le();
494    }
495
496    pub fn set_ptr<T>(&mut self, ptr: *mut T) {
497        self.0.ptr = ptr.expose_provenance().to_le();
498    }
499}
500
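// An added check (not from the original file): because `XRegUnion` stores its
// contents little-endian, narrow writes and reads observe the same low bits on
// both little- and big-endian hosts.
#[test]
fn xregval_little_endian_roundtrip() {
    let mut val = XRegVal::default();
    val.set_u64(0x0123_4567_89ab_cdef);
    assert_eq!(val.get_u64(), 0x0123_4567_89ab_cdef);
    // The 32-bit view reads the low 32 bits regardless of host endianness.
    assert_eq!(val.get_u32(), 0x89ab_cdef);
    val.set_i32(-1);
    assert_eq!(val.get_i32(), -1);
    assert_eq!(val.get_u32(), u32::MAX);
}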
501/// An `f` register value: floats.
502#[derive(Copy, Clone)]
503pub struct FRegVal(FRegUnion);
504
505impl fmt::Debug for FRegVal {
506    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
507        f.debug_struct("FRegVal")
508            .field("as_f32", &self.get_f32())
509            .field("as_f64", &self.get_f64())
510            .finish()
511    }
512}
513
514impl fmt::LowerHex for FRegVal {
515    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516        fmt::LowerHex::fmt(&self.get_f64().to_bits(), f)
517    }
518}
519
520// NB: like `XRegUnion` values here are always little-endian, see the
521// documentation above for more details.
522#[derive(Copy, Clone)]
523union FRegUnion {
524    f32: u32,
525    f64: u64,
526}
527
528impl Default for FRegVal {
529    fn default() -> Self {
530        Self(unsafe { mem::zeroed() })
531    }
532}
533
534#[expect(missing_docs, reason = "self-describing methods")]
535impl FRegVal {
536    pub fn new_f32(f: f32) -> Self {
537        let mut val = Self::default();
538        val.set_f32(f);
539        val
540    }
541
542    pub fn new_f64(f: f64) -> Self {
543        let mut val = Self::default();
544        val.set_f64(f);
545        val
546    }
547
548    pub fn get_f32(&self) -> f32 {
549        let val = unsafe { self.0.f32 };
550        f32::from_le_bytes(val.to_ne_bytes())
551    }
552
553    pub fn get_f64(&self) -> f64 {
554        let val = unsafe { self.0.f64 };
555        f64::from_le_bytes(val.to_ne_bytes())
556    }
557
558    pub fn set_f32(&mut self, val: f32) {
559        self.0.f32 = u32::from_ne_bytes(val.to_le_bytes());
560    }
561
562    pub fn set_f64(&mut self, val: f64) {
563        self.0.f64 = u64::from_ne_bytes(val.to_le_bytes());
564    }
565}
566
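// An added check (not from the original file): `FRegVal` stores raw
// little-endian bits, so float values round-trip bit-exactly, including the
// sign of zero and NaN bit patterns.
#[test]
fn fregval_bit_exact_roundtrip() {
    let mut val = FRegVal::default();
    val.set_f32(f32::NAN);
    assert_eq!(val.get_f32().to_bits(), f32::NAN.to_bits());
    val.set_f64(-0.0);
    assert_eq!(val.get_f64().to_bits(), (-0.0_f64).to_bits());
}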
567/// A `v` register value: vectors.
568#[derive(Copy, Clone)]
569pub struct VRegVal(VRegUnion);
570
571impl fmt::Debug for VRegVal {
572    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
573        f.debug_struct("VRegVal")
574            .field("as_u128", &unsafe { self.0.u128 })
575            .finish()
576    }
577}
578
579impl fmt::LowerHex for VRegVal {
580    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
581        fmt::LowerHex::fmt(unsafe { &self.0.u128 }, f)
582    }
583}
584
585/// 128-bit vector registers.
586///
587/// This register is always stored in little-endian order and has different
588/// constraints than `XRegVal` and `FRegVal` above. Notably all fields of this
589/// union are the same width so all bits are always defined. Note that
590/// little-endian is still required, though, so that bitcasts between different
591/// shapes of vectors work. This union cannot be stored in big-endian form.
592#[derive(Copy, Clone)]
593#[repr(align(16))]
594union VRegUnion {
595    u128: u128,
596    i8x16: [i8; 16],
597    i16x8: [i16; 8],
598    i32x4: [i32; 4],
599    i64x2: [i64; 2],
600    u8x16: [u8; 16],
601    u16x8: [u16; 8],
602    u32x4: [u32; 4],
603    u64x2: [u64; 2],
604    // Note that these are `u32` and `u64`, not f32/f64. That's only because
605    // f32/f64 don't have `.to_le()` and `::from_le()`, so we need to go through
606    // the bits anyway.
607    f32x4: [u32; 4],
608    f64x2: [u64; 2],
609}
610
611impl Default for VRegVal {
612    fn default() -> Self {
613        Self(unsafe { mem::zeroed() })
614    }
615}
616
617#[expect(missing_docs, reason = "self-describing methods")]
618impl VRegVal {
619    pub fn new_u128(i: u128) -> Self {
620        let mut val = Self::default();
621        val.set_u128(i);
622        val
623    }
624
625    pub fn get_u128(&self) -> u128 {
626        let val = unsafe { self.0.u128 };
627        u128::from_le(val)
628    }
629
630    pub fn set_u128(&mut self, val: u128) {
631        self.0.u128 = val.to_le();
632    }
633
634    fn get_i8x16(&self) -> [i8; 16] {
635        let val = unsafe { self.0.i8x16 };
636        val.map(|e| i8::from_le(e))
637    }
638
639    fn set_i8x16(&mut self, val: [i8; 16]) {
640        self.0.i8x16 = val.map(|e| e.to_le());
641    }
642
643    fn get_u8x16(&self) -> [u8; 16] {
644        let val = unsafe { self.0.u8x16 };
645        val.map(|e| u8::from_le(e))
646    }
647
648    fn set_u8x16(&mut self, val: [u8; 16]) {
649        self.0.u8x16 = val.map(|e| e.to_le());
650    }
651
652    fn get_i16x8(&self) -> [i16; 8] {
653        let val = unsafe { self.0.i16x8 };
654        val.map(|e| i16::from_le(e))
655    }
656
657    fn set_i16x8(&mut self, val: [i16; 8]) {
658        self.0.i16x8 = val.map(|e| e.to_le());
659    }
660
661    fn get_u16x8(&self) -> [u16; 8] {
662        let val = unsafe { self.0.u16x8 };
663        val.map(|e| u16::from_le(e))
664    }
665
666    fn set_u16x8(&mut self, val: [u16; 8]) {
667        self.0.u16x8 = val.map(|e| e.to_le());
668    }
669
670    fn get_i32x4(&self) -> [i32; 4] {
671        let val = unsafe { self.0.i32x4 };
672        val.map(|e| i32::from_le(e))
673    }
674
675    fn set_i32x4(&mut self, val: [i32; 4]) {
676        self.0.i32x4 = val.map(|e| e.to_le());
677    }
678
679    fn get_u32x4(&self) -> [u32; 4] {
680        let val = unsafe { self.0.u32x4 };
681        val.map(|e| u32::from_le(e))
682    }
683
684    fn set_u32x4(&mut self, val: [u32; 4]) {
685        self.0.u32x4 = val.map(|e| e.to_le());
686    }
687
688    fn get_i64x2(&self) -> [i64; 2] {
689        let val = unsafe { self.0.i64x2 };
690        val.map(|e| i64::from_le(e))
691    }
692
693    fn set_i64x2(&mut self, val: [i64; 2]) {
694        self.0.i64x2 = val.map(|e| e.to_le());
695    }
696
697    fn get_u64x2(&self) -> [u64; 2] {
698        let val = unsafe { self.0.u64x2 };
699        val.map(|e| u64::from_le(e))
700    }
701
702    fn set_u64x2(&mut self, val: [u64; 2]) {
703        self.0.u64x2 = val.map(|e| e.to_le());
704    }
705
706    fn get_f64x2(&self) -> [f64; 2] {
707        let val = unsafe { self.0.f64x2 };
708        val.map(|e| f64::from_bits(u64::from_le(e)))
709    }
710
711    fn set_f64x2(&mut self, val: [f64; 2]) {
712        self.0.f64x2 = val.map(|e| e.to_bits().to_le());
713    }
714
715    fn get_f32x4(&self) -> [f32; 4] {
716        let val = unsafe { self.0.f32x4 };
717        val.map(|e| f32::from_bits(u32::from_le(e)))
718    }
719
720    fn set_f32x4(&mut self, val: [f32; 4]) {
721        self.0.f32x4 = val.map(|e| e.to_bits().to_le());
722    }
723}
724
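// An added check (not from the original file) pinning down lane order: because
// `VRegUnion` is stored little-endian, lane 0 of the `u64x2` view is the low
// 64 bits of the 128-bit value on every host.
#[test]
fn vregval_lane_order() {
    let mut val = VRegVal::default();
    val.set_u128(0x1111_2222_3333_4444_aaaa_bbbb_cccc_dddd);
    assert_eq!(val.get_u128(), 0x1111_2222_3333_4444_aaaa_bbbb_cccc_dddd);
    assert_eq!(val.get_u64x2(), [0xaaaa_bbbb_cccc_dddd, 0x1111_2222_3333_4444]);
}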
725/// The machine state for a Pulley virtual machine: the various registers and
726/// stack.
727pub struct MachineState {
728    x_regs: [XRegVal; XReg::RANGE.end as usize],
729    f_regs: [FRegVal; FReg::RANGE.end as usize],
730    v_regs: [VRegVal; VReg::RANGE.end as usize],
731    fp: *mut u8,
732    lr: *mut u8,
733    stack: Stack,
734    done_reason: Option<DoneReason<()>>,
735}
736
737unsafe impl Send for MachineState {}
738unsafe impl Sync for MachineState {}
739
740/// Helper structure to store the state of the Pulley stack.
741///
742/// The Pulley stack notably needs to be a 16-byte aligned allocation on the
743/// host to ensure that addresses handed out are indeed 16-byte aligned. This is
744/// done internally with a custom `Vec<T>` where `T` is the `Align16` helper
745/// type below, which has both a size and an alignment of 16.
746struct Stack {
747    storage: Vec<Align16>,
748}
749
750/// Helper type used with `Stack` above.
751#[derive(Copy, Clone)]
752#[repr(align(16))]
753struct Align16 {
754    // Just here to give the structure a size of 16. The alignment is always 16
755    // regardless of what the host platform's alignment of u128 is.
756    _unused: u128,
757}
758
759impl Stack {
760    /// Creates a new stack which will have a byte size of at least `size`.
761    ///
762    /// The allocated stack might be slightly larger due to 16-byte rounding.
763    fn new(size: usize) -> Stack {
764        Stack {
765            // Round up `size` to the nearest multiple of 16. Note that the
766            // stack is also allocated here but not initialized, and that's
767            // intentional as pulley bytecode should always initialize the stack
768            // before use.
769            storage: Vec::with_capacity((size + 15) / 16),
770        }
771    }
772
773    /// Returns a pointer to the top of the stack (the highest address).
774    ///
775    /// Note that the returned pointer has provenance for the entire stack
776    /// allocation, not just the top.
777    fn top(&mut self) -> *mut u8 {
778        let len = self.len();
779        unsafe { self.base().add(len) }
780    }
781
782    /// Returns a pointer to the base of the stack (the lowest address).
783    ///
784    /// Note that the returned pointer has provenance for the entire stack
785    /// allocation, not just the base.
786    fn base(&mut self) -> *mut u8 {
787        self.storage.as_mut_ptr().cast::<u8>()
788    }
789
790    /// Returns the length, in bytes, of this stack allocation.
791    fn len(&self) -> usize {
792        self.storage.capacity() * mem::size_of::<Align16>()
793    }
794}
795
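// An added check (not from the original file): the stack allocation is rounded
// up to a multiple of 16 bytes and is always at least as large as requested.
#[test]
fn stack_size_is_rounded_to_16() {
    for request in [0, 1, 15, 16, 17, 4096] {
        let stack = Stack::new(request);
        assert!(stack.len() >= request);
        assert_eq!(stack.len() % 16, 0);
    }
}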
796impl fmt::Debug for MachineState {
797    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
798        let MachineState {
799            x_regs,
800            f_regs,
801            v_regs,
802            stack: _,
803            done_reason: _,
804            fp: _,
805            lr: _,
806        } = self;
807
808        struct RegMap<'a, R>(&'a [R], fn(u8) -> alloc::string::String);
809
810        impl<R: fmt::Debug> fmt::Debug for RegMap<'_, R> {
811            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
812                let mut f = f.debug_map();
813                for (i, r) in self.0.iter().enumerate() {
814                    f.entry(&(self.1)(i as u8), r);
815                }
816                f.finish()
817            }
818        }
819
820        f.debug_struct("MachineState")
821            .field(
822                "x_regs",
823                &RegMap(x_regs, |i| XReg::new(i).unwrap().to_string()),
824            )
825            .field(
826                "f_regs",
827                &RegMap(f_regs, |i| FReg::new(i).unwrap().to_string()),
828            )
829            .field(
830                "v_regs",
831                &RegMap(v_regs, |i| VReg::new(i).unwrap().to_string()),
832            )
833            .finish_non_exhaustive()
834    }
835}
836
837macro_rules! index_reg {
838    ($reg_ty:ty,$value_ty:ty,$field:ident) => {
839        impl Index<$reg_ty> for Vm {
840            type Output = $value_ty;
841
842            fn index(&self, reg: $reg_ty) -> &Self::Output {
843                &self.state[reg]
844            }
845        }
846
847        impl IndexMut<$reg_ty> for Vm {
848            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
849                &mut self.state[reg]
850            }
851        }
852
853        impl Index<$reg_ty> for MachineState {
854            type Output = $value_ty;
855
856            fn index(&self, reg: $reg_ty) -> &Self::Output {
857                &self.$field[reg.index()]
858            }
859        }
860
861        impl IndexMut<$reg_ty> for MachineState {
862            fn index_mut(&mut self, reg: $reg_ty) -> &mut Self::Output {
863                &mut self.$field[reg.index()]
864            }
865        }
866    };
867}
868
869index_reg!(XReg, XRegVal, x_regs);
870index_reg!(FReg, FRegVal, f_regs);
871index_reg!(VReg, VRegVal, v_regs);
872
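// An added check (not from the original file): the `Index`/`IndexMut` impls
// generated just above allow direct register access on both `Vm` and
// `MachineState`.
#[test]
fn register_indexing() {
    let mut vm = Vm::new();
    vm[XReg::x0].set_u64(42);
    assert_eq!(vm[XReg::x0].get_u64(), 42);
    assert_eq!(vm.state()[XReg::x0].get_u64(), 42);
}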
873/// Sentinel return address that signals the end of the call stack.
874const HOST_RETURN_ADDR: *mut u8 = usize::MAX as *mut u8;
875
876impl MachineState {
877    fn with_stack(stack_size: usize) -> Self {
878        let mut state = Self {
879            x_regs: [Default::default(); XReg::RANGE.end as usize],
880            f_regs: Default::default(),
881            v_regs: Default::default(),
882            stack: Stack::new(stack_size),
883            done_reason: None,
884            fp: HOST_RETURN_ADDR,
885            lr: HOST_RETURN_ADDR,
886        };
887
888        let sp = state.stack.top();
889        state[XReg::sp] = XRegVal::new_ptr(sp);
890
891        state
892    }
893}
894
895/// Inner private module to prevent creation of the `Done` structure outside of
896/// this module.
897mod done {
898    use super::{Encode, Interpreter, MachineState};
899    use core::ops::ControlFlow;
900    use core::ptr::NonNull;
901
902    /// Zero-sized sentinel indicating that pulley execution has halted.
903    ///
904    /// The reason for halting is stored in `MachineState`.
905    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
906    pub struct Done {
907        _priv: (),
908    }
909
910    /// Reason that the pulley interpreter has ceased execution.
911    pub enum DoneReason<T> {
912        /// A trap happened at this bytecode instruction.
913        Trap {
914            /// Which instruction is raising this trap.
915            pc: NonNull<u8>,
916            /// The kind of trap being raised, if known.
917            kind: Option<TrapKind>,
918        },
919        /// The `call_indirect_host` instruction was executed.
920        CallIndirectHost {
921            /// The payload of `call_indirect_host`.
922            id: u8,
923            /// Where to resume execution after the host has finished.
924            resume: NonNull<u8>,
925        },
926        /// Pulley has finished and the provided value is being returned.
927        ReturnToHost(T),
928    }
929
930    /// Stored within `DoneReason::Trap`.
931    #[expect(missing_docs, reason = "self-describing variants")]
932    pub enum TrapKind {
933        DivideByZero,
934        IntegerOverflow,
935        BadConversionToInteger,
936        MemoryOutOfBounds,
937    }
938
939    impl MachineState {
940        pub(super) fn debug_assert_done_reason_none(&mut self) {
941            debug_assert!(self.done_reason.is_none());
942        }
943
944        pub(super) fn done_decode(&mut self, Done { _priv }: Done) -> DoneReason<()> {
945            self.done_reason.take().unwrap()
946        }
947    }
948
949    impl Interpreter<'_> {
950        /// Finishes execution by recording `DoneReason::Trap`.
951        ///
952        /// This method takes an `I` generic parameter indicating which
953        /// instruction is executing this function and generating a trap. That's
954        /// used to step backwards from the current `pc`, which is just beyond the
955        /// instruction, so that the trap metadata returned from the interpreter
956        /// points at the instruction itself.
957        #[cold]
958        pub fn done_trap<I: Encode>(&mut self) -> ControlFlow<Done> {
959            self.done_trap_kind::<I>(None)
960        }
961
962        /// Same as `done_trap` but with an explicit `TrapKind`.
963        #[cold]
964        pub fn done_trap_kind<I: Encode>(&mut self, kind: Option<TrapKind>) -> ControlFlow<Done> {
965            let pc = self.current_pc::<I>();
966            self.state.done_reason = Some(DoneReason::Trap { pc, kind });
967            ControlFlow::Break(Done { _priv: () })
968        }
969
970        /// Finishes execution by recording `DoneReason::CallIndirectHost`.
971        #[cold]
972        pub fn done_call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
973            self.state.done_reason = Some(DoneReason::CallIndirectHost {
974                id,
975                resume: self.pc.as_ptr(),
976            });
977            ControlFlow::Break(Done { _priv: () })
978        }
979
980        /// Finishes execution by recording `DoneReason::ReturnToHost`.
981        #[cold]
982        pub fn done_return_to_host(&mut self) -> ControlFlow<Done> {
983            self.state.done_reason = Some(DoneReason::ReturnToHost(()));
984            ControlFlow::Break(Done { _priv: () })
985        }
986    }
987}
988
989use done::Done;
990pub use done::{DoneReason, TrapKind};
991
992struct Interpreter<'a> {
993    state: &'a mut MachineState,
994    pc: UnsafeBytecodeStream,
995    executing_pc: ExecutingPcRef<'a>,
996}
997
998impl Interpreter<'_> {
999    /// Performs a relative jump of `offset` bytes from the current instruction.
1000    ///
1001    /// This will jump `offset` bytes away from the start of the current
1002    /// instruction, identified by `I`. Note that `self.pc` at the start of this
1003    /// function actually points to the next instruction, so `I` is necessary to
1004    /// go back to the current instruction, from which we then go `offset` away.
1005    #[inline]
1006    fn pc_rel_jump<I: Encode>(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1007        let offset = isize::try_from(i32::from(offset)).unwrap();
1008        let my_pc = self.current_pc::<I>();
1009        self.pc = unsafe { UnsafeBytecodeStream::new(my_pc.offset(offset)) };
1010        ControlFlow::Continue(())
1011    }
1012
1013    /// Returns the PC of the current instruction where `I` is the static type
1014    /// representing the current instruction.
1015    fn current_pc<I: Encode>(&self) -> NonNull<u8> {
1016        unsafe { self.pc.offset(-isize::from(I::WIDTH)).as_ptr() }
1017    }
1018
1019    /// `sp -= size_of::<T>(); *sp = val;`
1020    ///
1021    /// Note that `I` is the instruction which is pushing data to use if a trap
1022    /// is generated.
1023    #[must_use]
1024    fn push<I: Encode, T>(&mut self, val: T) -> ControlFlow<Done> {
1025        let new_sp = self.state[XReg::sp].get_ptr::<T>().wrapping_sub(1);
1026        self.set_sp::<I>(new_sp.cast())?;
1027        unsafe {
1028            new_sp.write_unaligned(val);
1029        }
1030        ControlFlow::Continue(())
1031    }
1032
1033    /// `ret = *sp; sp += size_of::<T>()`
1034    fn pop<T>(&mut self) -> T {
1035        let sp = self.state[XReg::sp].get_ptr::<T>();
1036        let val = unsafe { sp.read_unaligned() };
1037        self.set_sp_unchecked(sp.wrapping_add(1));
1038        val
1039    }
1040
1041    /// Sets the stack pointer to the `sp` provided.
1042    ///
1043    /// Returns a trap if this would result in stack overflow, i.e. if `sp` is
1044    /// beneath the base pointer of `self.state.stack`.
1045    ///
1046    /// The `I` parameter here is the instruction that is setting the stack
1047    /// pointer and is used to calculate this instruction's own `pc` if this
1048    /// instruction traps.
1049    #[must_use]
1050    fn set_sp<I: Encode>(&mut self, sp: *mut u8) -> ControlFlow<Done> {
1051        let sp_raw = sp as usize;
1052        let base_raw = self.state.stack.base() as usize;
1053        if sp_raw < base_raw {
1054            return self.done_trap::<I>();
1055        }
1056        self.set_sp_unchecked(sp);
1057        ControlFlow::Continue(())
1058    }
1059
1060    /// Same as `set_sp` but does not check to see if `sp` is in-bounds. Should
1061    /// only be used with stack increment operations such as `pop`.
1062    fn set_sp_unchecked<T>(&mut self, sp: *mut T) {
1063        if cfg!(debug_assertions) {
1064            let sp_raw = sp as usize;
1065            let base = self.state.stack.base() as usize;
1066            let end = base + self.state.stack.len();
1067            assert!(base <= sp_raw && sp_raw <= end);
1068        }
1069        self.state[XReg::sp].set_ptr(sp);
1070    }
1071
1072    /// Loads a value of `T` using native-endian byte ordering from the `addr`
1073    /// specified.
1074    ///
1075    /// The `I` type parameter is the instruction issuing this load which is
1076    /// used in case of traps to calculate the trapping pc.
1077    ///
1078    /// Returns `ControlFlow::Break` if a trap happens or
1079    /// `ControlFlow::Continue` if the value was loaded successfully.
1080    ///
1081    /// # Unsafety
1082    ///
1083    /// Safety of this method relies on the safety of the original bytecode
1084    /// itself and correctly annotating both `T` and `I`.
1085    #[must_use]
1086    unsafe fn load_ne<T, I: Encode>(&mut self, addr: impl AddressingMode) -> ControlFlow<Done, T> {
1087        unsafe { addr.load_ne::<T, I>(self) }
1088    }
1089
1090    /// Stores a `val` to the `addr` specified.
1091    ///
1092    /// The `I` type parameter is the instruction issuing this store which is
1093    /// used in case of traps to calculate the trapping pc.
1094    ///
1095    /// Returns `ControlFlow::Break` if a trap happens or
1096    /// `ControlFlow::Continue` if the value was stored successfully.
1097    ///
1098    /// # Unsafety
1099    ///
1100    /// Safety of this method relies on the safety of the original bytecode
1101    /// itself and correctly annotating both `T` and `I`.
1102    #[must_use]
1103    unsafe fn store_ne<T, I: Encode>(
1104        &mut self,
1105        addr: impl AddressingMode,
1106        val: T,
1107    ) -> ControlFlow<Done> {
1108        unsafe { addr.store_ne::<T, I>(self, val) }
1109    }
1110
1111    fn check_xnn_from_fnn<I: Encode>(&mut self, val: f64, lo: f64, hi: f64) -> ControlFlow<Done> {
1112        if val != val {
1113            return self.done_trap_kind::<I>(Some(TrapKind::BadConversionToInteger));
1114        }
1115        let val = val.wasm_trunc();
1116        if val <= lo || val >= hi {
1117            return self.done_trap_kind::<I>(Some(TrapKind::IntegerOverflow));
1118        }
1119        ControlFlow::Continue(())
1120    }
1121
1122    fn get_i128(&self, lo: XReg, hi: XReg) -> i128 {
1123        let lo = self.state[lo].get_u64();
1124        let hi = self.state[hi].get_i64();
1125        i128::from(lo) | (i128::from(hi) << 64)
1126    }
1127
1128    fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) {
1129        self.state[lo].set_u64(val as u64);
1130        self.state[hi].set_u64((val >> 64) as u64);
1131    }
1132
1133    fn record_executing_pc_for_profiling(&mut self) {
1134        // Note that this is a no-op if `feature = "profile"` is disabled.
1135        self.executing_pc.record(self.pc.as_ptr().as_ptr() as usize);
1136    }
1137}
1138
1139/// Helper trait to encompass the various addressing modes of Pulley.
1140trait AddressingMode: Sized {
1141    /// Calculates the native host address `*mut T` corresponding to this
1142    /// addressing mode.
1143    ///
1144    /// # Safety
1145    ///
1146    /// Relies on the original bytecode being safe to execute: this performs
1147    /// raw pointer offsets, for example, which are only sound if the original
1148    /// bytecode is correct.
1149    #[must_use]
1150    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T>;
1151
1152    /// Loads a value of `T` from this address, using native-endian byte order.
1153    ///
1154    /// For more information see [`Interpreter::load_ne`].
1155    #[must_use]
1156    unsafe fn load_ne<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, T> {
1157        let ret = unsafe { self.addr::<T, I>(i)?.read_unaligned() };
1158        ControlFlow::Continue(ret)
1159    }
1160
1161    /// Stores a `val` to this address, using native-endian byte order.
1162    ///
1163    /// For more information see [`Interpreter::store_ne`].
1164    #[must_use]
1165    unsafe fn store_ne<T, I: Encode>(self, i: &mut Interpreter<'_>, val: T) -> ControlFlow<Done> {
1166        unsafe {
1167            self.addr::<T, I>(i)?.write_unaligned(val);
1168        }
1169        ControlFlow::Continue(())
1170    }
1171}
1172
1173impl AddressingMode for AddrO32 {
1174    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1175        // Note that this addressing mode can never return `ControlFlow::Break`,
1176        // which is intentional: it's expected that LLVM optimizes away any
1177        // branches in callers.
1178        unsafe {
1179            ControlFlow::Continue(
1180                i.state[self.addr]
1181                    .get_ptr::<T>()
1182                    .byte_offset(self.offset as isize),
1183            )
1184        }
1185    }
1186}
1187
1188impl AddressingMode for AddrZ {
1189    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1190        // This addressing mode defines loading/storing to the null address as
1191        // a trap, but all other addresses are allowed.
1192        let host_addr = i.state[self.addr].get_ptr::<T>();
1193        if host_addr.is_null() {
1194            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1195            unreachable!();
1196        }
1197        unsafe {
1198            let addr = host_addr.byte_offset(self.offset as isize);
1199            ControlFlow::Continue(addr)
1200        }
1201    }
1202}
1203
1204impl AddressingMode for AddrG32 {
1205    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1206        // Generate a trap if the wasm address is greater than
1207        // `bound - offset - size_of::<T>()`. It's a guarantee of this
1208        // instruction that these subtractions don't overflow.
1209        let bound = i.state[self.host_heap_bound].get_u64() as usize;
1210        let offset = usize::from(self.offset);
1211        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1212        if wasm_addr > bound - offset - size_of::<T>() {
1213            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1214            unreachable!();
1215        }
1216        unsafe {
1217            let addr = i.state[self.host_heap_base]
1218                .get_ptr::<T>()
1219                .byte_add(wasm_addr)
1220                .byte_add(offset);
1221            ControlFlow::Continue(addr)
1222        }
1223    }
1224}
1225
1226impl AddressingMode for AddrG32Bne {
1227    unsafe fn addr<T, I: Encode>(self, i: &mut Interpreter<'_>) -> ControlFlow<Done, *mut T> {
1228        // Same as `AddrG32` above except that the bound is loaded from memory.
1229        let bound = unsafe {
1230            *i.state[self.host_heap_bound_addr]
1231                .get_ptr::<usize>()
1232                .byte_add(usize::from(self.host_heap_bound_offset))
1233        };
1234        let wasm_addr = i.state[self.wasm_addr].get_u32() as usize;
1235        let offset = usize::from(self.offset);
1236        if wasm_addr > bound - offset - size_of::<T>() {
1237            i.done_trap_kind::<I>(Some(TrapKind::MemoryOutOfBounds))?;
1238            unreachable!();
1239        }
1240        unsafe {
1241            let addr = i.state[self.host_heap_base]
1242                .get_ptr::<T>()
1243                .byte_add(wasm_addr)
1244                .byte_add(offset);
1245            ControlFlow::Continue(addr)
1246        }
1247    }
1248}
1249
1250#[test]
1251fn simple_push_pop() {
1252    let mut state = MachineState::with_stack(16);
1253    let pc = ExecutingPc::default();
1254    unsafe {
1255        let mut bytecode = [0; 10];
1256        let mut i = Interpreter {
1257            state: &mut state,
1258            // this isn't actually read so just manufacture a dummy one
1259            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
1260            executing_pc: pc.as_ref(),
1261        };
1262        assert!(i.push::<crate::Ret, _>(0_i32).is_continue());
1263        assert_eq!(i.pop::<i32>(), 0_i32);
1264        assert!(i.push::<crate::Ret, _>(1_i32).is_continue());
1265        assert!(i.push::<crate::Ret, _>(2_i32).is_continue());
1266        assert!(i.push::<crate::Ret, _>(3_i32).is_continue());
1267        assert!(i.push::<crate::Ret, _>(4_i32).is_continue());
1268        assert!(i.push::<crate::Ret, _>(5_i32).is_break());
1269        assert!(i.push::<crate::Ret, _>(6_i32).is_break());
1270        assert_eq!(i.pop::<i32>(), 4_i32);
1271        assert_eq!(i.pop::<i32>(), 3_i32);
1272        assert_eq!(i.pop::<i32>(), 2_i32);
1273        assert_eq!(i.pop::<i32>(), 1_i32);
1274    }
1275}
1276
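// An added test mirroring `simple_push_pop` above: `set_i128` splits a 128-bit
// value across a low/high register pair and `get_i128` reassembles it, for
// negative values as well.
#[test]
fn i128_register_pair_roundtrip() {
    let mut state = MachineState::with_stack(16);
    let pc = ExecutingPc::default();
    unsafe {
        let mut bytecode = [0; 10];
        let mut i = Interpreter {
            state: &mut state,
            // this isn't actually read so just manufacture a dummy one
            pc: UnsafeBytecodeStream::new(NonNull::new(bytecode.as_mut_ptr().offset(4)).unwrap()),
            executing_pc: pc.as_ref(),
        };
        let val: i128 = -0x0123_4567_89ab_cdef_0123_4567_89ab_cdef;
        i.set_i128(XReg::x0, XReg::x1, val);
        assert_eq!(i.get_i128(XReg::x0, XReg::x1), val);
    }
}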
1277macro_rules! br_if_imm {
1278    ($(
1279        fn $snake:ident(&mut self, a: XReg, b: $imm:ident, offset: PcRelOffset)
1280            = $camel:ident / $op:tt / $get:ident;
1281    )*) => {$(
1282        fn $snake(&mut self, a: XReg, b: $imm, offset: PcRelOffset) -> ControlFlow<Done> {
1283            let a = self.state[a].$get();
1284            if a $op b.into() {
1285                self.pc_rel_jump::<crate::$camel>(offset)
1286            } else {
1287                ControlFlow::Continue(())
1288            }
1289        }
1290    )*};
1291}
1292
1293impl OpVisitor for Interpreter<'_> {
1294    type BytecodeStream = UnsafeBytecodeStream;
1295    type Return = ControlFlow<Done>;
1296
1297    fn bytecode(&mut self) -> &mut UnsafeBytecodeStream {
1298        &mut self.pc
1299    }
1300
1301    fn ret(&mut self) -> ControlFlow<Done> {
1302        let lr = self.state.lr;
1303        if lr == HOST_RETURN_ADDR {
1304            self.done_return_to_host()
1305        } else {
1306            self.pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(lr)) };
1307            ControlFlow::Continue(())
1308        }
1309    }
1310
1311    fn call(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1312        let return_addr = self.pc.as_ptr();
1313        self.state.lr = return_addr.as_ptr();
1314        self.pc_rel_jump::<crate::Call>(offset)
1315    }
1316
1317    fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1318        let return_addr = self.pc.as_ptr();
1319        self.state.lr = return_addr.as_ptr();
1320        self.state[XReg::x0] = self.state[arg1];
1321        self.pc_rel_jump::<crate::Call1>(offset)
1322    }
1323
1324    fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1325        let return_addr = self.pc.as_ptr();
1326        self.state.lr = return_addr.as_ptr();
1327        let (x0, x1) = (self.state[arg1], self.state[arg2]);
1328        self.state[XReg::x0] = x0;
1329        self.state[XReg::x1] = x1;
1330        self.pc_rel_jump::<crate::Call2>(offset)
1331    }
1332
1333    fn call3(
1334        &mut self,
1335        arg1: XReg,
1336        arg2: XReg,
1337        arg3: XReg,
1338        offset: PcRelOffset,
1339    ) -> ControlFlow<Done> {
1340        let return_addr = self.pc.as_ptr();
1341        self.state.lr = return_addr.as_ptr();
1342        let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1343        self.state[XReg::x0] = x0;
1344        self.state[XReg::x1] = x1;
1345        self.state[XReg::x2] = x2;
1346        self.pc_rel_jump::<crate::Call3>(offset)
1347    }
1348
1349    fn call4(
1350        &mut self,
1351        arg1: XReg,
1352        arg2: XReg,
1353        arg3: XReg,
1354        arg4: XReg,
1355        offset: PcRelOffset,
1356    ) -> ControlFlow<Done> {
1357        let return_addr = self.pc.as_ptr();
1358        self.state.lr = return_addr.as_ptr();
1359        let (x0, x1, x2, x3) = (
1360            self.state[arg1],
1361            self.state[arg2],
1362            self.state[arg3],
1363            self.state[arg4],
1364        );
1365        self.state[XReg::x0] = x0;
1366        self.state[XReg::x1] = x1;
1367        self.state[XReg::x2] = x2;
1368        self.state[XReg::x3] = x3;
1369        self.pc_rel_jump::<crate::Call4>(offset)
1370    }
1371
1372    fn call_indirect(&mut self, dst: XReg) -> ControlFlow<Done> {
1373        let return_addr = self.pc.as_ptr();
1374        self.state.lr = return_addr.as_ptr();
1375        // SAFETY: part of the unsafe contract of the interpreter is only valid
1376        // bytecode is interpreted, so the jump destination is part of the validity
1377        // of the bytecode itself.
1378        unsafe {
1379            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[dst].get_ptr()));
1380        }
1381        ControlFlow::Continue(())
1382    }
1383
1384    fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
1385        self.pc_rel_jump::<crate::Jump>(offset)
1386    }
1387
1388    fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
1389        unsafe {
1390            self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
1391        }
1392        ControlFlow::Continue(())
1393    }
1394
1395    fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1396        let cond = self.state[cond].get_u32();
1397        if cond != 0 {
1398            self.pc_rel_jump::<crate::BrIf>(offset)
1399        } else {
1400            ControlFlow::Continue(())
1401        }
1402    }
1403
1404    fn br_if_not32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1405        let cond = self.state[cond].get_u32();
1406        if cond == 0 {
1407            self.pc_rel_jump::<crate::BrIfNot>(offset)
1408        } else {
1409            ControlFlow::Continue(())
1410        }
1411    }
1412
1413    fn br_if_xeq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1414        let a = self.state[a].get_u32();
1415        let b = self.state[b].get_u32();
1416        if a == b {
1417            self.pc_rel_jump::<crate::BrIfXeq32>(offset)
1418        } else {
1419            ControlFlow::Continue(())
1420        }
1421    }
1422
1423    fn br_if_xneq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1424        let a = self.state[a].get_u32();
1425        let b = self.state[b].get_u32();
1426        if a != b {
1427            self.pc_rel_jump::<crate::BrIfXneq32>(offset)
1428        } else {
1429            ControlFlow::Continue(())
1430        }
1431    }
1432
1433    fn br_if_xslt32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1434        let a = self.state[a].get_i32();
1435        let b = self.state[b].get_i32();
1436        if a < b {
1437            self.pc_rel_jump::<crate::BrIfXslt32>(offset)
1438        } else {
1439            ControlFlow::Continue(())
1440        }
1441    }
1442
1443    fn br_if_xslteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1444        let a = self.state[a].get_i32();
1445        let b = self.state[b].get_i32();
1446        if a <= b {
1447            self.pc_rel_jump::<crate::BrIfXslteq32>(offset)
1448        } else {
1449            ControlFlow::Continue(())
1450        }
1451    }
1452
1453    fn br_if_xult32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1454        let a = self.state[a].get_u32();
1455        let b = self.state[b].get_u32();
1456        if a < b {
1457            self.pc_rel_jump::<crate::BrIfXult32>(offset)
1458        } else {
1459            ControlFlow::Continue(())
1460        }
1461    }
1462
1463    fn br_if_xulteq32(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1464        let a = self.state[a].get_u32();
1465        let b = self.state[b].get_u32();
1466        if a <= b {
1467            self.pc_rel_jump::<crate::BrIfXulteq32>(offset)
1468        } else {
1469            ControlFlow::Continue(())
1470        }
1471    }
1472
1473    fn br_if_xeq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1474        let a = self.state[a].get_u64();
1475        let b = self.state[b].get_u64();
1476        if a == b {
1477            self.pc_rel_jump::<crate::BrIfXeq64>(offset)
1478        } else {
1479            ControlFlow::Continue(())
1480        }
1481    }
1482
1483    fn br_if_xneq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1484        let a = self.state[a].get_u64();
1485        let b = self.state[b].get_u64();
1486        if a != b {
1487            self.pc_rel_jump::<crate::BrIfXneq64>(offset)
1488        } else {
1489            ControlFlow::Continue(())
1490        }
1491    }
1492
1493    fn br_if_xslt64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1494        let a = self.state[a].get_i64();
1495        let b = self.state[b].get_i64();
1496        if a < b {
1497            self.pc_rel_jump::<crate::BrIfXslt64>(offset)
1498        } else {
1499            ControlFlow::Continue(())
1500        }
1501    }
1502
1503    fn br_if_xslteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1504        let a = self.state[a].get_i64();
1505        let b = self.state[b].get_i64();
1506        if a <= b {
1507            self.pc_rel_jump::<crate::BrIfXslteq64>(offset)
1508        } else {
1509            ControlFlow::Continue(())
1510        }
1511    }
1512
1513    fn br_if_xult64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1514        let a = self.state[a].get_u64();
1515        let b = self.state[b].get_u64();
1516        if a < b {
1517            self.pc_rel_jump::<crate::BrIfXult64>(offset)
1518        } else {
1519            ControlFlow::Continue(())
1520        }
1521    }
1522
1523    fn br_if_xulteq64(&mut self, a: XReg, b: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
1524        let a = self.state[a].get_u64();
1525        let b = self.state[b].get_u64();
1526        if a <= b {
1527            self.pc_rel_jump::<crate::BrIfXulteq64>(offset)
1528        } else {
1529            ControlFlow::Continue(())
1530        }
1531    }
1532
1533    br_if_imm! {
1534        fn br_if_xeq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1535            = BrIfXeq32I8 / == / get_i32;
1536        fn br_if_xeq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1537            = BrIfXeq32I32 / == / get_i32;
1538        fn br_if_xneq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1539            = BrIfXneq32I8 / != / get_i32;
1540        fn br_if_xneq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1541            = BrIfXneq32I32 / != / get_i32;
1542
1543        fn br_if_xslt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1544            = BrIfXslt32I8 / < / get_i32;
1545        fn br_if_xslt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1546            = BrIfXslt32I32 / < / get_i32;
1547        fn br_if_xsgt32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1548            = BrIfXsgt32I8 / > / get_i32;
1549        fn br_if_xsgt32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1550            = BrIfXsgt32I32 / > / get_i32;
1551        fn br_if_xslteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1552            = BrIfXslteq32I8 / <= / get_i32;
1553        fn br_if_xslteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1554            = BrIfXslteq32I32 / <= / get_i32;
1555        fn br_if_xsgteq32_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1556            = BrIfXsgteq32I8 / >= / get_i32;
1557        fn br_if_xsgteq32_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1558            = BrIfXsgteq32I32 / >= / get_i32;
1559
1560        fn br_if_xult32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1561            = BrIfXult32U8 / < / get_u32;
1562        fn br_if_xult32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1563            = BrIfXult32U32 / < / get_u32;
1564        fn br_if_xugt32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1565            = BrIfXugt32U8 / > / get_u32;
1566        fn br_if_xugt32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1567            = BrIfXugt32U32 / > / get_u32;
1568        fn br_if_xulteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1569            = BrIfXulteq32U8 / <= / get_u32;
1570        fn br_if_xulteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1571            = BrIfXulteq32U32 / <= / get_u32;
1572        fn br_if_xugteq32_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1573            = BrIfXugteq32U8 / >= / get_u32;
1574        fn br_if_xugteq32_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1575            = BrIfXugteq32U32 / >= / get_u32;
1576
1577        fn br_if_xeq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1578            = BrIfXeq64I8 / == / get_i64;
1579        fn br_if_xeq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1580            = BrIfXeq64I32 / == / get_i64;
1581        fn br_if_xneq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1582            = BrIfXneq64I8 / != / get_i64;
1583        fn br_if_xneq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1584            = BrIfXneq64I32 / != / get_i64;
1585
1586        fn br_if_xslt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1587            = BrIfXslt64I8 / < / get_i64;
1588        fn br_if_xslt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1589            = BrIfXslt64I32 / < / get_i64;
1590        fn br_if_xsgt64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1591            = BrIfXsgt64I8 / > / get_i64;
1592        fn br_if_xsgt64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1593            = BrIfXsgt64I32 / > / get_i64;
1594        fn br_if_xslteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1595            = BrIfXslteq64I8 / <= / get_i64;
1596        fn br_if_xslteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1597            = BrIfXslteq64I32 / <= / get_i64;
1598        fn br_if_xsgteq64_i8(&mut self, a: XReg, b: i8, offset: PcRelOffset)
1599            = BrIfXsgteq64I8 / >= / get_i64;
1600        fn br_if_xsgteq64_i32(&mut self, a: XReg, b: i32, offset: PcRelOffset)
1601            = BrIfXsgteq64I32 / >= / get_i64;
1602
1603        fn br_if_xult64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1604            = BrIfXult64U8 / < / get_u64;
1605        fn br_if_xult64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1606            = BrIfXult64U32 / < / get_u64;
1607        fn br_if_xugt64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1608            = BrIfXugt64U8 / > / get_u64;
1609        fn br_if_xugt64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1610            = BrIfXugt64U32 / > / get_u64;
1611        fn br_if_xulteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1612            = BrIfXulteq64U8 / <= / get_u64;
1613        fn br_if_xulteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1614            = BrIfXulteq64U32 / <= / get_u64;
1615        fn br_if_xugteq64_u8(&mut self, a: XReg, b: u8, offset: PcRelOffset)
1616            = BrIfXugteq64U8 / >= / get_u64;
1617        fn br_if_xugteq64_u32(&mut self, a: XReg, b: u32, offset: PcRelOffset)
1618            = BrIfXugteq64U32 / >= / get_u64;
1619    }
1620
1621    fn xmov(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1622        let val = self.state[src];
1623        self.state[dst] = val;
1624        ControlFlow::Continue(())
1625    }
1626
1627    fn xconst8(&mut self, dst: XReg, imm: i8) -> ControlFlow<Done> {
1628        self.state[dst].set_i64(i64::from(imm));
1629        ControlFlow::Continue(())
1630    }
1631
1632    fn xzero(&mut self, dst: XReg) -> ControlFlow<Done> {
1633        self.state[dst].set_i64(0);
1634        ControlFlow::Continue(())
1635    }
1636
1637    fn xone(&mut self, dst: XReg) -> ControlFlow<Done> {
1638        self.state[dst].set_i64(1);
1639        ControlFlow::Continue(())
1640    }
1641
1642    fn xconst16(&mut self, dst: XReg, imm: i16) -> ControlFlow<Done> {
1643        self.state[dst].set_i64(i64::from(imm));
1644        ControlFlow::Continue(())
1645    }
1646
1647    fn xconst32(&mut self, dst: XReg, imm: i32) -> ControlFlow<Done> {
1648        self.state[dst].set_i64(i64::from(imm));
1649        ControlFlow::Continue(())
1650    }
1651
1652    fn xconst64(&mut self, dst: XReg, imm: i64) -> ControlFlow<Done> {
1653        self.state[dst].set_i64(imm);
1654        ControlFlow::Continue(())
1655    }
1656
1657    fn xadd32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1658        let a = self.state[operands.src1].get_u32();
1659        let b = self.state[operands.src2].get_u32();
1660        self.state[operands.dst].set_u32(a.wrapping_add(b));
1661        ControlFlow::Continue(())
1662    }
1663
1664    fn xadd32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1665        self.xadd32_u32(dst, src1, src2.into())
1666    }
1667
1668    fn xadd32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1669        let a = self.state[src1].get_u32();
1670        self.state[dst].set_u32(a.wrapping_add(src2.into()));
1671        ControlFlow::Continue(())
1672    }
1673
1674    fn xadd64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1675        let a = self.state[operands.src1].get_u64();
1676        let b = self.state[operands.src2].get_u64();
1677        self.state[operands.dst].set_u64(a.wrapping_add(b));
1678        ControlFlow::Continue(())
1679    }
1680
1681    fn xadd64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1682        self.xadd64_u32(dst, src1, src2.into())
1683    }
1684
1685    fn xadd64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1686        let a = self.state[src1].get_u64();
1687        self.state[dst].set_u64(a.wrapping_add(src2.into()));
1688        ControlFlow::Continue(())
1689    }
1690
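    // Multiply-add: `dst = src1 * src2 + src3`, with both the multiplication
    // and the addition wrapping on overflow.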
1691    fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1692        let a = self.state[src1].get_u32();
1693        let b = self.state[src2].get_u32();
1694        let c = self.state[src3].get_u32();
1695        self.state[dst].set_u32(a.wrapping_mul(b).wrapping_add(c));
1696        ControlFlow::Continue(())
1697    }
1698
1699    fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
1700        let a = self.state[src1].get_u64();
1701        let b = self.state[src2].get_u64();
1702        let c = self.state[src3].get_u64();
1703        self.state[dst].set_u64(a.wrapping_mul(b).wrapping_add(c));
1704        ControlFlow::Continue(())
1705    }
1706
1707    fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1708        let a = self.state[operands.src1].get_u32();
1709        let b = self.state[operands.src2].get_u32();
1710        self.state[operands.dst].set_u32(a.wrapping_sub(b));
1711        ControlFlow::Continue(())
1712    }
1713
1714    fn xsub32_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1715        self.xsub32_u32(dst, src1, src2.into())
1716    }
1717
1718    fn xsub32_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1719        let a = self.state[src1].get_u32();
1720        self.state[dst].set_u32(a.wrapping_sub(src2.into()));
1721        ControlFlow::Continue(())
1722    }
1723
1724    fn xsub64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1725        let a = self.state[operands.src1].get_u64();
1726        let b = self.state[operands.src2].get_u64();
1727        self.state[operands.dst].set_u64(a.wrapping_sub(b));
1728        ControlFlow::Continue(())
1729    }
1730
1731    fn xsub64_u8(&mut self, dst: XReg, src1: XReg, src2: u8) -> ControlFlow<Done> {
1732        self.xsub64_u32(dst, src1, src2.into())
1733    }
1734
1735    fn xsub64_u32(&mut self, dst: XReg, src1: XReg, src2: u32) -> ControlFlow<Done> {
1736        let a = self.state[src1].get_u64();
1737        self.state[dst].set_u64(a.wrapping_sub(src2.into()));
1738        ControlFlow::Continue(())
1739    }
1740
1741    fn xmul32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1742        let a = self.state[operands.src1].get_u32();
1743        let b = self.state[operands.src2].get_u32();
1744        self.state[operands.dst].set_u32(a.wrapping_mul(b));
1745        ControlFlow::Continue(())
1746    }
1747
1748    fn xmul32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1749        self.xmul32_s32(dst, src1, src2.into())
1750    }
1751
1752    fn xmul32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1753        let a = self.state[src1].get_i32();
1754        self.state[dst].set_i32(a.wrapping_mul(src2));
1755        ControlFlow::Continue(())
1756    }
1757
1758    fn xmul64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1759        let a = self.state[operands.src1].get_u64();
1760        let b = self.state[operands.src2].get_u64();
1761        self.state[operands.dst].set_u64(a.wrapping_mul(b));
1762        ControlFlow::Continue(())
1763    }
1764
1765    fn xmul64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
1766        self.xmul64_s32(dst, src1, src2.into())
1767    }
1768
1769    fn xmul64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
1770        let a = self.state[src1].get_i64();
1771        self.state[dst].set_i64(a.wrapping_mul(src2.into()));
1772        ControlFlow::Continue(())
1773    }
1774
1775    fn xshl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1776        let a = self.state[operands.src1].get_u32();
1777        let b = self.state[operands.src2].get_u32();
1778        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1779        ControlFlow::Continue(())
1780    }
1781
1782    fn xshr32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1783        let a = self.state[operands.src1].get_u32();
1784        let b = self.state[operands.src2].get_u32();
1785        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1786        ControlFlow::Continue(())
1787    }
1788
1789    fn xshr32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1790        let a = self.state[operands.src1].get_i32();
1791        let b = self.state[operands.src2].get_u32();
1792        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1793        ControlFlow::Continue(())
1794    }
1795
1796    fn xshl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1797        let a = self.state[operands.src1].get_u64();
1798        let b = self.state[operands.src2].get_u32();
1799        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1800        ControlFlow::Continue(())
1801    }
1802
1803    fn xshr64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1804        let a = self.state[operands.src1].get_u64();
1805        let b = self.state[operands.src2].get_u32();
1806        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1807        ControlFlow::Continue(())
1808    }
1809
1810    fn xshr64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1811        let a = self.state[operands.src1].get_i64();
1812        let b = self.state[operands.src2].get_u32();
1813        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1814        ControlFlow::Continue(())
1815    }
1816
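    // Shift variants whose shift amount is a `U6` immediate (a 6-bit value
    // encoded directly in the instruction) rather than a register.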
1817    fn xshl32_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1818        let a = self.state[operands.src1].get_u32();
1819        let b = u32::from(u8::from(operands.src2));
1820        self.state[operands.dst].set_u32(a.wrapping_shl(b));
1821        ControlFlow::Continue(())
1822    }
1823
1824    fn xshr32_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1825        let a = self.state[operands.src1].get_u32();
1826        let b = u32::from(u8::from(operands.src2));
1827        self.state[operands.dst].set_u32(a.wrapping_shr(b));
1828        ControlFlow::Continue(())
1829    }
1830
1831    fn xshr32_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1832        let a = self.state[operands.src1].get_i32();
1833        let b = u32::from(u8::from(operands.src2));
1834        self.state[operands.dst].set_i32(a.wrapping_shr(b));
1835        ControlFlow::Continue(())
1836    }
1837
1838    fn xshl64_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1839        let a = self.state[operands.src1].get_u64();
1840        let b = u32::from(u8::from(operands.src2));
1841        self.state[operands.dst].set_u64(a.wrapping_shl(b));
1842        ControlFlow::Continue(())
1843    }
1844
1845    fn xshr64_u_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1846        let a = self.state[operands.src1].get_u64();
1847        let b = u32::from(u8::from(operands.src2));
1848        self.state[operands.dst].set_u64(a.wrapping_shr(b));
1849        ControlFlow::Continue(())
1850    }
1851
1852    fn xshr64_s_u6(&mut self, operands: BinaryOperands<XReg, XReg, U6>) -> ControlFlow<Done> {
1853        let a = self.state[operands.src1].get_i64();
1854        let b = u32::from(u8::from(operands.src2));
1855        self.state[operands.dst].set_i64(a.wrapping_shr(b));
1856        ControlFlow::Continue(())
1857    }
1858
1859    fn xneg32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1860        let a = self.state[src].get_i32();
1861        self.state[dst].set_i32(a.wrapping_neg());
1862        ControlFlow::Continue(())
1863    }
1864
1865    fn xneg64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
1866        let a = self.state[src].get_i64();
1867        self.state[dst].set_i64(a.wrapping_neg());
1868        ControlFlow::Continue(())
1869    }
1870
1871    fn xeq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1872        let a = self.state[operands.src1].get_u64();
1873        let b = self.state[operands.src2].get_u64();
1874        self.state[operands.dst].set_u32(u32::from(a == b));
1875        ControlFlow::Continue(())
1876    }
1877
1878    fn xneq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1879        let a = self.state[operands.src1].get_u64();
1880        let b = self.state[operands.src2].get_u64();
1881        self.state[operands.dst].set_u32(u32::from(a != b));
1882        ControlFlow::Continue(())
1883    }
1884
1885    fn xslt64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1886        let a = self.state[operands.src1].get_i64();
1887        let b = self.state[operands.src2].get_i64();
1888        self.state[operands.dst].set_u32(u32::from(a < b));
1889        ControlFlow::Continue(())
1890    }
1891
1892    fn xslteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1893        let a = self.state[operands.src1].get_i64();
1894        let b = self.state[operands.src2].get_i64();
1895        self.state[operands.dst].set_u32(u32::from(a <= b));
1896        ControlFlow::Continue(())
1897    }
1898
1899    fn xult64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1900        let a = self.state[operands.src1].get_u64();
1901        let b = self.state[operands.src2].get_u64();
1902        self.state[operands.dst].set_u32(u32::from(a < b));
1903        ControlFlow::Continue(())
1904    }
1905
1906    fn xulteq64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1907        let a = self.state[operands.src1].get_u64();
1908        let b = self.state[operands.src2].get_u64();
1909        self.state[operands.dst].set_u32(u32::from(a <= b));
1910        ControlFlow::Continue(())
1911    }
1912
1913    fn xeq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1914        let a = self.state[operands.src1].get_u32();
1915        let b = self.state[operands.src2].get_u32();
1916        self.state[operands.dst].set_u32(u32::from(a == b));
1917        ControlFlow::Continue(())
1918    }
1919
1920    fn xneq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1921        let a = self.state[operands.src1].get_u32();
1922        let b = self.state[operands.src2].get_u32();
1923        self.state[operands.dst].set_u32(u32::from(a != b));
1924        ControlFlow::Continue(())
1925    }
1926
1927    fn xslt32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1928        let a = self.state[operands.src1].get_i32();
1929        let b = self.state[operands.src2].get_i32();
1930        self.state[operands.dst].set_u32(u32::from(a < b));
1931        ControlFlow::Continue(())
1932    }
1933
1934    fn xslteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1935        let a = self.state[operands.src1].get_i32();
1936        let b = self.state[operands.src2].get_i32();
1937        self.state[operands.dst].set_u32(u32::from(a <= b));
1938        ControlFlow::Continue(())
1939    }
1940
1941    fn xult32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1942        let a = self.state[operands.src1].get_u32();
1943        let b = self.state[operands.src2].get_u32();
1944        self.state[operands.dst].set_u32(u32::from(a < b));
1945        ControlFlow::Continue(())
1946    }
1947
1948    fn xulteq32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
1949        let a = self.state[operands.src1].get_u32();
1950        let b = self.state[operands.src2].get_u32();
1951        self.state[operands.dst].set_u32(u32::from(a <= b));
1952        ControlFlow::Continue(())
1953    }
1954
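    // Prologue helper: push the return address (`lr`) and the old frame
    // pointer (`fp`), then point `fp` at the newly established frame.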
1955    fn push_frame(&mut self) -> ControlFlow<Done> {
1956        self.push::<crate::PushFrame, _>(self.state.lr)?;
1957        self.push::<crate::PushFrame, _>(self.state.fp)?;
1958        self.state.fp = self.state[XReg::sp].get_ptr();
1959        ControlFlow::Continue(())
1960    }
1961
1962    #[inline]
1963    fn push_frame_save(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
1964        // Decrement the stack pointer `amt` bytes plus 2 pointers more for
1965        // fp/lr.
1966        let ptr_size = size_of::<usize>();
1967        let full_amt = usize::from(amt) + 2 * ptr_size;
1968        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(full_amt);
1969        self.set_sp::<crate::PushFrameSave>(new_sp)?;
1970
1971        unsafe {
1972            // Emulate `push_frame` by placing `lr` and `fp` onto the stack, in
1973            // that order, at the top of the allocated area.
1974            self.store_ne::<_, crate::PushFrameSave>(
1975                AddrO32 {
1976                    addr: XReg::sp,
1977                    offset: (full_amt - 1 * ptr_size) as i32,
1978                },
1979                self.state.lr,
1980            )?;
1981            self.store_ne::<_, crate::PushFrameSave>(
1982                AddrO32 {
1983                    addr: XReg::sp,
1984                    offset: (full_amt - 2 * ptr_size) as i32,
1985                },
1986                self.state.fp,
1987            )?;
1988
1989            // Set `fp` to the top of our frame, where `fp` is stored.
1990            let mut offset = amt as i32;
1991            self.state.fp = self.state[XReg::sp]
1992                .get_ptr::<u8>()
1993                .byte_offset(offset as isize);
1994
1995            // Next save any registers in `regs` to the stack.
1996            for reg in regs {
1997                offset -= 8;
1998                self.store_ne::<_, crate::PushFrameSave>(
1999                    AddrO32 {
2000                        addr: XReg::sp,
2001                        offset,
2002                    },
2003                    self.state[reg].get_u64(),
2004                )?;
2005            }
2006        }
2007        ControlFlow::Continue(())
2008    }
2009
2010    fn pop_frame_restore(&mut self, amt: u16, regs: UpperRegSet<XReg>) -> ControlFlow<Done> {
2011        // Restore all registers in `regs`, followed by the normal `pop_frame`
2012        // opcode below to restore fp/lr.
2013        unsafe {
2014            let mut offset = i32::from(amt);
2015            for reg in regs {
2016                offset -= 8;
2017                let val = self.load_ne::<_, crate::PopFrameRestore>(AddrO32 {
2018                    addr: XReg::sp,
2019                    offset,
2020                })?;
2021                self.state[reg].set_u64(val);
2022            }
2023        }
2024        self.pop_frame()
2025    }
2026
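    // Epilogue counterpart to `push_frame`: reset `sp` to `fp`, then pop the
    // saved `fp` and `lr` back into their registers.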
2027    fn pop_frame(&mut self) -> ControlFlow<Done> {
2028        self.set_sp_unchecked(self.state.fp);
2029        let fp = self.pop();
2030        let lr = self.pop();
2031        self.state.fp = fp;
2032        self.state.lr = lr;
2033        ControlFlow::Continue(())
2034    }
2035
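    // Indirect branch through a table of `amt` 4-byte `PcRelOffset` entries
    // that follows this instruction; out-of-range indices clamp to the last
    // entry.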
2036    fn br_table32(&mut self, idx: XReg, amt: u32) -> ControlFlow<Done> {
2037        let idx = self.state[idx].get_u32().min(amt - 1) as isize;
2038        // SAFETY: part of the contract of the interpreter is only dealing with
2039        // valid bytecode, so this offset should be safe.
2040        self.pc = unsafe { self.pc.offset(idx * 4) };
2041
2042        // Decode the `PcRelOffset` without tampering with `self.pc` as the
2043        // jump is relative to `self.pc`.
2044        let mut tmp = self.pc;
2045        let Ok(rel) = PcRelOffset::decode(&mut tmp);
2046        let offset = isize::try_from(i32::from(rel)).unwrap();
2047        self.pc = unsafe { self.pc.offset(offset) };
2048        ControlFlow::Continue(())
2049    }
2050
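    // Stack adjustment: allocation goes through `set_sp`, which can bail out
    // of execution (note the `?`), while freeing uses `set_sp_unchecked`
    // since moving `sp` back up cannot exhaust the stack.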
2051    fn stack_alloc32(&mut self, amt: u32) -> ControlFlow<Done> {
2052        let amt = usize::try_from(amt).unwrap();
2053        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_sub(amt);
2054        self.set_sp::<crate::StackAlloc32>(new_sp)?;
2055        ControlFlow::Continue(())
2056    }
2057
2058    fn stack_free32(&mut self, amt: u32) -> ControlFlow<Done> {
2059        let amt = usize::try_from(amt).unwrap();
2060        let new_sp = self.state[XReg::sp].get_ptr::<u8>().wrapping_add(amt);
2061        self.set_sp_unchecked(new_sp);
2062        ControlFlow::Continue(())
2063    }
2064
2065    fn zext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2066        let src = self.state[src].get_u64() as u8;
2067        self.state[dst].set_u64(src.into());
2068        ControlFlow::Continue(())
2069    }
2070
2071    fn zext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2072        let src = self.state[src].get_u64() as u16;
2073        self.state[dst].set_u64(src.into());
2074        ControlFlow::Continue(())
2075    }
2076
2077    fn zext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2078        let src = self.state[src].get_u64() as u32;
2079        self.state[dst].set_u64(src.into());
2080        ControlFlow::Continue(())
2081    }
2082
2083    fn sext8(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2084        let src = self.state[src].get_i64() as i8;
2085        self.state[dst].set_i64(src.into());
2086        ControlFlow::Continue(())
2087    }
2088
2089    fn sext16(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2090        let src = self.state[src].get_i64() as i16;
2091        self.state[dst].set_i64(src.into());
2092        ControlFlow::Continue(())
2093    }
2094
2095    fn sext32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2096        let src = self.state[src].get_i64() as i32;
2097        self.state[dst].set_i64(src.into());
2098        ControlFlow::Continue(())
2099    }
2100
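    // Signed division traps on a zero divisor and on overflow (`MIN / -1`),
    // with the two cases distinguished by `TrapKind`.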
2101    fn xdiv32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2102        let a = self.state[operands.src1].get_i32();
2103        let b = self.state[operands.src2].get_i32();
2104        match a.checked_div(b) {
2105            Some(result) => {
2106                self.state[operands.dst].set_i32(result);
2107                ControlFlow::Continue(())
2108            }
2109            None => {
2110                let kind = if b == 0 {
2111                    TrapKind::DivideByZero
2112                } else {
2113                    TrapKind::IntegerOverflow
2114                };
2115                self.done_trap_kind::<crate::XDiv32S>(Some(kind))
2116            }
2117        }
2118    }
2119
2120    fn xdiv64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2121        let a = self.state[operands.src1].get_i64();
2122        let b = self.state[operands.src2].get_i64();
2123        match a.checked_div(b) {
2124            Some(result) => {
2125                self.state[operands.dst].set_i64(result);
2126                ControlFlow::Continue(())
2127            }
2128            None => {
2129                let kind = if b == 0 {
2130                    TrapKind::DivideByZero
2131                } else {
2132                    TrapKind::IntegerOverflow
2133                };
2134                self.done_trap_kind::<crate::XDiv64S>(Some(kind))
2135            }
2136        }
2137    }
2138
2139    fn xdiv32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2140        let a = self.state[operands.src1].get_u32();
2141        let b = self.state[operands.src2].get_u32();
2142        match a.checked_div(b) {
2143            Some(result) => {
2144                self.state[operands.dst].set_u32(result);
2145                ControlFlow::Continue(())
2146            }
2147            None => self.done_trap_kind::<crate::XDiv32U>(Some(TrapKind::DivideByZero)),
2148        }
2149    }
2150
2151    fn xdiv64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2152        let a = self.state[operands.src1].get_u64();
2153        let b = self.state[operands.src2].get_u64();
2154        match a.checked_div(b) {
2155            Some(result) => {
2156                self.state[operands.dst].set_u64(result);
2157                ControlFlow::Continue(())
2158            }
2159            None => self.done_trap_kind::<crate::XDiv64U>(Some(TrapKind::DivideByZero)),
2160        }
2161    }
2162
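    // Signed remainder traps only on a zero divisor; the overflowing case
    // `MIN % -1` is defined to produce 0 rather than trap.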
2163    fn xrem32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2164        let a = self.state[operands.src1].get_i32();
2165        let b = self.state[operands.src2].get_i32();
2166        let result = if a == i32::MIN && b == -1 {
2167            Some(0)
2168        } else {
2169            a.checked_rem(b)
2170        };
2171        match result {
2172            Some(result) => {
2173                self.state[operands.dst].set_i32(result);
2174                ControlFlow::Continue(())
2175            }
2176            None => self.done_trap_kind::<crate::XRem32S>(Some(TrapKind::DivideByZero)),
2177        }
2178    }
2179
2180    fn xrem64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2181        let a = self.state[operands.src1].get_i64();
2182        let b = self.state[operands.src2].get_i64();
2183        let result = if a == i64::MIN && b == -1 {
2184            Some(0)
2185        } else {
2186            a.checked_rem(b)
2187        };
2188        match result {
2189            Some(result) => {
2190                self.state[operands.dst].set_i64(result);
2191                ControlFlow::Continue(())
2192            }
2193            None => self.done_trap_kind::<crate::XRem64S>(Some(TrapKind::DivideByZero)),
2194        }
2195    }
2196
2197    fn xrem32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2198        let a = self.state[operands.src1].get_u32();
2199        let b = self.state[operands.src2].get_u32();
2200        match a.checked_rem(b) {
2201            Some(result) => {
2202                self.state[operands.dst].set_u32(result);
2203                ControlFlow::Continue(())
2204            }
2205            None => self.done_trap_kind::<crate::XRem32U>(Some(TrapKind::DivideByZero)),
2206        }
2207    }
2208
2209    fn xrem64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2210        let a = self.state[operands.src1].get_u64();
2211        let b = self.state[operands.src2].get_u64();
2212        match a.checked_rem(b) {
2213            Some(result) => {
2214                self.state[operands.dst].set_u64(result);
2215                ControlFlow::Continue(())
2216            }
2217            None => self.done_trap_kind::<crate::XRem64U>(Some(TrapKind::DivideByZero)),
2218        }
2219    }
2220
2221    fn xband32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2222        let a = self.state[operands.src1].get_u32();
2223        let b = self.state[operands.src2].get_u32();
2224        self.state[operands.dst].set_u32(a & b);
2225        ControlFlow::Continue(())
2226    }
2227
2228    fn xband32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2229        self.xband32_s32(dst, src1, src2.into())
2230    }
2231
2232    fn xband32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2233        let a = self.state[src1].get_i32();
2234        self.state[dst].set_i32(a & src2);
2235        ControlFlow::Continue(())
2236    }
2237
2238    fn xband64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2239        let a = self.state[operands.src1].get_u64();
2240        let b = self.state[operands.src2].get_u64();
2241        self.state[operands.dst].set_u64(a & b);
2242        ControlFlow::Continue(())
2243    }
2244
2245    fn xband64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2246        self.xband64_s32(dst, src1, src2.into())
2247    }
2248
2249    fn xband64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2250        let a = self.state[src1].get_i64();
2251        self.state[dst].set_i64(a & i64::from(src2));
2252        ControlFlow::Continue(())
2253    }
2254
2255    fn xbor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2256        let a = self.state[operands.src1].get_u32();
2257        let b = self.state[operands.src2].get_u32();
2258        self.state[operands.dst].set_u32(a | b);
2259        ControlFlow::Continue(())
2260    }
2261
2262    fn xbor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2263        self.xbor32_s32(dst, src1, src2.into())
2264    }
2265
2266    fn xbor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2267        let a = self.state[src1].get_i32();
2268        self.state[dst].set_i32(a | src2);
2269        ControlFlow::Continue(())
2270    }
2271
2272    fn xbor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2273        let a = self.state[operands.src1].get_u64();
2274        let b = self.state[operands.src2].get_u64();
2275        self.state[operands.dst].set_u64(a | b);
2276        ControlFlow::Continue(())
2277    }
2278
2279    fn xbor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2280        self.xbor64_s32(dst, src1, src2.into())
2281    }
2282
2283    fn xbor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2284        let a = self.state[src1].get_i64();
2285        self.state[dst].set_i64(a | i64::from(src2));
2286        ControlFlow::Continue(())
2287    }
2288
2289    fn xbxor32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2290        let a = self.state[operands.src1].get_u32();
2291        let b = self.state[operands.src2].get_u32();
2292        self.state[operands.dst].set_u32(a ^ b);
2293        ControlFlow::Continue(())
2294    }
2295
2296    fn xbxor32_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2297        self.xbxor32_s32(dst, src1, src2.into())
2298    }
2299
2300    fn xbxor32_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2301        let a = self.state[src1].get_i32();
2302        self.state[dst].set_i32(a ^ src2);
2303        ControlFlow::Continue(())
2304    }
2305
2306    fn xbxor64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2307        let a = self.state[operands.src1].get_u64();
2308        let b = self.state[operands.src2].get_u64();
2309        self.state[operands.dst].set_u64(a ^ b);
2310        ControlFlow::Continue(())
2311    }
2312
2313    fn xbxor64_s8(&mut self, dst: XReg, src1: XReg, src2: i8) -> ControlFlow<Done> {
2314        self.xbxor64_s32(dst, src1, src2.into())
2315    }
2316
2317    fn xbxor64_s32(&mut self, dst: XReg, src1: XReg, src2: i32) -> ControlFlow<Done> {
2318        let a = self.state[src1].get_i64();
2319        self.state[dst].set_i64(a ^ i64::from(src2));
2320        ControlFlow::Continue(())
2321    }
2322
2323    fn xbnot32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2324        let a = self.state[src].get_u32();
2325        self.state[dst].set_u32(!a);
2326        ControlFlow::Continue(())
2327    }
2328
2329    fn xbnot64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2330        let a = self.state[src].get_u64();
2331        self.state[dst].set_u64(!a);
2332        ControlFlow::Continue(())
2333    }
2334
2335    fn xmin32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2336        let a = self.state[operands.src1].get_u32();
2337        let b = self.state[operands.src2].get_u32();
2338        self.state[operands.dst].set_u32(a.min(b));
2339        ControlFlow::Continue(())
2340    }
2341
2342    fn xmin32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2343        let a = self.state[operands.src1].get_i32();
2344        let b = self.state[operands.src2].get_i32();
2345        self.state[operands.dst].set_i32(a.min(b));
2346        ControlFlow::Continue(())
2347    }
2348
2349    fn xmax32_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2350        let a = self.state[operands.src1].get_u32();
2351        let b = self.state[operands.src2].get_u32();
2352        self.state[operands.dst].set_u32(a.max(b));
2353        ControlFlow::Continue(())
2354    }
2355
2356    fn xmax32_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2357        let a = self.state[operands.src1].get_i32();
2358        let b = self.state[operands.src2].get_i32();
2359        self.state[operands.dst].set_i32(a.max(b));
2360        ControlFlow::Continue(())
2361    }
2362
2363    fn xmin64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2364        let a = self.state[operands.src1].get_u64();
2365        let b = self.state[operands.src2].get_u64();
2366        self.state[operands.dst].set_u64(a.min(b));
2367        ControlFlow::Continue(())
2368    }
2369
2370    fn xmin64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2371        let a = self.state[operands.src1].get_i64();
2372        let b = self.state[operands.src2].get_i64();
2373        self.state[operands.dst].set_i64(a.min(b));
2374        ControlFlow::Continue(())
2375    }
2376
2377    fn xmax64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2378        let a = self.state[operands.src1].get_u64();
2379        let b = self.state[operands.src2].get_u64();
2380        self.state[operands.dst].set_u64(a.max(b));
2381        ControlFlow::Continue(())
2382    }
2383
2384    fn xmax64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2385        let a = self.state[operands.src1].get_i64();
2386        let b = self.state[operands.src2].get_i64();
2387        self.state[operands.dst].set_i64(a.max(b));
2388        ControlFlow::Continue(())
2389    }
2390
2391    fn xctz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2392        let a = self.state[src].get_u32();
2393        self.state[dst].set_u32(a.trailing_zeros());
2394        ControlFlow::Continue(())
2395    }
2396
2397    fn xctz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2398        let a = self.state[src].get_u64();
2399        self.state[dst].set_u64(a.trailing_zeros().into());
2400        ControlFlow::Continue(())
2401    }
2402
2403    fn xclz32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2404        let a = self.state[src].get_u32();
2405        self.state[dst].set_u32(a.leading_zeros());
2406        ControlFlow::Continue(())
2407    }
2408
2409    fn xclz64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2410        let a = self.state[src].get_u64();
2411        self.state[dst].set_u64(a.leading_zeros().into());
2412        ControlFlow::Continue(())
2413    }
2414
2415    fn xpopcnt32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2416        let a = self.state[src].get_u32();
2417        self.state[dst].set_u32(a.count_ones());
2418        ControlFlow::Continue(())
2419    }
2420
2421    fn xpopcnt64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2422        let a = self.state[src].get_u64();
2423        self.state[dst].set_u64(a.count_ones().into());
2424        ControlFlow::Continue(())
2425    }
2426
2427    fn xrotl32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2428        let a = self.state[operands.src1].get_u32();
2429        let b = self.state[operands.src2].get_u32();
2430        self.state[operands.dst].set_u32(a.rotate_left(b));
2431        ControlFlow::Continue(())
2432    }
2433
2434    fn xrotl64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2435        let a = self.state[operands.src1].get_u64();
2436        let b = self.state[operands.src2].get_u32();
2437        self.state[operands.dst].set_u64(a.rotate_left(b));
2438        ControlFlow::Continue(())
2439    }
2440
2441    fn xrotr32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2442        let a = self.state[operands.src1].get_u32();
2443        let b = self.state[operands.src2].get_u32();
2444        self.state[operands.dst].set_u32(a.rotate_right(b));
2445        ControlFlow::Continue(())
2446    }
2447
2448    fn xrotr64(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2449        let a = self.state[operands.src1].get_u64();
2450        let b = self.state[operands.src2].get_u32();
2451        self.state[operands.dst].set_u64(a.rotate_right(b));
2452        ControlFlow::Continue(())
2453    }
2454
2455    fn xselect32(
2456        &mut self,
2457        dst: XReg,
2458        cond: XReg,
2459        if_nonzero: XReg,
2460        if_zero: XReg,
2461    ) -> ControlFlow<Done> {
2462        let result = if self.state[cond].get_u32() != 0 {
2463            self.state[if_nonzero].get_u32()
2464        } else {
2465            self.state[if_zero].get_u32()
2466        };
2467        self.state[dst].set_u32(result);
2468        ControlFlow::Continue(())
2469    }
2470
2471    fn xselect64(
2472        &mut self,
2473        dst: XReg,
2474        cond: XReg,
2475        if_nonzero: XReg,
2476        if_zero: XReg,
2477    ) -> ControlFlow<Done> {
2478        let result = if self.state[cond].get_u32() != 0 {
2479            self.state[if_nonzero].get_u64()
2480        } else {
2481            self.state[if_zero].get_u64()
2482        };
2483        self.state[dst].set_u64(result);
2484        ControlFlow::Continue(())
2485    }
2486
2487    fn xabs32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2488        let a = self.state[src].get_i32();
2489        self.state[dst].set_i32(a.wrapping_abs());
2490        ControlFlow::Continue(())
2491    }
2492
2493    fn xabs64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2494        let a = self.state[src].get_i64();
2495        self.state[dst].set_i64(a.wrapping_abs());
2496        ControlFlow::Continue(())
2497    }
2498
2499    // =========================================================================
2500    // o32 addressing modes
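    // `AddrO32` addresses are a base register plus a signed 32-bit byte
    // offset. The loads and stores below go through the native-endian
    // `load_ne`/`store_ne` helpers and convert to/from the little-endian
    // in-memory representation explicitly.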
2501
2502    fn xload8_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2503        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32O32>(addr)? };
2504        self.state[dst].set_u32(result.into());
2505        ControlFlow::Continue(())
2506    }
2507
2508    fn xload8_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2509        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32O32>(addr)? };
2510        self.state[dst].set_i32(result.into());
2511        ControlFlow::Continue(())
2512    }
2513
2514    fn xload16le_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2515        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32O32>(addr)? };
2516        self.state[dst].set_u32(u16::from_le(result).into());
2517        ControlFlow::Continue(())
2518    }
2519
2520    fn xload16le_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2521        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32O32>(addr)? };
2522        self.state[dst].set_i32(i16::from_le(result).into());
2523        ControlFlow::Continue(())
2524    }
2525
2526    fn xload32le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2527        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeO32>(addr)? };
2528        self.state[dst].set_i32(i32::from_le(result));
2529        ControlFlow::Continue(())
2530    }
2531
2532    fn xload64le_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2533        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeO32>(addr)? };
2534        self.state[dst].set_i64(i64::from_le(result));
2535        ControlFlow::Continue(())
2536    }
2537
2538    fn xstore8_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2539        let val = self.state[val].get_u32() as u8;
2540        unsafe {
2541            self.store_ne::<u8, crate::XStore8O32>(addr, val)?;
2542        }
2543        ControlFlow::Continue(())
2544    }
2545
2546    fn xstore16le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2547        let val = self.state[val].get_u32() as u16;
2548        unsafe {
2549            self.store_ne::<u16, crate::XStore16LeO32>(addr, val.to_le())?;
2550        }
2551        ControlFlow::Continue(())
2552    }
2553
2554    fn xstore32le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2555        let val = self.state[val].get_u32();
2556        unsafe {
2557            self.store_ne::<u32, crate::XStore32LeO32>(addr, val.to_le())?;
2558        }
2559        ControlFlow::Continue(())
2560    }
2561
2562    fn xstore64le_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2563        let val = self.state[val].get_u64();
2564        unsafe {
2565            self.store_ne::<u64, crate::XStore64LeO32>(addr, val.to_le())?;
2566        }
2567        ControlFlow::Continue(())
2568    }
2569
2570    // =========================================================================
2571    // g32 addressing modes
2572
2573    fn xload8_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2574        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32>(addr)? };
2575        self.state[dst].set_u32(result.into());
2576        ControlFlow::Continue(())
2577    }
2578
2579    fn xload8_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2580        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32>(addr)? };
2581        self.state[dst].set_i32(result.into());
2582        ControlFlow::Continue(())
2583    }
2584
2585    fn xload16le_u32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2586        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32>(addr)? };
2587        self.state[dst].set_u32(u16::from_le(result).into());
2588        ControlFlow::Continue(())
2589    }
2590
2591    fn xload16le_s32_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2592        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32>(addr)? };
2593        self.state[dst].set_i32(i16::from_le(result).into());
2594        ControlFlow::Continue(())
2595    }
2596
2597    fn xload32le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2598        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32>(addr)? };
2599        self.state[dst].set_i32(i32::from_le(result));
2600        ControlFlow::Continue(())
2601    }
2602
2603    fn xload64le_g32(&mut self, dst: XReg, addr: AddrG32) -> ControlFlow<Done> {
2604        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32>(addr)? };
2605        self.state[dst].set_i64(i64::from_le(result));
2606        ControlFlow::Continue(())
2607    }
2608
2609    fn xstore8_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2610        let val = self.state[val].get_u32() as u8;
2611        unsafe {
2612            self.store_ne::<u8, crate::XStore8G32>(addr, val)?;
2613        }
2614        ControlFlow::Continue(())
2615    }
2616
2617    fn xstore16le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2618        let val = self.state[val].get_u32() as u16;
2619        unsafe {
2620            self.store_ne::<u16, crate::XStore16LeG32>(addr, val.to_le())?;
2621        }
2622        ControlFlow::Continue(())
2623    }
2624
2625    fn xstore32le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2626        let val = self.state[val].get_u32();
2627        unsafe {
2628            self.store_ne::<u32, crate::XStore32LeG32>(addr, val.to_le())?;
2629        }
2630        ControlFlow::Continue(())
2631    }
2632
2633    fn xstore64le_g32(&mut self, addr: AddrG32, val: XReg) -> ControlFlow<Done> {
2634        let val = self.state[val].get_u64();
2635        unsafe {
2636            self.store_ne::<u64, crate::XStore64LeG32>(addr, val.to_le())?;
2637        }
2638        ControlFlow::Continue(())
2639    }
2640
2641    // =========================================================================
2642    // z addressing modes
2643
2644    fn xload8_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2645        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32Z>(addr)? };
2646        self.state[dst].set_u32(result.into());
2647        ControlFlow::Continue(())
2648    }
2649
2650    fn xload8_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2651        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32Z>(addr)? };
2652        self.state[dst].set_i32(result.into());
2653        ControlFlow::Continue(())
2654    }
2655
2656    fn xload16le_u32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2657        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32Z>(addr)? };
2658        self.state[dst].set_u32(u16::from_le(result).into());
2659        ControlFlow::Continue(())
2660    }
2661
2662    fn xload16le_s32_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2663        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32Z>(addr)? };
2664        self.state[dst].set_i32(i16::from_le(result).into());
2665        ControlFlow::Continue(())
2666    }
2667
2668    fn xload32le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2669        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeZ>(addr)? };
2670        self.state[dst].set_i32(i32::from_le(result));
2671        ControlFlow::Continue(())
2672    }
2673
2674    fn xload64le_z(&mut self, dst: XReg, addr: AddrZ) -> ControlFlow<Done> {
2675        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeZ>(addr)? };
2676        self.state[dst].set_i64(i64::from_le(result));
2677        ControlFlow::Continue(())
2678    }
2679
2680    fn xstore8_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2681        let val = self.state[val].get_u32() as u8;
2682        unsafe {
2683            self.store_ne::<u8, crate::XStore8Z>(addr, val)?;
2684        }
2685        ControlFlow::Continue(())
2686    }
2687
2688    fn xstore16le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2689        let val = self.state[val].get_u32() as u16;
2690        unsafe {
2691            self.store_ne::<u16, crate::XStore16LeZ>(addr, val.to_le())?;
2692        }
2693        ControlFlow::Continue(())
2694    }
2695
2696    fn xstore32le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2697        let val = self.state[val].get_u32();
2698        unsafe {
2699            self.store_ne::<u32, crate::XStore32LeZ>(addr, val.to_le())?;
2700        }
2701        ControlFlow::Continue(())
2702    }
2703
2704    fn xstore64le_z(&mut self, addr: AddrZ, val: XReg) -> ControlFlow<Done> {
2705        let val = self.state[val].get_u64();
2706        unsafe {
2707            self.store_ne::<u64, crate::XStore64LeZ>(addr, val.to_le())?;
2708        }
2709        ControlFlow::Continue(())
2710    }
2711
2712    // =========================================================================
2713    // g32bne addressing modes
2714
2715    fn xload8_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2716        let result = unsafe { self.load_ne::<u8, crate::XLoad8U32G32Bne>(addr)? };
2717        self.state[dst].set_u32(result.into());
2718        ControlFlow::Continue(())
2719    }
2720
2721    fn xload8_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2722        let result = unsafe { self.load_ne::<i8, crate::XLoad8S32G32Bne>(addr)? };
2723        self.state[dst].set_i32(result.into());
2724        ControlFlow::Continue(())
2725    }
2726
2727    fn xload16le_u32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2728        let result = unsafe { self.load_ne::<u16, crate::XLoad16LeU32G32Bne>(addr)? };
2729        self.state[dst].set_u32(u16::from_le(result).into());
2730        ControlFlow::Continue(())
2731    }
2732
2733    fn xload16le_s32_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2734        let result = unsafe { self.load_ne::<i16, crate::XLoad16LeS32G32Bne>(addr)? };
2735        self.state[dst].set_i32(i16::from_le(result).into());
2736        ControlFlow::Continue(())
2737    }
2738
2739    fn xload32le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2740        let result = unsafe { self.load_ne::<i32, crate::XLoad32LeG32Bne>(addr)? };
2741        self.state[dst].set_i32(i32::from_le(result));
2742        ControlFlow::Continue(())
2743    }
2744
2745    fn xload64le_g32bne(&mut self, dst: XReg, addr: AddrG32Bne) -> ControlFlow<Done> {
2746        let result = unsafe { self.load_ne::<i64, crate::XLoad64LeG32Bne>(addr)? };
2747        self.state[dst].set_i64(i64::from_le(result));
2748        ControlFlow::Continue(())
2749    }
2750
2751    fn xstore8_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2752        let val = self.state[val].get_u32() as u8;
2753        unsafe {
2754            self.store_ne::<u8, crate::XStore8G32Bne>(addr, val)?;
2755        }
2756        ControlFlow::Continue(())
2757    }
2758
2759    fn xstore16le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2760        let val = self.state[val].get_u32() as u16;
2761        unsafe {
2762            self.store_ne::<u16, crate::XStore16LeG32Bne>(addr, val.to_le())?;
2763        }
2764        ControlFlow::Continue(())
2765    }
2766
2767    fn xstore32le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2768        let val = self.state[val].get_u32();
2769        unsafe {
2770            self.store_ne::<u32, crate::XStore32LeG32Bne>(addr, val.to_le())?;
2771        }
2772        ControlFlow::Continue(())
2773    }
2774
2775    fn xstore64le_g32bne(&mut self, addr: AddrG32Bne, val: XReg) -> ControlFlow<Done> {
2776        let val = self.state[val].get_u64();
2777        unsafe {
2778            self.store_ne::<u64, crate::XStore64LeG32Bne>(addr, val.to_le())?;
2779        }
2780        ControlFlow::Continue(())
2781    }
2782}
2783
2784impl ExtendedOpVisitor for Interpreter<'_> {
2785    fn nop(&mut self) -> ControlFlow<Done> {
2786        ControlFlow::Continue(())
2787    }
2788
2789    fn trap(&mut self) -> ControlFlow<Done> {
2790        self.done_trap::<crate::Trap>()
2791    }
2792
2793    fn call_indirect_host(&mut self, id: u8) -> ControlFlow<Done> {
2794        self.done_call_indirect_host(id)
2795    }
2796
2797    fn bswap32(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2798        let src = self.state[src].get_u32();
2799        self.state[dst].set_u32(src.swap_bytes());
2800        ControlFlow::Continue(())
2801    }
2802
2803    fn bswap64(&mut self, dst: XReg, src: XReg) -> ControlFlow<Done> {
2804        let src = self.state[src].get_u64();
2805        self.state[dst].set_u64(src.swap_bytes());
2806        ControlFlow::Continue(())
2807    }
2808
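    // Boolean mask: a zero source produces all-zeros in `dst`, any nonzero
    // source produces all-ones.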
2809    fn xbmask32(&mut self, dst: XReg, src: XReg) -> Self::Return {
2810        let a = self.state[src].get_u32();
2811        if a == 0 {
2812            self.state[dst].set_u32(0);
2813        } else {
2814            self.state[dst].set_i32(-1);
2815        }
2816        ControlFlow::Continue(())
2817    }
2818
2819    fn xbmask64(&mut self, dst: XReg, src: XReg) -> Self::Return {
2820        let a = self.state[src].get_u64();
2821        if a == 0 {
2822            self.state[dst].set_u64(0);
2823        } else {
2824            self.state[dst].set_i64(-1);
2825        }
2826        ControlFlow::Continue(())
2827    }
2828
2829    fn xadd32_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2830        let a = self.state[operands.src1].get_u32();
2831        let b = self.state[operands.src2].get_u32();
2832        match a.checked_add(b) {
2833            Some(c) => {
2834                self.state[operands.dst].set_u32(c);
2835                ControlFlow::Continue(())
2836            }
2837            None => self.done_trap::<crate::Xadd32UoverflowTrap>(),
2838        }
2839    }
2840
2841    fn xadd64_uoverflow_trap(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2842        let a = self.state[operands.src1].get_u64();
2843        let b = self.state[operands.src2].get_u64();
2844        match a.checked_add(b) {
2845            Some(c) => {
2846                self.state[operands.dst].set_u64(c);
2847                ControlFlow::Continue(())
2848            }
2849            None => self.done_trap::<crate::Xadd64UoverflowTrap>(),
2850        }
2851    }
2852
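    // High half of a full-width multiply, computed by widening both operands
    // to 128 bits.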
2853    fn xmulhi64_s(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2854        let a = self.state[operands.src1].get_i64();
2855        let b = self.state[operands.src2].get_i64();
2856        let result = ((i128::from(a) * i128::from(b)) >> 64) as i64;
2857        self.state[operands.dst].set_i64(result);
2858        ControlFlow::Continue(())
2859    }
2860
2861    fn xmulhi64_u(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
2862        let a = self.state[operands.src1].get_u64();
2863        let b = self.state[operands.src2].get_u64();
2864        let result = ((u128::from(a) * u128::from(b)) >> 64) as u64;
2865        self.state[operands.dst].set_u64(result);
2866        ControlFlow::Continue(())
2867    }
2868
2869    // =========================================================================
2870    // o32 addressing modes for big-endian X-registers
2871
2872    fn xload16be_u32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2873        let result = unsafe { self.load_ne::<u16, crate::XLoad16BeU32O32>(addr)? };
2874        self.state[dst].set_u32(u16::from_be(result).into());
2875        ControlFlow::Continue(())
2876    }
2877
2878    fn xload16be_s32_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2879        let result = unsafe { self.load_ne::<i16, crate::XLoad16BeS32O32>(addr)? };
2880        self.state[dst].set_i32(i16::from_be(result).into());
2881        ControlFlow::Continue(())
2882    }
2883
2884    fn xload32be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2885        let result = unsafe { self.load_ne::<i32, crate::XLoad32BeO32>(addr)? };
2886        self.state[dst].set_i32(i32::from_be(result));
2887        ControlFlow::Continue(())
2888    }
2889
2890    fn xload64be_o32(&mut self, dst: XReg, addr: AddrO32) -> ControlFlow<Done> {
2891        let result = unsafe { self.load_ne::<i64, crate::XLoad64BeO32>(addr)? };
2892        self.state[dst].set_i64(i64::from_be(result));
2893        ControlFlow::Continue(())
2894    }
2895
2896    fn xstore16be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2897        let val = self.state[val].get_u32() as u16;
2898        unsafe {
2899            self.store_ne::<u16, crate::XStore16BeO32>(addr, val.to_be())?;
2900        }
2901        ControlFlow::Continue(())
2902    }
2903
2904    fn xstore32be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2905        let val = self.state[val].get_u32();
2906        unsafe {
2907            self.store_ne::<u32, crate::XStore32BeO32>(addr, val.to_be())?;
2908        }
2909        ControlFlow::Continue(())
2910    }
2911
2912    fn xstore64be_o32(&mut self, addr: AddrO32, val: XReg) -> ControlFlow<Done> {
2913        let val = self.state[val].get_u64();
2914        unsafe {
2915            self.store_ne::<u64, crate::XStore64BeO32>(addr, val.to_be())?;
2916        }
2917        ControlFlow::Continue(())
2918    }
2919
2920    // =========================================================================
2921    // o32 addressing modes for little-endian F-registers
2922
2923    fn fload32le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2924        let val = unsafe { self.load_ne::<u32, crate::Fload32LeO32>(addr)? };
2925        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2926        ControlFlow::Continue(())
2927    }
2928
2929    fn fload64le_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2930        let val = unsafe { self.load_ne::<u64, crate::Fload64LeO32>(addr)? };
2931        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2932        ControlFlow::Continue(())
2933    }
2934
2935    fn fstore32le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2936        let val = self.state[src].get_f32();
2937        unsafe {
2938            self.store_ne::<u32, crate::Fstore32LeO32>(addr, val.to_bits().to_le())?;
2939        }
2940        ControlFlow::Continue(())
2941    }
2942
2943    fn fstore64le_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2944        let val = self.state[src].get_f64();
2945        unsafe {
2946            self.store_ne::<u64, crate::Fstore64LeO32>(addr, val.to_bits().to_le())?;
2947        }
2948        ControlFlow::Continue(())
2949    }
2950
2951    // =========================================================================
2952    // o32 addressing modes for big-endian F-registers
2953
2954    fn fload32be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2955        let val = unsafe { self.load_ne::<u32, crate::Fload32BeO32>(addr)? };
2956        self.state[dst].set_f32(f32::from_bits(u32::from_be(val)));
2957        ControlFlow::Continue(())
2958    }
2959
2960    fn fload64be_o32(&mut self, dst: FReg, addr: AddrO32) -> ControlFlow<Done> {
2961        let val = unsafe { self.load_ne::<u64, crate::Fload64BeO32>(addr)? };
2962        self.state[dst].set_f64(f64::from_bits(u64::from_be(val)));
2963        ControlFlow::Continue(())
2964    }
2965
2966    fn fstore32be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2967        let val = self.state[src].get_f32();
2968        unsafe {
2969            self.store_ne::<u32, crate::Fstore32BeO32>(addr, val.to_bits().to_be())?;
2970        }
2971        ControlFlow::Continue(())
2972    }
2973
2974    fn fstore64be_o32(&mut self, addr: AddrO32, src: FReg) -> ControlFlow<Done> {
2975        let val = self.state[src].get_f64();
2976        unsafe {
2977            self.store_ne::<u64, crate::Fstore64BeO32>(addr, val.to_bits().to_be())?;
2978        }
2979        ControlFlow::Continue(())
2980    }
2981
2982    // =========================================================================
2983    // z addressing modes for little-endian F-registers
2984
2985    fn fload32le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
2986        let val = unsafe { self.load_ne::<u32, crate::Fload32LeZ>(addr)? };
2987        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
2988        ControlFlow::Continue(())
2989    }
2990
2991    fn fload64le_z(&mut self, dst: FReg, addr: AddrZ) -> ControlFlow<Done> {
2992        let val = unsafe { self.load_ne::<u64, crate::Fload64LeZ>(addr)? };
2993        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
2994        ControlFlow::Continue(())
2995    }
2996
2997    fn fstore32le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
2998        let val = self.state[src].get_f32();
2999        unsafe {
3000            self.store_ne::<u32, crate::Fstore32LeZ>(addr, val.to_bits().to_le())?;
3001        }
3002        ControlFlow::Continue(())
3003    }
3004
3005    fn fstore64le_z(&mut self, addr: AddrZ, src: FReg) -> ControlFlow<Done> {
3006        let val = self.state[src].get_f64();
3007        unsafe {
3008            self.store_ne::<u64, crate::Fstore64LeZ>(addr, val.to_bits().to_le())?;
3009        }
3010        ControlFlow::Continue(())
3011    }
3012
3013    // =========================================================================
3014    // g32 addressing modes for little-endian F-registers
3015
3016    fn fload32le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3017        let val = unsafe { self.load_ne::<u32, crate::Fload32LeG32>(addr)? };
3018        self.state[dst].set_f32(f32::from_bits(u32::from_le(val)));
3019        ControlFlow::Continue(())
3020    }
3021
3022    fn fload64le_g32(&mut self, dst: FReg, addr: AddrG32) -> ControlFlow<Done> {
3023        let val = unsafe { self.load_ne::<u64, crate::Fload64LeG32>(addr)? };
3024        self.state[dst].set_f64(f64::from_bits(u64::from_le(val)));
3025        ControlFlow::Continue(())
3026    }
3027
3028    fn fstore32le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3029        let val = self.state[src].get_f32();
3030        unsafe {
3031            self.store_ne::<u32, crate::Fstore32LeG32>(addr, val.to_bits().to_le())?;
3032        }
3033        ControlFlow::Continue(())
3034    }
3035
3036    fn fstore64le_g32(&mut self, addr: AddrG32, src: FReg) -> ControlFlow<Done> {
3037        let val = self.state[src].get_f64();
3038        unsafe {
3039            self.store_ne::<u64, crate::Fstore64LeG32>(addr, val.to_bits().to_le())?;
3040        }
3041        ControlFlow::Continue(())
3042    }
3043
3044    // =========================================================================
3045    // o32 addressing modes for little-endian V-registers
3046
3047    fn vload128le_o32(&mut self, dst: VReg, addr: AddrO32) -> ControlFlow<Done> {
3048        let val = unsafe { self.load_ne::<u128, crate::VLoad128O32>(addr)? };
3049        self.state[dst].set_u128(u128::from_le(val));
3050        ControlFlow::Continue(())
3051    }
3052
3053    fn vstore128le_o32(&mut self, addr: AddrO32, src: VReg) -> ControlFlow<Done> {
3054        let val = self.state[src].get_u128();
3055        unsafe {
3056            self.store_ne::<u128, crate::Vstore128LeO32>(addr, val.to_le())?;
3057        }
3058        ControlFlow::Continue(())
3059    }
3060
3061    // =========================================================================
3062    // z addressing modes for little-endian V-registers
3063
3064    fn vload128le_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3065        let val = unsafe { self.load_ne::<u128, crate::VLoad128Z>(addr)? };
3066        self.state[dst].set_u128(u128::from_le(val));
3067        ControlFlow::Continue(())
3068    }
3069
3070    fn vstore128le_z(&mut self, addr: AddrZ, src: VReg) -> ControlFlow<Done> {
3071        let val = self.state[src].get_u128();
3072        unsafe {
3073            self.store_ne::<u128, crate::Vstore128LeZ>(addr, val.to_le())?;
3074        }
3075        ControlFlow::Continue(())
3076    }
3077
3078    // =========================================================================
3079    // g32 addressing modes for little-endian V-registers
3080
3081    fn vload128le_g32(&mut self, dst: VReg, addr: AddrG32) -> ControlFlow<Done> {
3082        let val = unsafe { self.load_ne::<u128, crate::VLoad128G32>(addr)? };
3083        self.state[dst].set_u128(u128::from_le(val));
3084        ControlFlow::Continue(())
3085    }
3086
3087    fn vstore128le_g32(&mut self, addr: AddrG32, src: VReg) -> ControlFlow<Done> {
3088        let val = self.state[src].get_u128();
3089        unsafe {
3090            self.store_ne::<u128, crate::Vstore128LeG32>(addr, val.to_le())?;
3091        }
3092        ControlFlow::Continue(())
3093    }
3094
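    // =========================================================================
    // Register moves, float constants, and scalar float operations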
3095    fn xmov_fp(&mut self, dst: XReg) -> ControlFlow<Done> {
3096        let fp = self.state.fp;
3097        self.state[dst].set_ptr(fp);
3098        ControlFlow::Continue(())
3099    }
3100
3101    fn xmov_lr(&mut self, dst: XReg) -> ControlFlow<Done> {
3102        let lr = self.state.lr;
3103        self.state[dst].set_ptr(lr);
3104        ControlFlow::Continue(())
3105    }
3106
3107    fn fmov(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3108        let val = self.state[src];
3109        self.state[dst] = val;
3110        ControlFlow::Continue(())
3111    }
3112
3113    fn vmov(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3114        let val = self.state[src];
3115        self.state[dst] = val;
3116        ControlFlow::Continue(())
3117    }
3118
3119    fn fconst32(&mut self, dst: FReg, bits: u32) -> ControlFlow<Done> {
3120        self.state[dst].set_f32(f32::from_bits(bits));
3121        ControlFlow::Continue(())
3122    }
3123
3124    fn fconst64(&mut self, dst: FReg, bits: u64) -> ControlFlow<Done> {
3125        self.state[dst].set_f64(f64::from_bits(bits));
3126        ControlFlow::Continue(())
3127    }
3128
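    // The bitcasts move the raw bit pattern between X and F registers via
    // `to_bits`/`from_bits`; the value is reinterpreted, never numerically
    // converted or rounded.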
3129    fn bitcast_int_from_float_32(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3130        let val = self.state[src].get_f32();
3131        self.state[dst].set_u32(val.to_bits());
3132        ControlFlow::Continue(())
3133    }
3134
3135    fn bitcast_int_from_float_64(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3136        let val = self.state[src].get_f64();
3137        self.state[dst].set_u64(val.to_bits());
3138        ControlFlow::Continue(())
3139    }
3140
3141    fn bitcast_float_from_int_32(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3142        let val = self.state[src].get_u32();
3143        self.state[dst].set_f32(f32::from_bits(val));
3144        ControlFlow::Continue(())
3145    }
3146
3147    fn bitcast_float_from_int_64(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3148        let val = self.state[src].get_u64();
3149        self.state[dst].set_f64(f64::from_bits(val));
3150        ControlFlow::Continue(())
3151    }
3152
3153    fn feq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3154        let a = self.state[src1].get_f32();
3155        let b = self.state[src2].get_f32();
3156        self.state[dst].set_u32(u32::from(a == b));
3157        ControlFlow::Continue(())
3158    }
3159
3160    fn fneq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3161        let a = self.state[src1].get_f32();
3162        let b = self.state[src2].get_f32();
3163        self.state[dst].set_u32(u32::from(a != b));
3164        ControlFlow::Continue(())
3165    }
3166
3167    fn flt32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3168        let a = self.state[src1].get_f32();
3169        let b = self.state[src2].get_f32();
3170        self.state[dst].set_u32(u32::from(a < b));
3171        ControlFlow::Continue(())
3172    }
3173
3174    fn flteq32(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3175        let a = self.state[src1].get_f32();
3176        let b = self.state[src2].get_f32();
3177        self.state[dst].set_u32(u32::from(a <= b));
3178        ControlFlow::Continue(())
3179    }
3180
3181    fn feq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3182        let a = self.state[src1].get_f64();
3183        let b = self.state[src2].get_f64();
3184        self.state[dst].set_u32(u32::from(a == b));
3185        ControlFlow::Continue(())
3186    }
3187
3188    fn fneq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3189        let a = self.state[src1].get_f64();
3190        let b = self.state[src2].get_f64();
3191        self.state[dst].set_u32(u32::from(a != b));
3192        ControlFlow::Continue(())
3193    }
3194
3195    fn flt64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3196        let a = self.state[src1].get_f64();
3197        let b = self.state[src2].get_f64();
3198        self.state[dst].set_u32(u32::from(a < b));
3199        ControlFlow::Continue(())
3200    }
3201
3202    fn flteq64(&mut self, dst: XReg, src1: FReg, src2: FReg) -> ControlFlow<Done> {
3203        let a = self.state[src1].get_f64();
3204        let b = self.state[src2].get_f64();
3205        self.state[dst].set_u32(u32::from(a <= b));
3206        ControlFlow::Continue(())
3207    }
3208
3209    fn fselect32(
3210        &mut self,
3211        dst: FReg,
3212        cond: XReg,
3213        if_nonzero: FReg,
3214        if_zero: FReg,
3215    ) -> ControlFlow<Done> {
3216        let result = if self.state[cond].get_u32() != 0 {
3217            self.state[if_nonzero].get_f32()
3218        } else {
3219            self.state[if_zero].get_f32()
3220        };
3221        self.state[dst].set_f32(result);
3222        ControlFlow::Continue(())
3223    }
3224
3225    fn fselect64(
3226        &mut self,
3227        dst: FReg,
3228        cond: XReg,
3229        if_nonzero: FReg,
3230        if_zero: FReg,
3231    ) -> ControlFlow<Done> {
3232        let result = if self.state[cond].get_u32() != 0 {
3233            self.state[if_nonzero].get_f64()
3234        } else {
3235            self.state[if_zero].get_f64()
3236        };
3237        self.state[dst].set_f64(result);
3238        ControlFlow::Continue(())
3239    }
3240
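    // Integer-to-float conversions rely on Rust's `as` casts, which round to
    // the nearest representable value when the integer cannot be represented
    // exactly (e.g. large u64 values converted to f32).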
3241    fn f32_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3242        let a = self.state[src].get_i32();
3243        self.state[dst].set_f32(a as f32);
3244        ControlFlow::Continue(())
3245    }
3246
3247    fn f32_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3248        let a = self.state[src].get_u32();
3249        self.state[dst].set_f32(a as f32);
3250        ControlFlow::Continue(())
3251    }
3252
3253    fn f32_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3254        let a = self.state[src].get_i64();
3255        self.state[dst].set_f32(a as f32);
3256        ControlFlow::Continue(())
3257    }
3258
3259    fn f32_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3260        let a = self.state[src].get_u64();
3261        self.state[dst].set_f32(a as f32);
3262        ControlFlow::Continue(())
3263    }
3264
3265    fn f64_from_x32_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3266        let a = self.state[src].get_i32();
3267        self.state[dst].set_f64(a as f64);
3268        ControlFlow::Continue(())
3269    }
3270
3271    fn f64_from_x32_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3272        let a = self.state[src].get_u32();
3273        self.state[dst].set_f64(a as f64);
3274        ControlFlow::Continue(())
3275    }
3276
3277    fn f64_from_x64_s(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3278        let a = self.state[src].get_i64();
3279        self.state[dst].set_f64(a as f64);
3280        ControlFlow::Continue(())
3281    }
3282
3283    fn f64_from_x64_u(&mut self, dst: FReg, src: XReg) -> ControlFlow<Done> {
3284        let a = self.state[src].get_u64();
3285        self.state[dst].set_f64(a as f64);
3286        ControlFlow::Continue(())
3287    }
3288
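    // Trapping float-to-int conversions: `check_xnn_from_fnn` appears to take
    // exclusive lower/upper bounds on the convertible range. For i32 those are
    // -2147483649.0 and 2147483648.0, i.e. (i32::MIN - 1) and (i32::MAX + 1),
    // so any value strictly between them truncates to an in-range i32; values
    // outside that range (and presumably NaN) cause the `?` to return early as
    // a trap.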
3289    fn x32_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3290        let a = self.state[src].get_f32();
3291        self.check_xnn_from_fnn::<crate::X32FromF32S>(a.into(), -2147483649.0, 2147483648.0)?;
3292        self.state[dst].set_i32(a as i32);
3293        ControlFlow::Continue(())
3294    }
3295
3296    fn x32_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3297        let a = self.state[src].get_f32();
3298        self.check_xnn_from_fnn::<crate::X32FromF32U>(a.into(), -1.0, 4294967296.0)?;
3299        self.state[dst].set_u32(a as u32);
3300        ControlFlow::Continue(())
3301    }
3302
3303    fn x64_from_f32_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3304        let a = self.state[src].get_f32();
3305        self.check_xnn_from_fnn::<crate::X64FromF32S>(
3306            a.into(),
3307            -9223372036854777856.0,
3308            9223372036854775808.0,
3309        )?;
3310        self.state[dst].set_i64(a as i64);
3311        ControlFlow::Continue(())
3312    }
3313
3314    fn x64_from_f32_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3315        let a = self.state[src].get_f32();
3316        self.check_xnn_from_fnn::<crate::X64FromF32U>(a.into(), -1.0, 18446744073709551616.0)?;
3317        self.state[dst].set_u64(a as u64);
3318        ControlFlow::Continue(())
3319    }
3320
3321    fn x32_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3322        let a = self.state[src].get_f64();
3323        self.check_xnn_from_fnn::<crate::X32FromF64S>(a, -2147483649.0, 2147483648.0)?;
3324        self.state[dst].set_i32(a as i32);
3325        ControlFlow::Continue(())
3326    }
3327
3328    fn x32_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3329        let a = self.state[src].get_f64();
3330        self.check_xnn_from_fnn::<crate::X32FromF64U>(a, -1.0, 4294967296.0)?;
3331        self.state[dst].set_u32(a as u32);
3332        ControlFlow::Continue(())
3333    }
3334
3335    fn x64_from_f64_s(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3336        let a = self.state[src].get_f64();
3337        self.check_xnn_from_fnn::<crate::X64FromF64S>(
3338            a,
3339            -9223372036854777856.0,
3340            9223372036854775808.0,
3341        )?;
3342        self.state[dst].set_i64(a as i64);
3343        ControlFlow::Continue(())
3344    }
3345
3346    fn x64_from_f64_u(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3347        let a = self.state[src].get_f64();
3348        self.check_xnn_from_fnn::<crate::X64FromF64U>(a, -1.0, 18446744073709551616.0)?;
3349        self.state[dst].set_u64(a as u64);
3350        ControlFlow::Continue(())
3351    }
3352
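    // Saturating variants need no explicit range check: Rust's float-to-int
    // `as` casts already saturate (out-of-range values clamp to the target
    // type's MIN/MAX and NaN becomes 0), e.g. `f32::NAN as u32 == 0` and
    // `1e30f32 as u32 == u32::MAX`, which matches wasm's `trunc_sat`
    // semantics.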
3353    fn x32_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3354        let a = self.state[src].get_f32();
3355        self.state[dst].set_i32(a as i32);
3356        ControlFlow::Continue(())
3357    }
3358
3359    fn x32_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3360        let a = self.state[src].get_f32();
3361        self.state[dst].set_u32(a as u32);
3362        ControlFlow::Continue(())
3363    }
3364
3365    fn x64_from_f32_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3366        let a = self.state[src].get_f32();
3367        self.state[dst].set_i64(a as i64);
3368        ControlFlow::Continue(())
3369    }
3370
3371    fn x64_from_f32_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3372        let a = self.state[src].get_f32();
3373        self.state[dst].set_u64(a as u64);
3374        ControlFlow::Continue(())
3375    }
3376
3377    fn x32_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3378        let a = self.state[src].get_f64();
3379        self.state[dst].set_i32(a as i32);
3380        ControlFlow::Continue(())
3381    }
3382
3383    fn x32_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3384        let a = self.state[src].get_f64();
3385        self.state[dst].set_u32(a as u32);
3386        ControlFlow::Continue(())
3387    }
3388
3389    fn x64_from_f64_s_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3390        let a = self.state[src].get_f64();
3391        self.state[dst].set_i64(a as i64);
3392        ControlFlow::Continue(())
3393    }
3394
3395    fn x64_from_f64_u_sat(&mut self, dst: XReg, src: FReg) -> ControlFlow<Done> {
3396        let a = self.state[src].get_f64();
3397        self.state[dst].set_u64(a as u64);
3398        ControlFlow::Continue(())
3399    }
3400
3401    fn f32_from_f64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3402        let a = self.state[src].get_f64();
3403        self.state[dst].set_f32(a as f32);
3404        ControlFlow::Continue(())
3405    }
3406
3407    fn f64_from_f32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3408        let a = self.state[src].get_f32();
3409        self.state[dst].set_f64(a.into());
3410        ControlFlow::Continue(())
3411    }
3412
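    // The `wasm_*` float helpers (copysign, maximum, trunc, nearest, etc.) are
    // assumed to implement the WebAssembly-specified semantics for these
    // operations (NaN handling, signed zeros, rounding) rather than the plain
    // Rust float methods.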
3413    fn fcopysign32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3414        let a = self.state[operands.src1].get_f32();
3415        let b = self.state[operands.src2].get_f32();
3416        self.state[operands.dst].set_f32(a.wasm_copysign(b));
3417        ControlFlow::Continue(())
3418    }
3419
3420    fn fcopysign64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3421        let a = self.state[operands.src1].get_f64();
3422        let b = self.state[operands.src2].get_f64();
3423        self.state[operands.dst].set_f64(a.wasm_copysign(b));
3424        ControlFlow::Continue(())
3425    }
3426
3427    fn fadd32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3428        let a = self.state[operands.src1].get_f32();
3429        let b = self.state[operands.src2].get_f32();
3430        self.state[operands.dst].set_f32(a + b);
3431        ControlFlow::Continue(())
3432    }
3433
3434    fn fsub32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3435        let a = self.state[operands.src1].get_f32();
3436        let b = self.state[operands.src2].get_f32();
3437        self.state[operands.dst].set_f32(a - b);
3438        ControlFlow::Continue(())
3439    }
3440
3441    fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3442        let mut a = self.state[operands.src1].get_f32x4();
3443        let b = self.state[operands.src2].get_f32x4();
3444        for (a, b) in a.iter_mut().zip(b) {
3445            *a = *a - b;
3446        }
3447        self.state[operands.dst].set_f32x4(a);
3448        ControlFlow::Continue(())
3449    }
3450
3451    fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3452        let a = self.state[operands.src1].get_f32();
3453        let b = self.state[operands.src2].get_f32();
3454        self.state[operands.dst].set_f32(a * b);
3455        ControlFlow::Continue(())
3456    }
3457
3458    fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3459        let mut a = self.state[operands.src1].get_f32x4();
3460        let b = self.state[operands.src2].get_f32x4();
3461        for (a, b) in a.iter_mut().zip(b) {
3462            *a = *a * b;
3463        }
3464        self.state[operands.dst].set_f32x4(a);
3465        ControlFlow::Continue(())
3466    }
3467
3468    fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3469        let a = self.state[operands.src1].get_f32();
3470        let b = self.state[operands.src2].get_f32();
3471        self.state[operands.dst].set_f32(a / b);
3472        ControlFlow::Continue(())
3473    }
3474
3475    fn vdivf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3476        let a = self.state[operands.src1].get_f32x4();
3477        let b = self.state[operands.src2].get_f32x4();
3478        let mut result = [0.0f32; 4];
3479
3480        for i in 0..4 {
3481            result[i] = a[i] / b[i];
3482        }
3483
3484        self.state[operands.dst].set_f32x4(result);
3485        ControlFlow::Continue(())
3486    }
3487
3488    fn vdivf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3489        let a = self.state[operands.src1].get_f64x2();
3490        let b = self.state[operands.src2].get_f64x2();
3491        let mut result = [0.0f64; 2];
3492
3493        for i in 0..2 {
3494            result[i] = a[i] / b[i];
3495        }
3496
3497        self.state[operands.dst].set_f64x2(result);
3498        ControlFlow::Continue(())
3499    }
3500
3501    fn fmaximum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3502        let a = self.state[operands.src1].get_f32();
3503        let b = self.state[operands.src2].get_f32();
3504        self.state[operands.dst].set_f32(a.wasm_maximum(b));
3505        ControlFlow::Continue(())
3506    }
3507
3508    fn fminimum32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3509        let a = self.state[operands.src1].get_f32();
3510        let b = self.state[operands.src2].get_f32();
3511        self.state[operands.dst].set_f32(a.wasm_minimum(b));
3512        ControlFlow::Continue(())
3513    }
3514
3515    fn ftrunc32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3516        let a = self.state[src].get_f32();
3517        self.state[dst].set_f32(a.wasm_trunc());
3518        ControlFlow::Continue(())
3519    }
3520
3521    fn vtrunc32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3522        let mut a = self.state[src].get_f32x4();
3523        for elem in a.iter_mut() {
3524            *elem = elem.wasm_trunc();
3525        }
3526        self.state[dst].set_f32x4(a);
3527        ControlFlow::Continue(())
3528    }
3529
3530    fn vtrunc64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3531        let mut a = self.state[src].get_f64x2();
3532        for elem in a.iter_mut() {
3533            *elem = elem.wasm_trunc();
3534        }
3535        self.state[dst].set_f64x2(a);
3536        ControlFlow::Continue(())
3537    }
3538
3539    fn ffloor32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3540        let a = self.state[src].get_f32();
3541        self.state[dst].set_f32(a.wasm_floor());
3542        ControlFlow::Continue(())
3543    }
3544
3545    fn vfloor32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3546        let mut a = self.state[src].get_f32x4();
3547        for elem in a.iter_mut() {
3548            *elem = elem.wasm_floor();
3549        }
3550        self.state[dst].set_f32x4(a);
3551        ControlFlow::Continue(())
3552    }
3553
3554    fn vfloor64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3555        let mut a = self.state[src].get_f64x2();
3556        for elem in a.iter_mut() {
3557            *elem = elem.wasm_floor();
3558        }
3559        self.state[dst].set_f64x2(a);
3560        ControlFlow::Continue(())
3561    }
3562
3563    fn fceil32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3564        let a = self.state[src].get_f32();
3565        self.state[dst].set_f32(a.wasm_ceil());
3566        ControlFlow::Continue(())
3567    }
3568
3569    fn vceil32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3570        let mut a = self.state[src].get_f32x4();
3571        for elem in a.iter_mut() {
3572            *elem = elem.wasm_ceil();
3573        }
3574        self.state[dst].set_f32x4(a);
3575
3576        ControlFlow::Continue(())
3577    }
3578
3579    fn vceil64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3580        let mut a = self.state[src].get_f64x2();
3581        for elem in a.iter_mut() {
3582            *elem = elem.wasm_ceil();
3583        }
3584        self.state[dst].set_f64x2(a);
3585
3586        ControlFlow::Continue(())
3587    }
3588
3589    fn fnearest32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3590        let a = self.state[src].get_f32();
3591        self.state[dst].set_f32(a.wasm_nearest());
3592        ControlFlow::Continue(())
3593    }
3594
3595    fn vnearest32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3596        let mut a = self.state[src].get_f32x4();
3597        for elem in a.iter_mut() {
3598            *elem = elem.wasm_nearest();
3599        }
3600        self.state[dst].set_f32x4(a);
3601        ControlFlow::Continue(())
3602    }
3603
3604    fn vnearest64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3605        let mut a = self.state[src].get_f64x2();
3606        for elem in a.iter_mut() {
3607            *elem = elem.wasm_nearest();
3608        }
3609        self.state[dst].set_f64x2(a);
3610        ControlFlow::Continue(())
3611    }
3612
3613    fn fsqrt32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3614        let a = self.state[src].get_f32();
3615        self.state[dst].set_f32(a.wasm_sqrt());
3616        ControlFlow::Continue(())
3617    }
3618
3619    fn vsqrt32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3620        let mut a = self.state[src].get_f32x4();
3621        for elem in a.iter_mut() {
3622            *elem = elem.wasm_sqrt();
3623        }
3624        self.state[dst].set_f32x4(a);
3625        ControlFlow::Continue(())
3626    }
3627
3628    fn vsqrt64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3629        let mut a = self.state[src].get_f64x2();
3630        for elem in a.iter_mut() {
3631            *elem = elem.wasm_sqrt();
3632        }
3633        self.state[dst].set_f64x2(a);
3634        ControlFlow::Continue(())
3635    }
3636
3637    fn fneg32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3638        let a = self.state[src].get_f32();
3639        self.state[dst].set_f32(-a);
3640        ControlFlow::Continue(())
3641    }
3642
3643    fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3644        let mut a = self.state[src].get_f32x4();
3645        for elem in a.iter_mut() {
3646            *elem = -*elem;
3647        }
3648        self.state[dst].set_f32x4(a);
3649        ControlFlow::Continue(())
3650    }
3651
3652    fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3653        let a = self.state[src].get_f32();
3654        self.state[dst].set_f32(a.wasm_abs());
3655        ControlFlow::Continue(())
3656    }
3657
3658    fn fadd64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3659        let a = self.state[operands.src1].get_f64();
3660        let b = self.state[operands.src2].get_f64();
3661        self.state[operands.dst].set_f64(a + b);
3662        ControlFlow::Continue(())
3663    }
3664
3665    fn fsub64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3666        let a = self.state[operands.src1].get_f64();
3667        let b = self.state[operands.src2].get_f64();
3668        self.state[operands.dst].set_f64(a - b);
3669        ControlFlow::Continue(())
3670    }
3671
3672    fn fmul64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3673        let a = self.state[operands.src1].get_f64();
3674        let b = self.state[operands.src2].get_f64();
3675        self.state[operands.dst].set_f64(a * b);
3676        ControlFlow::Continue(())
3677    }
3678
3679    fn fdiv64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3680        let a = self.state[operands.src1].get_f64();
3681        let b = self.state[operands.src2].get_f64();
3682        self.state[operands.dst].set_f64(a / b);
3683        ControlFlow::Continue(())
3684    }
3685
3686    fn fmaximum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3687        let a = self.state[operands.src1].get_f64();
3688        let b = self.state[operands.src2].get_f64();
3689        self.state[operands.dst].set_f64(a.wasm_maximum(b));
3690        ControlFlow::Continue(())
3691    }
3692
3693    fn fminimum64(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
3694        let a = self.state[operands.src1].get_f64();
3695        let b = self.state[operands.src2].get_f64();
3696        self.state[operands.dst].set_f64(a.wasm_minimum(b));
3697        ControlFlow::Continue(())
3698    }
3699
3700    fn ftrunc64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3701        let a = self.state[src].get_f64();
3702        self.state[dst].set_f64(a.wasm_trunc());
3703        ControlFlow::Continue(())
3704    }
3705
3706    fn ffloor64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3707        let a = self.state[src].get_f64();
3708        self.state[dst].set_f64(a.wasm_floor());
3709        ControlFlow::Continue(())
3710    }
3711
3712    fn fceil64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3713        let a = self.state[src].get_f64();
3714        self.state[dst].set_f64(a.wasm_ceil());
3715        ControlFlow::Continue(())
3716    }
3717
3718    fn fnearest64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3719        let a = self.state[src].get_f64();
3720        self.state[dst].set_f64(a.wasm_nearest());
3721        ControlFlow::Continue(())
3722    }
3723
3724    fn fsqrt64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3725        let a = self.state[src].get_f64();
3726        self.state[dst].set_f64(a.wasm_sqrt());
3727        ControlFlow::Continue(())
3728    }
3729
3730    fn fneg64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3731        let a = self.state[src].get_f64();
3732        self.state[dst].set_f64(-a);
3733        ControlFlow::Continue(())
3734    }
3735
3736    fn fabs64(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
3737        let a = self.state[src].get_f64();
3738        self.state[dst].set_f64(a.wasm_abs());
3739        ControlFlow::Continue(())
3740    }
3741
3742    fn vaddi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3743        let mut a = self.state[operands.src1].get_i8x16();
3744        let b = self.state[operands.src2].get_i8x16();
3745        for (a, b) in a.iter_mut().zip(b) {
3746            *a = a.wrapping_add(b);
3747        }
3748        self.state[operands.dst].set_i8x16(a);
3749        ControlFlow::Continue(())
3750    }
3751
3752    fn vaddi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3753        let mut a = self.state[operands.src1].get_i16x8();
3754        let b = self.state[operands.src2].get_i16x8();
3755        for (a, b) in a.iter_mut().zip(b) {
3756            *a = a.wrapping_add(b);
3757        }
3758        self.state[operands.dst].set_i16x8(a);
3759        ControlFlow::Continue(())
3760    }
3761
3762    fn vaddi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3763        let mut a = self.state[operands.src1].get_i32x4();
3764        let b = self.state[operands.src2].get_i32x4();
3765        for (a, b) in a.iter_mut().zip(b) {
3766            *a = a.wrapping_add(b);
3767        }
3768        self.state[operands.dst].set_i32x4(a);
3769        ControlFlow::Continue(())
3770    }
3771
3772    fn vaddi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3773        let mut a = self.state[operands.src1].get_i64x2();
3774        let b = self.state[operands.src2].get_i64x2();
3775        for (a, b) in a.iter_mut().zip(b) {
3776            *a = a.wrapping_add(b);
3777        }
3778        self.state[operands.dst].set_i64x2(a);
3779        ControlFlow::Continue(())
3780    }
3781
3782    fn vaddf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3783        let mut a = self.state[operands.src1].get_f32x4();
3784        let b = self.state[operands.src2].get_f32x4();
3785        for (a, b) in a.iter_mut().zip(b) {
3786            *a += b;
3787        }
3788        self.state[operands.dst].set_f32x4(a);
3789        ControlFlow::Continue(())
3790    }
3791
3792    fn vaddf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3793        let mut a = self.state[operands.src1].get_f64x2();
3794        let b = self.state[operands.src2].get_f64x2();
3795        for (a, b) in a.iter_mut().zip(b) {
3796            *a += b;
3797        }
3798        self.state[operands.dst].set_f64x2(a);
3799        ControlFlow::Continue(())
3800    }
3801
3802    fn vaddi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3803        let mut a = self.state[operands.src1].get_i8x16();
3804        let b = self.state[operands.src2].get_i8x16();
3805        for (a, b) in a.iter_mut().zip(b) {
3806            *a = (*a).saturating_add(b);
3807        }
3808        self.state[operands.dst].set_i8x16(a);
3809        ControlFlow::Continue(())
3810    }
3811
3812    fn vaddu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3813        let mut a = self.state[operands.src1].get_u8x16();
3814        let b = self.state[operands.src2].get_u8x16();
3815        for (a, b) in a.iter_mut().zip(b) {
3816            *a = (*a).saturating_add(b);
3817        }
3818        self.state[operands.dst].set_u8x16(a);
3819        ControlFlow::Continue(())
3820    }
3821
3822    fn vaddi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3823        let mut a = self.state[operands.src1].get_i16x8();
3824        let b = self.state[operands.src2].get_i16x8();
3825        for (a, b) in a.iter_mut().zip(b) {
3826            *a = (*a).saturating_add(b);
3827        }
3828        self.state[operands.dst].set_i16x8(a);
3829        ControlFlow::Continue(())
3830    }
3831
3832    fn vaddu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3833        let mut a = self.state[operands.src1].get_u16x8();
3834        let b = self.state[operands.src2].get_u16x8();
3835        for (a, b) in a.iter_mut().zip(b) {
3836            *a = (*a).saturating_add(b);
3837        }
3838        self.state[operands.dst].set_u16x8(a);
3839        ControlFlow::Continue(())
3840    }
3841
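    // Pairwise add: adjacent lane pairs of `src1` are summed into the low half
    // of the result and adjacent pairs of `src2` into the high half, wrapping
    // on overflow.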
3842    fn vaddpairwisei16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3843        let a = self.state[operands.src1].get_i16x8();
3844        let b = self.state[operands.src2].get_i16x8();
3845        let mut result = [0i16; 8];
3846        let half = result.len() / 2;
3847        for i in 0..half {
3848            result[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
3849            result[i + half] = b[2 * i].wrapping_add(b[2 * i + 1]);
3850        }
3851        self.state[operands.dst].set_i16x8(result);
3852        ControlFlow::Continue(())
3853    }
3854
3855    fn vaddpairwisei32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3856        let a = self.state[operands.src1].get_i32x4();
3857        let b = self.state[operands.src2].get_i32x4();
3858        let mut result = [0i32; 4];
3859        result[0] = a[0].wrapping_add(a[1]);
3860        result[1] = a[2].wrapping_add(a[3]);
3861        result[2] = b[0].wrapping_add(b[1]);
3862        result[3] = b[2].wrapping_add(b[3]);
3863        self.state[operands.dst].set_i32x4(result);
3864        ControlFlow::Continue(())
3865    }
3866
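    // Vector shifts take the shift amount from an X register;
    // `wrapping_shl`/`wrapping_shr` mask that amount by the lane width (e.g.
    // shifting 8-bit lanes by 9 actually shifts by 1), so oversized shift
    // counts wrap rather than being undefined, matching wasm's shift
    // semantics.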
3867    fn vshli8x16(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3868        let a = self.state[operands.src1].get_i8x16();
3869        let b = self.state[operands.src2].get_u32();
3870        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shl(b)));
3871        ControlFlow::Continue(())
3872    }
3873
3874    fn vshli16x8(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3875        let a = self.state[operands.src1].get_i16x8();
3876        let b = self.state[operands.src2].get_u32();
3877        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shl(b)));
3878        ControlFlow::Continue(())
3879    }
3880
3881    fn vshli32x4(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3882        let a = self.state[operands.src1].get_i32x4();
3883        let b = self.state[operands.src2].get_u32();
3884        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shl(b)));
3885        ControlFlow::Continue(())
3886    }
3887
3888    fn vshli64x2(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3889        let a = self.state[operands.src1].get_i64x2();
3890        let b = self.state[operands.src2].get_u32();
3891        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shl(b)));
3892        ControlFlow::Continue(())
3893    }
3894
3895    fn vshri8x16_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3896        let a = self.state[operands.src1].get_i8x16();
3897        let b = self.state[operands.src2].get_u32();
3898        self.state[operands.dst].set_i8x16(a.map(|a| a.wrapping_shr(b)));
3899        ControlFlow::Continue(())
3900    }
3901
3902    fn vshri16x8_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3903        let a = self.state[operands.src1].get_i16x8();
3904        let b = self.state[operands.src2].get_u32();
3905        self.state[operands.dst].set_i16x8(a.map(|a| a.wrapping_shr(b)));
3906        ControlFlow::Continue(())
3907    }
3908
3909    fn vshri32x4_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3910        let a = self.state[operands.src1].get_i32x4();
3911        let b = self.state[operands.src2].get_u32();
3912        self.state[operands.dst].set_i32x4(a.map(|a| a.wrapping_shr(b)));
3913        ControlFlow::Continue(())
3914    }
3915
3916    fn vshri64x2_s(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3917        let a = self.state[operands.src1].get_i64x2();
3918        let b = self.state[operands.src2].get_u32();
3919        self.state[operands.dst].set_i64x2(a.map(|a| a.wrapping_shr(b)));
3920        ControlFlow::Continue(())
3921    }
3922
3923    fn vshri8x16_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3924        let a = self.state[operands.src1].get_u8x16();
3925        let b = self.state[operands.src2].get_u32();
3926        self.state[operands.dst].set_u8x16(a.map(|a| a.wrapping_shr(b)));
3927        ControlFlow::Continue(())
3928    }
3929
3930    fn vshri16x8_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3931        let a = self.state[operands.src1].get_u16x8();
3932        let b = self.state[operands.src2].get_u32();
3933        self.state[operands.dst].set_u16x8(a.map(|a| a.wrapping_shr(b)));
3934        ControlFlow::Continue(())
3935    }
3936
3937    fn vshri32x4_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3938        let a = self.state[operands.src1].get_u32x4();
3939        let b = self.state[operands.src2].get_u32();
3940        self.state[operands.dst].set_u32x4(a.map(|a| a.wrapping_shr(b)));
3941        ControlFlow::Continue(())
3942    }
3943
3944    fn vshri64x2_u(&mut self, operands: BinaryOperands<VReg, VReg, XReg>) -> ControlFlow<Done> {
3945        let a = self.state[operands.src1].get_u64x2();
3946        let b = self.state[operands.src2].get_u32();
3947        self.state[operands.dst].set_u64x2(a.map(|a| a.wrapping_shr(b)));
3948        ControlFlow::Continue(())
3949    }
3950
3951    fn vconst128(&mut self, dst: VReg, val: u128) -> ControlFlow<Done> {
3952        self.state[dst].set_u128(val);
3953        ControlFlow::Continue(())
3954    }
3955
3956    fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3957        let val = self.state[src].get_u32() as u8;
3958        self.state[dst].set_u8x16([val; 16]);
3959        ControlFlow::Continue(())
3960    }
3961
3962    fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3963        let val = self.state[src].get_u32() as u16;
3964        self.state[dst].set_u16x8([val; 8]);
3965        ControlFlow::Continue(())
3966    }
3967
3968    fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3969        let val = self.state[src].get_u32();
3970        self.state[dst].set_u32x4([val; 4]);
3971        ControlFlow::Continue(())
3972    }
3973
3974    fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
3975        let val = self.state[src].get_u64();
3976        self.state[dst].set_u64x2([val; 2]);
3977        ControlFlow::Continue(())
3978    }
3979
3980    fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
3981        let val = self.state[src].get_f32();
3982        self.state[dst].set_f32x4([val; 4]);
3983        ControlFlow::Continue(())
3984    }
3985
3986    fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
3987        let val = self.state[src].get_f64();
3988        self.state[dst].set_f64x2([val; 2]);
3989        ControlFlow::Continue(())
3990    }
3991
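    // Load-and-extend vector loads: read a half-width vector (e.g. 8 bytes)
    // from memory and widen each element to the next lane size, sign- or
    // zero-extending according to the `_s`/`_u` suffix.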
3992    fn vload8x8_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3993        let val = unsafe { self.load_ne::<[i8; 8], crate::VLoad8x8SZ>(addr)? };
3994        self.state[dst].set_i16x8(val.map(|i| i.into()));
3995        ControlFlow::Continue(())
3996    }
3997
3998    fn vload8x8_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
3999        let val = unsafe { self.load_ne::<[u8; 8], crate::VLoad8x8UZ>(addr)? };
4000        self.state[dst].set_u16x8(val.map(|i| i.into()));
4001        ControlFlow::Continue(())
4002    }
4003
4004    fn vload16x4le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4005        let val = unsafe { self.load_ne::<[i16; 4], crate::VLoad16x4LeSZ>(addr)? };
4006        self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
4007        ControlFlow::Continue(())
4008    }
4009
4010    fn vload16x4le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4011        let val = unsafe { self.load_ne::<[u16; 4], crate::VLoad16x4LeUZ>(addr)? };
4012        self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
4013        ControlFlow::Continue(())
4014    }
4015
4016    fn vload32x2le_s_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4017        let val = unsafe { self.load_ne::<[i32; 2], crate::VLoad32x2LeSZ>(addr)? };
4018        self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
4019        ControlFlow::Continue(())
4020    }
4021
4022    fn vload32x2le_u_z(&mut self, dst: VReg, addr: AddrZ) -> ControlFlow<Done> {
4023        let val = unsafe { self.load_ne::<[u32; 2], crate::VLoad32x2LeUZ>(addr)? };
4024        self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
4025        ControlFlow::Continue(())
4026    }
4027
4028    fn vband128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4029        let a = self.state[operands.src1].get_u128();
4030        let b = self.state[operands.src2].get_u128();
4031        self.state[operands.dst].set_u128(a & b);
4032        ControlFlow::Continue(())
4033    }
4034
4035    fn vbor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4036        let a = self.state[operands.src1].get_u128();
4037        let b = self.state[operands.src2].get_u128();
4038        self.state[operands.dst].set_u128(a | b);
4039        ControlFlow::Continue(())
4040    }
4041
4042    fn vbxor128(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4043        let a = self.state[operands.src1].get_u128();
4044        let b = self.state[operands.src2].get_u128();
4045        self.state[operands.dst].set_u128(a ^ b);
4046        ControlFlow::Continue(())
4047    }
4048
4049    fn vbnot128(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4050        let a = self.state[src].get_u128();
4051        self.state[dst].set_u128(!a);
4052        ControlFlow::Continue(())
4053    }
4054
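    // Bitwise select: for each of the 128 bits, take the bit from `x` where
    // the corresponding bit of `c` is 1 and the bit from `y` where it is 0,
    // i.e. `(c & x) | (!c & y)`.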
4055    fn vbitselect128(&mut self, dst: VReg, c: VReg, x: VReg, y: VReg) -> ControlFlow<Done> {
4056        let c = self.state[c].get_u128();
4057        let x = self.state[x].get_u128();
4058        let y = self.state[y].get_u128();
4059        self.state[dst].set_u128((c & x) | (!c & y));
4060        ControlFlow::Continue(())
4061    }
4062
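    // Bitmask extraction: collect the high (sign) bit of every lane into an
    // integer. Iterating the lanes in reverse while shifting left leaves lane
    // 0's bit in bit 0 of the result and lane N-1's bit in bit N-1.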
4063    fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4064        let a = self.state[src].get_u8x16();
4065        let mut result = 0;
4066        for item in a.iter().rev() {
4067            result <<= 1;
4068            result |= (*item >> 7) as u32;
4069        }
4070        self.state[dst].set_u32(result);
4071        ControlFlow::Continue(())
4072    }
4073
4074    fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4075        let a = self.state[src].get_u16x8();
4076        let mut result = 0;
4077        for item in a.iter().rev() {
4078            result <<= 1;
4079            result |= (*item >> 15) as u32;
4080        }
4081        self.state[dst].set_u32(result);
4082        ControlFlow::Continue(())
4083    }
4084
4085    fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4086        let a = self.state[src].get_u32x4();
4087        let mut result = 0;
4088        for item in a.iter().rev() {
4089            result <<= 1;
4090            result |= *item >> 31;
4091        }
4092        self.state[dst].set_u32(result);
4093        ControlFlow::Continue(())
4094    }
4095
4096    fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4097        let a = self.state[src].get_u64x2();
4098        let mut result = 0;
4099        for item in a.iter().rev() {
4100            result <<= 1;
4101            result |= (*item >> 63) as u32;
4102        }
4103        self.state[dst].set_u32(result);
4104        ControlFlow::Continue(())
4105    }
4106
4107    fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4108        let a = self.state[src].get_u8x16();
4109        let result = a.iter().all(|a| *a != 0);
4110        self.state[dst].set_u32(u32::from(result));
4111        ControlFlow::Continue(())
4112    }
4113
4114    fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4115        let a = self.state[src].get_u16x8();
4116        let result = a.iter().all(|a| *a != 0);
4117        self.state[dst].set_u32(u32::from(result));
4118        ControlFlow::Continue(())
4119    }
4120
4121    fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4122        let a = self.state[src].get_u32x4();
4123        let result = a.iter().all(|a| *a != 0);
4124        self.state[dst].set_u32(u32::from(result));
4125        ControlFlow::Continue(())
4126    }
4127
4128    fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4129        let a = self.state[src].get_u64x2();
4130        let result = a.iter().all(|a| *a != 0);
4131        self.state[dst].set_u32(u32::from(result));
4132        ControlFlow::Continue(())
4133    }
4134
4135    fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4136        let a = self.state[src].get_u8x16();
4137        let result = a.iter().any(|a| *a != 0);
4138        self.state[dst].set_u32(u32::from(result));
4139        ControlFlow::Continue(())
4140    }
4141
4142    fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4143        let a = self.state[src].get_u16x8();
4144        let result = a.iter().any(|a| *a != 0);
4145        self.state[dst].set_u32(u32::from(result));
4146        ControlFlow::Continue(())
4147    }
4148
4149    fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4150        let a = self.state[src].get_u32x4();
4151        let result = a.iter().any(|a| *a != 0);
4152        self.state[dst].set_u32(u32::from(result));
4153        ControlFlow::Continue(())
4154    }
4155
4156    fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
4157        let a = self.state[src].get_u64x2();
4158        let result = a.iter().any(|a| *a != 0);
4159        self.state[dst].set_u32(u32::from(result));
4160        ControlFlow::Continue(())
4161    }
4162
4163    fn vf32x4_from_i32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4164        let a = self.state[src].get_i32x4();
4165        self.state[dst].set_f32x4(a.map(|i| i as f32));
4166        ControlFlow::Continue(())
4167    }
4168
4169    fn vf32x4_from_i32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4170        let a = self.state[src].get_u32x4();
4171        self.state[dst].set_f32x4(a.map(|i| i as f32));
4172        ControlFlow::Continue(())
4173    }
4174
4175    fn vf64x2_from_i64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4176        let a = self.state[src].get_i64x2();
4177        self.state[dst].set_f64x2(a.map(|i| i as f64));
4178        ControlFlow::Continue(())
4179    }
4180
4181    fn vf64x2_from_i64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4182        let a = self.state[src].get_u64x2();
4183        self.state[dst].set_f64x2(a.map(|i| i as f64));
4184        ControlFlow::Continue(())
4185    }
4186
4187    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4188        let a = self.state[src].get_f32x4();
4189        self.state[dst].set_i32x4(a.map(|f| f as i32));
4190        ControlFlow::Continue(())
4191    }
4192
4193    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4194        let a = self.state[src].get_f32x4();
4195        self.state[dst].set_u32x4(a.map(|f| f as u32));
4196        ControlFlow::Continue(())
4197    }
4198
4199    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4200        let a = self.state[src].get_f64x2();
4201        self.state[dst].set_i64x2(a.map(|f| f as i64));
4202        ControlFlow::Continue(())
4203    }
4204
4205    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4206        let a = self.state[src].get_f64x2();
4207        self.state[dst].set_u64x2(a.map(|f| f as u64));
4208        ControlFlow::Continue(())
4209    }
4210
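    // Widening conversions: `first_chunk`/`last_chunk` pick the low or high
    // half of the source lanes, and `.into()` then sign-extends (signed lane
    // types) or zero-extends (unsigned lane types) each element to the wider
    // lane.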
4211    fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4212        let a = *self.state[src].get_i8x16().first_chunk().unwrap();
4213        self.state[dst].set_i16x8(a.map(|i| i.into()));
4214        ControlFlow::Continue(())
4215    }
4216
4217    fn vwidenlow8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4218        let a = *self.state[src].get_u8x16().first_chunk().unwrap();
4219        self.state[dst].set_u16x8(a.map(|i| i.into()));
4220        ControlFlow::Continue(())
4221    }
4222
4223    fn vwidenlow16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4224        let a = *self.state[src].get_i16x8().first_chunk().unwrap();
4225        self.state[dst].set_i32x4(a.map(|i| i.into()));
4226        ControlFlow::Continue(())
4227    }
4228
4229    fn vwidenlow16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4230        let a = *self.state[src].get_u16x8().first_chunk().unwrap();
4231        self.state[dst].set_u32x4(a.map(|i| i.into()));
4232        ControlFlow::Continue(())
4233    }
4234
4235    fn vwidenlow32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4236        let a = *self.state[src].get_i32x4().first_chunk().unwrap();
4237        self.state[dst].set_i64x2(a.map(|i| i.into()));
4238        ControlFlow::Continue(())
4239    }
4240
4241    fn vwidenlow32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4242        let a = *self.state[src].get_u32x4().first_chunk().unwrap();
4243        self.state[dst].set_u64x2(a.map(|i| i.into()));
4244        ControlFlow::Continue(())
4245    }
4246
4247    fn vwidenhigh8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4248        let a = *self.state[src].get_i8x16().last_chunk().unwrap();
4249        self.state[dst].set_i16x8(a.map(|i| i.into()));
4250        ControlFlow::Continue(())
4251    }
4252
4253    fn vwidenhigh8x16_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4254        let a = *self.state[src].get_u8x16().last_chunk().unwrap();
4255        self.state[dst].set_u16x8(a.map(|i| i.into()));
4256        ControlFlow::Continue(())
4257    }
4258
4259    fn vwidenhigh16x8_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4260        let a = *self.state[src].get_i16x8().last_chunk().unwrap();
4261        self.state[dst].set_i32x4(a.map(|i| i.into()));
4262        ControlFlow::Continue(())
4263    }
4264
4265    fn vwidenhigh16x8_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4266        let a = *self.state[src].get_u16x8().last_chunk().unwrap();
4267        self.state[dst].set_u32x4(a.map(|i| i.into()));
4268        ControlFlow::Continue(())
4269    }
4270
4271    fn vwidenhigh32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4272        let a = *self.state[src].get_i32x4().last_chunk().unwrap();
4273        self.state[dst].set_i64x2(a.map(|i| i.into()));
4274        ControlFlow::Continue(())
4275    }
4276
4277    fn vwidenhigh32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4278        let a = *self.state[src].get_u32x4().last_chunk().unwrap();
4279        self.state[dst].set_u64x2(a.map(|i| i.into()));
4280        ControlFlow::Continue(())
4281    }
4282
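    // Saturating narrow: `src1` provides the low lanes of the result and
    // `src2` the high lanes. `try_into()` succeeds for in-range values;
    // out-of-range values fall back to the destination type's MIN or MAX
    // depending on the sign of the source, i.e. they saturate (e.g. 300_i16
    // narrows to i8::MAX and -300 to i8::MIN).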
4283    fn vnarrow16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4284        let a = self.state[operands.src1].get_i16x8();
4285        let b = self.state[operands.src2].get_i16x8();
4286        let mut result = [0; 16];
4287        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4288            *d = (*i)
4289                .try_into()
4290                .unwrap_or(if *i < 0 { i8::MIN } else { i8::MAX });
4291        }
4292        self.state[operands.dst].set_i8x16(result);
4293        ControlFlow::Continue(())
4294    }
4295
4296    fn vnarrow16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4297        let a = self.state[operands.src1].get_i16x8();
4298        let b = self.state[operands.src2].get_i16x8();
4299        let mut result = [0; 16];
4300        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4301            *d = (*i)
4302                .try_into()
4303                .unwrap_or(if *i < 0 { u8::MIN } else { u8::MAX });
4304        }
4305        self.state[operands.dst].set_u8x16(result);
4306        ControlFlow::Continue(())
4307    }
4308
4309    fn vnarrow32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4310        let a = self.state[operands.src1].get_i32x4();
4311        let b = self.state[operands.src2].get_i32x4();
4312        let mut result = [0; 8];
4313        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4314            *d = (*i)
4315                .try_into()
4316                .unwrap_or(if *i < 0 { i16::MIN } else { i16::MAX });
4317        }
4318        self.state[operands.dst].set_i16x8(result);
4319        ControlFlow::Continue(())
4320    }
4321
4322    fn vnarrow32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4323        let a = self.state[operands.src1].get_i32x4();
4324        let b = self.state[operands.src2].get_i32x4();
4325        let mut result = [0; 8];
4326        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4327            *d = (*i)
4328                .try_into()
4329                .unwrap_or(if *i < 0 { u16::MIN } else { u16::MAX });
4330        }
4331        self.state[operands.dst].set_u16x8(result);
4332        ControlFlow::Continue(())
4333    }
4334
4335    fn vnarrow64x2_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4336        let a = self.state[operands.src1].get_i64x2();
4337        let b = self.state[operands.src2].get_i64x2();
4338        let mut result = [0; 4];
4339        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4340            *d = (*i)
4341                .try_into()
4342                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX });
4343        }
4344        self.state[operands.dst].set_i32x4(result);
4345        ControlFlow::Continue(())
4346    }
4347
4348    fn vnarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4349        let a = self.state[operands.src1].get_i64x2();
4350        let b = self.state[operands.src2].get_i64x2();
4351        let mut result = [0; 4];
4352        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4353            *d = (*i)
4354                .try_into()
4355                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX });
4356        }
4357        self.state[operands.dst].set_u32x4(result);
4358        ControlFlow::Continue(())
4359    }
4360
4361    fn vunarrow64x2_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4362        let a = self.state[operands.src1].get_u64x2();
4363        let b = self.state[operands.src2].get_u64x2();
4364        let mut result = [0; 4];
4365        for (i, d) in a.iter().chain(&b).zip(&mut result) {
4366            *d = (*i).try_into().unwrap_or(u32::MAX);
4367        }
4368        self.state[operands.dst].set_u32x4(result);
4369        ControlFlow::Continue(())
4370    }
4371
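    // vfpromotelow promotes the two low f32 lanes of the source to f64;
    // vfdemote converts both f64 lanes down to f32, placing them in the two
    // low lanes of the result and zeroing the upper lanes.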
4372    fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4373        let a = self.state[src].get_f32x4();
4374        self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
4375        ControlFlow::Continue(())
4376    }
4377
4378    fn vfdemote(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4379        let a = self.state[src].get_f64x2();
4380        self.state[dst].set_f32x4([a[0] as f32, a[1] as f32, 0.0, 0.0]);
4381        ControlFlow::Continue(())
4382    }
4383
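    // Lane-wise integer subtraction and multiplication wrap on overflow
    // (two's complement), the `_sat` variants saturate instead, and the
    // f64x2 variants use ordinary IEEE arithmetic.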
4384    fn vsubi8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4385        let mut a = self.state[operands.src1].get_i8x16();
4386        let b = self.state[operands.src2].get_i8x16();
4387        for (a, b) in a.iter_mut().zip(b) {
4388            *a = a.wrapping_sub(b);
4389        }
4390        self.state[operands.dst].set_i8x16(a);
4391        ControlFlow::Continue(())
4392    }
4393
4394    fn vsubi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4395        let mut a = self.state[operands.src1].get_i16x8();
4396        let b = self.state[operands.src2].get_i16x8();
4397        for (a, b) in a.iter_mut().zip(b) {
4398            *a = a.wrapping_sub(b);
4399        }
4400        self.state[operands.dst].set_i16x8(a);
4401        ControlFlow::Continue(())
4402    }
4403
4404    fn vsubi32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4405        let mut a = self.state[operands.src1].get_i32x4();
4406        let b = self.state[operands.src2].get_i32x4();
4407        for (a, b) in a.iter_mut().zip(b) {
4408            *a = a.wrapping_sub(b);
4409        }
4410        self.state[operands.dst].set_i32x4(a);
4411        ControlFlow::Continue(())
4412    }
4413
4414    fn vsubi64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4415        let mut a = self.state[operands.src1].get_i64x2();
4416        let b = self.state[operands.src2].get_i64x2();
4417        for (a, b) in a.iter_mut().zip(b) {
4418            *a = a.wrapping_sub(b);
4419        }
4420        self.state[operands.dst].set_i64x2(a);
4421        ControlFlow::Continue(())
4422    }
4423
4424    fn vsubi8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4425        let mut a = self.state[operands.src1].get_i8x16();
4426        let b = self.state[operands.src2].get_i8x16();
4427        for (a, b) in a.iter_mut().zip(b) {
4428            *a = a.saturating_sub(b);
4429        }
4430        self.state[operands.dst].set_i8x16(a);
4431        ControlFlow::Continue(())
4432    }
4433
4434    fn vsubu8x16_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4435        let mut a = self.state[operands.src1].get_u8x16();
4436        let b = self.state[operands.src2].get_u8x16();
4437        for (a, b) in a.iter_mut().zip(b) {
4438            *a = a.saturating_sub(b);
4439        }
4440        self.state[operands.dst].set_u8x16(a);
4441        ControlFlow::Continue(())
4442    }
4443
4444    fn vsubi16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4445        let mut a = self.state[operands.src1].get_i16x8();
4446        let b = self.state[operands.src2].get_i16x8();
4447        for (a, b) in a.iter_mut().zip(b) {
4448            *a = a.saturating_sub(b);
4449        }
4450        self.state[operands.dst].set_i16x8(a);
4451        ControlFlow::Continue(())
4452    }
4453
4454    fn vsubu16x8_sat(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4455        let mut a = self.state[operands.src1].get_u16x8();
4456        let b = self.state[operands.src2].get_u16x8();
4457        for (a, b) in a.iter_mut().zip(b) {
4458            *a = a.saturating_sub(b);
4459        }
4460        self.state[operands.dst].set_u16x8(a);
4461        ControlFlow::Continue(())
4462    }
4463
4464    fn vsubf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4465        let mut a = self.state[operands.src1].get_f64x2();
4466        let b = self.state[operands.src2].get_f64x2();
4467        for (a, b) in a.iter_mut().zip(b) {
4468            *a = *a - b;
4469        }
4470        self.state[operands.dst].set_f64x2(a);
4471        ControlFlow::Continue(())
4472    }
4473
4474    fn vmuli8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4475        let mut a = self.state[operands.src1].get_i8x16();
4476        let b = self.state[operands.src2].get_i8x16();
4477        for (a, b) in a.iter_mut().zip(b) {
4478            *a = a.wrapping_mul(b);
4479        }
4480        self.state[operands.dst].set_i8x16(a);
4481        ControlFlow::Continue(())
4482    }
4483
4484    fn vmuli16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4485        let mut a = self.state[operands.src1].get_i16x8();
4486        let b = self.state[operands.src2].get_i16x8();
4487        for (a, b) in a.iter_mut().zip(b) {
4488            *a = a.wrapping_mul(b);
4489        }
4490        self.state[operands.dst].set_i16x8(a);
4491        ControlFlow::Continue(())
4492    }
4493
4494    fn vmuli32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4495        let mut a = self.state[operands.src1].get_i32x4();
4496        let b = self.state[operands.src2].get_i32x4();
4497        for (a, b) in a.iter_mut().zip(b) {
4498            *a = a.wrapping_mul(b);
4499        }
4500        self.state[operands.dst].set_i32x4(a);
4501        ControlFlow::Continue(())
4502    }
4503
4504    fn vmuli64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4505        let mut a = self.state[operands.src1].get_i64x2();
4506        let b = self.state[operands.src2].get_i64x2();
4507        for (a, b) in a.iter_mut().zip(b) {
4508            *a = a.wrapping_mul(b);
4509        }
4510        self.state[operands.dst].set_i64x2(a);
4511        ControlFlow::Continue(())
4512    }
4513
4514    fn vmulf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4515        let mut a = self.state[operands.src1].get_f64x2();
4516        let b = self.state[operands.src2].get_f64x2();
4517        for (a, b) in a.iter_mut().zip(b) {
4518            *a = *a * b;
4519        }
4520        self.state[operands.dst].set_f64x2(a);
4521        ControlFlow::Continue(())
4522    }
4523
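    // Saturating, rounding Q15 multiplication (wasm `i16x8.q15mulr_sat_s`):
    // each product is computed in 32 bits, rounded by adding 2^14 before the
    // arithmetic shift right by 15, then clamped to the i16 range. E.g.
    // 0x4000 * 0x4000 (0.5 * 0.5 in Q15) => (0x1000_0000 + 0x4000) >> 15 =
    // 0x2000 (0.25). A scalar sketch of one lane (hypothetical helper, not
    // part of this interpreter):
    //
    //     fn q15mulr_sat(a: i16, b: i16) -> i16 {
    //         let r = (i32::from(a) * i32::from(b) + (1 << 14)) >> 15;
    //         r.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16
    //     }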
4524    fn vqmulrsi16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4525        let mut a = self.state[operands.src1].get_i16x8();
4526        let b = self.state[operands.src2].get_i16x8();
4527        const MIN: i32 = i16::MIN as i32;
4528        const MAX: i32 = i16::MAX as i32;
4529        for (a, b) in a.iter_mut().zip(b) {
4530            let r = (i32::from(*a) * i32::from(b) + (1 << 14)) >> 15;
4531            *a = r.clamp(MIN, MAX) as i16;
4532        }
4533        self.state[operands.dst].set_i16x8(a);
4534        ControlFlow::Continue(())
4535    }
4536
4537    fn vpopcnt8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4538        let a = self.state[src].get_u8x16();
4539        self.state[dst].set_u8x16(a.map(|i| i.count_ones() as u8));
4540        ControlFlow::Continue(())
4541    }
4542
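    // Lane extraction into scalar registers. The `lane` immediate is read
    // with `get_unchecked`, so it is assumed (presumably guaranteed by the
    // bytecode producer) to be in bounds for the lane count; an out-of-range
    // index here would be undefined behavior. Narrow integer lanes are
    // zero-extended into the destination x register.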
4543    fn xextractv8x16(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4544        let a = unsafe { *self.state[src].get_u8x16().get_unchecked(usize::from(lane)) };
4545        self.state[dst].set_u32(u32::from(a));
4546        ControlFlow::Continue(())
4547    }
4548
4549    fn xextractv16x8(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4550        let a = unsafe { *self.state[src].get_u16x8().get_unchecked(usize::from(lane)) };
4551        self.state[dst].set_u32(u32::from(a));
4552        ControlFlow::Continue(())
4553    }
4554
4555    fn xextractv32x4(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4556        let a = unsafe { *self.state[src].get_u32x4().get_unchecked(usize::from(lane)) };
4557        self.state[dst].set_u32(a);
4558        ControlFlow::Continue(())
4559    }
4560
4561    fn xextractv64x2(&mut self, dst: XReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4562        let a = unsafe { *self.state[src].get_u64x2().get_unchecked(usize::from(lane)) };
4563        self.state[dst].set_u64(a);
4564        ControlFlow::Continue(())
4565    }
4566
4567    fn fextractv32x4(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4568        let a = unsafe { *self.state[src].get_f32x4().get_unchecked(usize::from(lane)) };
4569        self.state[dst].set_f32(a);
4570        ControlFlow::Continue(())
4571    }
4572
4573    fn fextractv64x2(&mut self, dst: FReg, src: VReg, lane: u8) -> ControlFlow<Done> {
4574        let a = unsafe { *self.state[src].get_f64x2().get_unchecked(usize::from(lane)) };
4575        self.state[dst].set_f64(a);
4576        ControlFlow::Continue(())
4577    }
4578
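    // Lane insertion from scalar registers: copy src1, overwrite the selected
    // lane with the (truncated) scalar from src2, and write the result to
    // dst. As with extraction, the `lane` immediate is assumed to be in
    // bounds, hence the unchecked indexing.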
4579    fn vinsertx8(
4580        &mut self,
4581        operands: BinaryOperands<VReg, VReg, XReg>,
4582        lane: u8,
4583    ) -> ControlFlow<Done> {
4584        let mut a = self.state[operands.src1].get_u8x16();
4585        let b = self.state[operands.src2].get_u32() as u8;
4586        unsafe {
4587            *a.get_unchecked_mut(usize::from(lane)) = b;
4588        }
4589        self.state[operands.dst].set_u8x16(a);
4590        ControlFlow::Continue(())
4591    }
4592
4593    fn vinsertx16(
4594        &mut self,
4595        operands: BinaryOperands<VReg, VReg, XReg>,
4596        lane: u8,
4597    ) -> ControlFlow<Done> {
4598        let mut a = self.state[operands.src1].get_u16x8();
4599        let b = self.state[operands.src2].get_u32() as u16;
4600        unsafe {
4601            *a.get_unchecked_mut(usize::from(lane)) = b;
4602        }
4603        self.state[operands.dst].set_u16x8(a);
4604        ControlFlow::Continue(())
4605    }
4606
4607    fn vinsertx32(
4608        &mut self,
4609        operands: BinaryOperands<VReg, VReg, XReg>,
4610        lane: u8,
4611    ) -> ControlFlow<Done> {
4612        let mut a = self.state[operands.src1].get_u32x4();
4613        let b = self.state[operands.src2].get_u32();
4614        unsafe {
4615            *a.get_unchecked_mut(usize::from(lane)) = b;
4616        }
4617        self.state[operands.dst].set_u32x4(a);
4618        ControlFlow::Continue(())
4619    }
4620
4621    fn vinsertx64(
4622        &mut self,
4623        operands: BinaryOperands<VReg, VReg, XReg>,
4624        lane: u8,
4625    ) -> ControlFlow<Done> {
4626        let mut a = self.state[operands.src1].get_u64x2();
4627        let b = self.state[operands.src2].get_u64();
4628        unsafe {
4629            *a.get_unchecked_mut(usize::from(lane)) = b;
4630        }
4631        self.state[operands.dst].set_u64x2(a);
4632        ControlFlow::Continue(())
4633    }
4634
4635    fn vinsertf32(
4636        &mut self,
4637        operands: BinaryOperands<VReg, VReg, FReg>,
4638        lane: u8,
4639    ) -> ControlFlow<Done> {
4640        let mut a = self.state[operands.src1].get_f32x4();
4641        let b = self.state[operands.src2].get_f32();
4642        unsafe {
4643            *a.get_unchecked_mut(usize::from(lane)) = b;
4644        }
4645        self.state[operands.dst].set_f32x4(a);
4646        ControlFlow::Continue(())
4647    }
4648
4649    fn vinsertf64(
4650        &mut self,
4651        operands: BinaryOperands<VReg, VReg, FReg>,
4652        lane: u8,
4653    ) -> ControlFlow<Done> {
4654        let mut a = self.state[operands.src1].get_f64x2();
4655        let b = self.state[operands.src2].get_f64();
4656        unsafe {
4657            *a.get_unchecked_mut(usize::from(lane)) = b;
4658        }
4659        self.state[operands.dst].set_f64x2(a);
4660        ControlFlow::Continue(())
4661    }
4662
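    // Lane-wise comparisons. Each lane of the result is all ones when the
    // comparison holds and all zeros otherwise; the `vslt*`/`vslteq*`
    // variants compare lanes as signed integers and the `vult*`/`vulteq*`
    // variants as unsigned. For example, veq32x4 on [1, 2, 3, 4] and
    // [1, 9, 3, 9] yields [u32::MAX, 0, u32::MAX, 0].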
4663    fn veq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4664        let a = self.state[operands.src1].get_u8x16();
4665        let b = self.state[operands.src2].get_u8x16();
4666        let mut c = [0; 16];
4667        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4668            *c = if a == b { u8::MAX } else { 0 };
4669        }
4670        self.state[operands.dst].set_u8x16(c);
4671        ControlFlow::Continue(())
4672    }
4673
4674    fn vneq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4675        let a = self.state[operands.src1].get_u8x16();
4676        let b = self.state[operands.src2].get_u8x16();
4677        let mut c = [0; 16];
4678        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4679            *c = if a != b { u8::MAX } else { 0 };
4680        }
4681        self.state[operands.dst].set_u8x16(c);
4682        ControlFlow::Continue(())
4683    }
4684
4685    fn vslt8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4686        let a = self.state[operands.src1].get_i8x16();
4687        let b = self.state[operands.src2].get_i8x16();
4688        let mut c = [0; 16];
4689        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4690            *c = if a < b { u8::MAX } else { 0 };
4691        }
4692        self.state[operands.dst].set_u8x16(c);
4693        ControlFlow::Continue(())
4694    }
4695
4696    fn vslteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4697        let a = self.state[operands.src1].get_i8x16();
4698        let b = self.state[operands.src2].get_i8x16();
4699        let mut c = [0; 16];
4700        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4701            *c = if a <= b { u8::MAX } else { 0 };
4702        }
4703        self.state[operands.dst].set_u8x16(c);
4704        ControlFlow::Continue(())
4705    }
4706
4707    fn vult8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4708        let a = self.state[operands.src1].get_u8x16();
4709        let b = self.state[operands.src2].get_u8x16();
4710        let mut c = [0; 16];
4711        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4712            *c = if a < b { u8::MAX } else { 0 };
4713        }
4714        self.state[operands.dst].set_u8x16(c);
4715        ControlFlow::Continue(())
4716    }
4717
4718    fn vulteq8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4719        let a = self.state[operands.src1].get_u8x16();
4720        let b = self.state[operands.src2].get_u8x16();
4721        let mut c = [0; 16];
4722        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4723            *c = if a <= b { u8::MAX } else { 0 };
4724        }
4725        self.state[operands.dst].set_u8x16(c);
4726        ControlFlow::Continue(())
4727    }
4728
4729    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4730        let a = self.state[operands.src1].get_u16x8();
4731        let b = self.state[operands.src2].get_u16x8();
4732        let mut c = [0; 8];
4733        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4734            *c = if a == b { u16::MAX } else { 0 };
4735        }
4736        self.state[operands.dst].set_u16x8(c);
4737        ControlFlow::Continue(())
4738    }
4739
4740    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4741        let a = self.state[operands.src1].get_u16x8();
4742        let b = self.state[operands.src2].get_u16x8();
4743        let mut c = [0; 8];
4744        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4745            *c = if a != b { u16::MAX } else { 0 };
4746        }
4747        self.state[operands.dst].set_u16x8(c);
4748        ControlFlow::Continue(())
4749    }
4750
4751    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4752        let a = self.state[operands.src1].get_i16x8();
4753        let b = self.state[operands.src2].get_i16x8();
4754        let mut c = [0; 8];
4755        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4756            *c = if a < b { u16::MAX } else { 0 };
4757        }
4758        self.state[operands.dst].set_u16x8(c);
4759        ControlFlow::Continue(())
4760    }
4761
4762    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4763        let a = self.state[operands.src1].get_i16x8();
4764        let b = self.state[operands.src2].get_i16x8();
4765        let mut c = [0; 8];
4766        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4767            *c = if a <= b { u16::MAX } else { 0 };
4768        }
4769        self.state[operands.dst].set_u16x8(c);
4770        ControlFlow::Continue(())
4771    }
4772
4773    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4774        let a = self.state[operands.src1].get_u16x8();
4775        let b = self.state[operands.src2].get_u16x8();
4776        let mut c = [0; 8];
4777        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4778            *c = if a < b { u16::MAX } else { 0 };
4779        }
4780        self.state[operands.dst].set_u16x8(c);
4781        ControlFlow::Continue(())
4782    }
4783
4784    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4785        let a = self.state[operands.src1].get_u16x8();
4786        let b = self.state[operands.src2].get_u16x8();
4787        let mut c = [0; 8];
4788        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4789            *c = if a <= b { u16::MAX } else { 0 };
4790        }
4791        self.state[operands.dst].set_u16x8(c);
4792        ControlFlow::Continue(())
4793    }
4794
4795    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4796        let a = self.state[operands.src1].get_u32x4();
4797        let b = self.state[operands.src2].get_u32x4();
4798        let mut c = [0; 4];
4799        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4800            *c = if a == b { u32::MAX } else { 0 };
4801        }
4802        self.state[operands.dst].set_u32x4(c);
4803        ControlFlow::Continue(())
4804    }
4805
4806    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4807        let a = self.state[operands.src1].get_u32x4();
4808        let b = self.state[operands.src2].get_u32x4();
4809        let mut c = [0; 4];
4810        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4811            *c = if a != b { u32::MAX } else { 0 };
4812        }
4813        self.state[operands.dst].set_u32x4(c);
4814        ControlFlow::Continue(())
4815    }
4816
4817    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4818        let a = self.state[operands.src1].get_i32x4();
4819        let b = self.state[operands.src2].get_i32x4();
4820        let mut c = [0; 4];
4821        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4822            *c = if a < b { u32::MAX } else { 0 };
4823        }
4824        self.state[operands.dst].set_u32x4(c);
4825        ControlFlow::Continue(())
4826    }
4827
4828    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4829        let a = self.state[operands.src1].get_i32x4();
4830        let b = self.state[operands.src2].get_i32x4();
4831        let mut c = [0; 4];
4832        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4833            *c = if a <= b { u32::MAX } else { 0 };
4834        }
4835        self.state[operands.dst].set_u32x4(c);
4836        ControlFlow::Continue(())
4837    }
4838
4839    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4840        let a = self.state[operands.src1].get_u32x4();
4841        let b = self.state[operands.src2].get_u32x4();
4842        let mut c = [0; 4];
4843        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4844            *c = if a < b { u32::MAX } else { 0 };
4845        }
4846        self.state[operands.dst].set_u32x4(c);
4847        ControlFlow::Continue(())
4848    }
4849
4850    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4851        let a = self.state[operands.src1].get_u32x4();
4852        let b = self.state[operands.src2].get_u32x4();
4853        let mut c = [0; 4];
4854        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4855            *c = if a <= b { u32::MAX } else { 0 };
4856        }
4857        self.state[operands.dst].set_u32x4(c);
4858        ControlFlow::Continue(())
4859    }
4860
4861    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4862        let a = self.state[operands.src1].get_u64x2();
4863        let b = self.state[operands.src2].get_u64x2();
4864        let mut c = [0; 2];
4865        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4866            *c = if a == b { u64::MAX } else { 0 };
4867        }
4868        self.state[operands.dst].set_u64x2(c);
4869        ControlFlow::Continue(())
4870    }
4871
4872    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4873        let a = self.state[operands.src1].get_u64x2();
4874        let b = self.state[operands.src2].get_u64x2();
4875        let mut c = [0; 2];
4876        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4877            *c = if a != b { u64::MAX } else { 0 };
4878        }
4879        self.state[operands.dst].set_u64x2(c);
4880        ControlFlow::Continue(())
4881    }
4882
4883    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4884        let a = self.state[operands.src1].get_i64x2();
4885        let b = self.state[operands.src2].get_i64x2();
4886        let mut c = [0; 2];
4887        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4888            *c = if a < b { u64::MAX } else { 0 };
4889        }
4890        self.state[operands.dst].set_u64x2(c);
4891        ControlFlow::Continue(())
4892    }
4893
4894    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4895        let a = self.state[operands.src1].get_i64x2();
4896        let b = self.state[operands.src2].get_i64x2();
4897        let mut c = [0; 2];
4898        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4899            *c = if a <= b { u64::MAX } else { 0 };
4900        }
4901        self.state[operands.dst].set_u64x2(c);
4902        ControlFlow::Continue(())
4903    }
4904
4905    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4906        let a = self.state[operands.src1].get_u64x2();
4907        let b = self.state[operands.src2].get_u64x2();
4908        let mut c = [0; 2];
4909        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4910            *c = if a < b { u64::MAX } else { 0 };
4911        }
4912        self.state[operands.dst].set_u64x2(c);
4913        ControlFlow::Continue(())
4914    }
4915
4916    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4917        let a = self.state[operands.src1].get_u64x2();
4918        let b = self.state[operands.src2].get_u64x2();
4919        let mut c = [0; 2];
4920        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
4921            *c = if a <= b { u64::MAX } else { 0 };
4922        }
4923        self.state[operands.dst].set_u64x2(c);
4924        ControlFlow::Continue(())
4925    }
4926
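    // Lane-wise negation wraps on overflow, so i8::MIN (and the other
    // minimum values) negate to themselves, matching wasm semantics. The
    // float variant is plain IEEE negation, i.e. a sign-bit flip per lane.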
4927    fn vneg8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4928        let a = self.state[src].get_i8x16();
4929        self.state[dst].set_i8x16(a.map(|i| i.wrapping_neg()));
4930        ControlFlow::Continue(())
4931    }
4932
4933    fn vneg16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4934        let a = self.state[src].get_i16x8();
4935        self.state[dst].set_i16x8(a.map(|i| i.wrapping_neg()));
4936        ControlFlow::Continue(())
4937    }
4938
4939    fn vneg32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4940        let a = self.state[src].get_i32x4();
4941        self.state[dst].set_i32x4(a.map(|i| i.wrapping_neg()));
4942        ControlFlow::Continue(())
4943    }
4944
4945    fn vneg64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4946        let a = self.state[src].get_i64x2();
4947        self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
4948        ControlFlow::Continue(())
4949    }
4950
4951    fn vnegf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
4952        let a = self.state[src].get_f64x2();
4953        self.state[dst].set_f64x2(a.map(|i| -i));
4954        ControlFlow::Continue(())
4955    }
4956
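    // Lane-wise integer minimum/maximum; the `_s` variants treat lanes as
    // signed and the `_u` variants as unsigned.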
4957    fn vmin8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4958        let mut a = self.state[operands.src1].get_i8x16();
4959        let b = self.state[operands.src2].get_i8x16();
4960        for (a, b) in a.iter_mut().zip(&b) {
4961            *a = (*a).min(*b);
4962        }
4963        self.state[operands.dst].set_i8x16(a);
4964        ControlFlow::Continue(())
4965    }
4966
4967    fn vmin8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4968        let mut a = self.state[operands.src1].get_u8x16();
4969        let b = self.state[operands.src2].get_u8x16();
4970        for (a, b) in a.iter_mut().zip(&b) {
4971            *a = (*a).min(*b);
4972        }
4973        self.state[operands.dst].set_u8x16(a);
4974        ControlFlow::Continue(())
4975    }
4976
4977    fn vmin16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4978        let mut a = self.state[operands.src1].get_i16x8();
4979        let b = self.state[operands.src2].get_i16x8();
4980        for (a, b) in a.iter_mut().zip(&b) {
4981            *a = (*a).min(*b);
4982        }
4983        self.state[operands.dst].set_i16x8(a);
4984        ControlFlow::Continue(())
4985    }
4986
4987    fn vmin16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4988        let mut a = self.state[operands.src1].get_u16x8();
4989        let b = self.state[operands.src2].get_u16x8();
4990        for (a, b) in a.iter_mut().zip(&b) {
4991            *a = (*a).min(*b);
4992        }
4993        self.state[operands.dst].set_u16x8(a);
4994        ControlFlow::Continue(())
4995    }
4996
4997    fn vmin32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
4998        let mut a = self.state[operands.src1].get_i32x4();
4999        let b = self.state[operands.src2].get_i32x4();
5000        for (a, b) in a.iter_mut().zip(&b) {
5001            *a = (*a).min(*b);
5002        }
5003        self.state[operands.dst].set_i32x4(a);
5004        ControlFlow::Continue(())
5005    }
5006
5007    fn vmin32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5008        let mut a = self.state[operands.src1].get_u32x4();
5009        let b = self.state[operands.src2].get_u32x4();
5010        for (a, b) in a.iter_mut().zip(&b) {
5011            *a = (*a).min(*b);
5012        }
5013        self.state[operands.dst].set_u32x4(a);
5014        ControlFlow::Continue(())
5015    }
5016
5017    fn vmax8x16_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5018        let mut a = self.state[operands.src1].get_i8x16();
5019        let b = self.state[operands.src2].get_i8x16();
5020        for (a, b) in a.iter_mut().zip(&b) {
5021            *a = (*a).max(*b);
5022        }
5023        self.state[operands.dst].set_i8x16(a);
5024        ControlFlow::Continue(())
5025    }
5026
5027    fn vmax8x16_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5028        let mut a = self.state[operands.src1].get_u8x16();
5029        let b = self.state[operands.src2].get_u8x16();
5030        for (a, b) in a.iter_mut().zip(&b) {
5031            *a = (*a).max(*b);
5032        }
5033        self.state[operands.dst].set_u8x16(a);
5034        ControlFlow::Continue(())
5035    }
5036
5037    fn vmax16x8_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5038        let mut a = self.state[operands.src1].get_i16x8();
5039        let b = self.state[operands.src2].get_i16x8();
5040        for (a, b) in a.iter_mut().zip(&b) {
5041            *a = (*a).max(*b);
5042        }
5043        self.state[operands.dst].set_i16x8(a);
5044        ControlFlow::Continue(())
5045    }
5046
5047    fn vmax16x8_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5048        let mut a = self.state[operands.src1].get_u16x8();
5049        let b = self.state[operands.src2].get_u16x8();
5050        for (a, b) in a.iter_mut().zip(&b) {
5051            *a = (*a).max(*b);
5052        }
5053        self.state[operands.dst].set_u16x8(a);
5054        ControlFlow::Continue(())
5055    }
5056
5057    fn vmax32x4_s(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5058        let mut a = self.state[operands.src1].get_i32x4();
5059        let b = self.state[operands.src2].get_i32x4();
5060        for (a, b) in a.iter_mut().zip(&b) {
5061            *a = (*a).max(*b);
5062        }
5063        self.state[operands.dst].set_i32x4(a);
5064        ControlFlow::Continue(())
5065    }
5066
5067    fn vmax32x4_u(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5068        let mut a = self.state[operands.src1].get_u32x4();
5069        let b = self.state[operands.src2].get_u32x4();
5070        for (a, b) in a.iter_mut().zip(&b) {
5071            *a = (*a).max(*b);
5072        }
5073        self.state[operands.dst].set_u32x4(a);
5074        ControlFlow::Continue(())
5075    }
5076
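    // Lane-wise absolute value uses wrapping semantics for the integer
    // variants (so the minimum value maps to itself), while the float
    // variants go through `wasm_abs` from `wasmtime_math`, presumably a
    // plain sign-bit clear as in wasm `fabs`.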
5077    fn vabs8x16(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5078        let a = self.state[src].get_i8x16();
5079        self.state[dst].set_i8x16(a.map(|i| i.wrapping_abs()));
5080        ControlFlow::Continue(())
5081    }
5082
5083    fn vabs16x8(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5084        let a = self.state[src].get_i16x8();
5085        self.state[dst].set_i16x8(a.map(|i| i.wrapping_abs()));
5086        ControlFlow::Continue(())
5087    }
5088
5089    fn vabs32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5090        let a = self.state[src].get_i32x4();
5091        self.state[dst].set_i32x4(a.map(|i| i.wrapping_abs()));
5092        ControlFlow::Continue(())
5093    }
5094
5095    fn vabs64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5096        let a = self.state[src].get_i64x2();
5097        self.state[dst].set_i64x2(a.map(|i| i.wrapping_abs()));
5098        ControlFlow::Continue(())
5099    }
5100
5101    fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5102        let a = self.state[src].get_f32x4();
5103        self.state[dst].set_f32x4(a.map(|i| i.wasm_abs()));
5104        ControlFlow::Continue(())
5105    }
5106
5107    fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
5108        let a = self.state[src].get_f64x2();
5109        self.state[dst].set_f64x2(a.map(|i| i.wasm_abs()));
5110        ControlFlow::Continue(())
5111    }
5112
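    // Float maximum/minimum use `wasm_maximum`/`wasm_minimum` from
    // `wasmtime_math`, which presumably implement the wasm NaN-propagating
    // semantics (any NaN operand yields NaN, and -0.0 is considered less
    // than +0.0).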
5113    fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5114        let mut a = self.state[operands.src1].get_f32x4();
5115        let b = self.state[operands.src2].get_f32x4();
5116        for (a, b) in a.iter_mut().zip(&b) {
5117            *a = a.wasm_maximum(*b);
5118        }
5119        self.state[operands.dst].set_f32x4(a);
5120        ControlFlow::Continue(())
5121    }
5122
5123    fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5124        let mut a = self.state[operands.src1].get_f64x2();
5125        let b = self.state[operands.src2].get_f64x2();
5126        for (a, b) in a.iter_mut().zip(&b) {
5127            *a = a.wasm_maximum(*b);
5128        }
5129        self.state[operands.dst].set_f64x2(a);
5130        ControlFlow::Continue(())
5131    }
5132
5133    fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5134        let mut a = self.state[operands.src1].get_f32x4();
5135        let b = self.state[operands.src2].get_f32x4();
5136        for (a, b) in a.iter_mut().zip(&b) {
5137            *a = a.wasm_minimum(*b);
5138        }
5139        self.state[operands.dst].set_f32x4(a);
5140        ControlFlow::Continue(())
5141    }
5142
5143    fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5144        let mut a = self.state[operands.src1].get_f64x2();
5145        let b = self.state[operands.src2].get_f64x2();
5146        for (a, b) in a.iter_mut().zip(&b) {
5147            *a = a.wasm_minimum(*b);
5148        }
5149        self.state[operands.dst].set_f64x2(a);
5150        ControlFlow::Continue(())
5151    }
5152
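    // vshuffle selects bytes from the 32-byte concatenation of src1 and src2
    // according to the immediate `mask`: control byte m in 0..16 picks a[m],
    // and m in 16..32 picks b[m - 16]. Control bytes >= 32 would panic here,
    // so the encoder is assumed never to emit them. vswizzlei8x16 uses
    // runtime indices instead, and any index outside 0..16 (including
    // negative bytes) produces 0 in that lane.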
5153    fn vshuffle(&mut self, dst: VReg, src1: VReg, src2: VReg, mask: u128) -> ControlFlow<Done> {
5154        let a = self.state[src1].get_u8x16();
5155        let b = self.state[src2].get_u8x16();
5156        let result = mask.to_le_bytes().map(|m| {
5157            if m < 16 {
5158                a[m as usize]
5159            } else {
5160                b[m as usize - 16]
5161            }
5162        });
5163        self.state[dst].set_u8x16(result);
5164        ControlFlow::Continue(())
5165    }
5166
5167    fn vswizzlei8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5168        let src1 = self.state[operands.src1].get_i8x16();
5169        let src2 = self.state[operands.src2].get_i8x16();
5170        let mut dst = [0i8; 16];
5171        for (i, &idx) in src2.iter().enumerate() {
5172            if (idx as usize) < 16 {
5173                dst[i] = src1[idx as usize];
5174            } else {
5175                dst[i] = 0;
5176            }
5177        }
5178        self.state[operands.dst].set_i8x16(dst);
5179        ControlFlow::Continue(())
5180    }
5181
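    // Rounding unsigned average: (a + b + 1) / 2 computed in 32 bits so the
    // intermediate sum cannot overflow, matching wasm `avgr_u`
    // (e.g. lanes 255 and 0 average to 128).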
5182    fn vavground8x16(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5183        let mut a = self.state[operands.src1].get_u8x16();
5184        let b = self.state[operands.src2].get_u8x16();
5185        for (a, b) in a.iter_mut().zip(&b) {
5186            // use wider precision to avoid overflow
5187            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u8;
5188        }
5189        self.state[operands.dst].set_u8x16(a);
5190        ControlFlow::Continue(())
5191    }
5192
5193    fn vavground16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5194        let mut a = self.state[operands.src1].get_u16x8();
5195        let b = self.state[operands.src2].get_u16x8();
5196        for (a, b) in a.iter_mut().zip(&b) {
5197            // use wider precision to avoid overflow
5198            *a = ((u32::from(*a) + u32::from(*b) + 1) / 2) as u16;
5199        }
5200        self.state[operands.dst].set_u16x8(a);
5201        ControlFlow::Continue(())
5202    }
5203
5204    fn veqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5205        let a = self.state[operands.src1].get_f32x4();
5206        let b = self.state[operands.src2].get_f32x4();
5207        let mut c = [0; 4];
5208        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5209            *c = if a == b { u32::MAX } else { 0 };
5210        }
5211        self.state[operands.dst].set_u32x4(c);
5212        ControlFlow::Continue(())
5213    }
5214
5215    fn vneqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5216        let a = self.state[operands.src1].get_f32x4();
5217        let b = self.state[operands.src2].get_f32x4();
5218        let mut c = [0; 4];
5219        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5220            *c = if a != b { u32::MAX } else { 0 };
5221        }
5222        self.state[operands.dst].set_u32x4(c);
5223        ControlFlow::Continue(())
5224    }
5225
5226    fn vltf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5227        let a = self.state[operands.src1].get_f32x4();
5228        let b = self.state[operands.src2].get_f32x4();
5229        let mut c = [0; 4];
5230        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5231            *c = if a < b { u32::MAX } else { 0 };
5232        }
5233        self.state[operands.dst].set_u32x4(c);
5234        ControlFlow::Continue(())
5235    }
5236
5237    fn vlteqf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5238        let a = self.state[operands.src1].get_f32x4();
5239        let b = self.state[operands.src2].get_f32x4();
5240        let mut c = [0; 4];
5241        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5242            *c = if a <= b { u32::MAX } else { 0 };
5243        }
5244        self.state[operands.dst].set_u32x4(c);
5245        ControlFlow::Continue(())
5246    }
5247
5248    fn veqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5249        let a = self.state[operands.src1].get_f64x2();
5250        let b = self.state[operands.src2].get_f64x2();
5251        let mut c = [0; 2];
5252        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5253            *c = if a == b { u64::MAX } else { 0 };
5254        }
5255        self.state[operands.dst].set_u64x2(c);
5256        ControlFlow::Continue(())
5257    }
5258
5259    fn vneqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5260        let a = self.state[operands.src1].get_f64x2();
5261        let b = self.state[operands.src2].get_f64x2();
5262        let mut c = [0; 2];
5263        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5264            *c = if a != b { u64::MAX } else { 0 };
5265        }
5266        self.state[operands.dst].set_u64x2(c);
5267        ControlFlow::Continue(())
5268    }
5269
5270    fn vltf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5271        let a = self.state[operands.src1].get_f64x2();
5272        let b = self.state[operands.src2].get_f64x2();
5273        let mut c = [0; 2];
5274        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5275            *c = if a < b { u64::MAX } else { 0 };
5276        }
5277        self.state[operands.dst].set_u64x2(c);
5278        ControlFlow::Continue(())
5279    }
5280
5281    fn vlteqf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
5282        let a = self.state[operands.src1].get_f64x2();
5283        let b = self.state[operands.src2].get_f64x2();
5284        let mut c = [0; 2];
5285        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
5286            *c = if a <= b { u64::MAX } else { 0 };
5287        }
5288        self.state[operands.dst].set_u64x2(c);
5289        ControlFlow::Continue(())
5290    }
5291
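    // Lane-wise multiply-add a * b + c via `wasm_mul_add`; whether this is a
    // single fused operation or a multiply followed by an add depends on the
    // `wasm_mul_add` implementation in `wasmtime_math`.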
5292    fn vfma32x4(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5293        let mut a = self.state[a].get_f32x4();
5294        let b = self.state[b].get_f32x4();
5295        let c = self.state[c].get_f32x4();
5296        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5297            *a = a.wasm_mul_add(b, c);
5298        }
5299        self.state[dst].set_f32x4(a);
5300        ControlFlow::Continue(())
5301    }
5302
5303    fn vfma64x2(&mut self, dst: VReg, a: VReg, b: VReg, c: VReg) -> ControlFlow<Done> {
5304        let mut a = self.state[a].get_f64x2();
5305        let b = self.state[b].get_f64x2();
5306        let c = self.state[c].get_f64x2();
5307        for ((a, b), c) in a.iter_mut().zip(b).zip(c) {
5308            *a = a.wasm_mul_add(b, c);
5309        }
5310        self.state[dst].set_f64x2(a);
5311        ControlFlow::Continue(())
5312    }
5313
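    // Whole-register select: if the scalar condition register is nonzero the
    // entire 128-bit value of `if_nonzero` is copied to `dst`, otherwise the
    // value of `if_zero` is.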
5314    fn vselect(
5315        &mut self,
5316        dst: VReg,
5317        cond: XReg,
5318        if_nonzero: VReg,
5319        if_zero: VReg,
5320    ) -> ControlFlow<Done> {
5321        let result = if self.state[cond].get_u32() != 0 {
5322            self.state[if_nonzero]
5323        } else {
5324            self.state[if_zero]
5325        };
5326        self.state[dst] = result;
5327        ControlFlow::Continue(())
5328    }
5329
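    // 128-bit integer helpers: each i128 operand is split across a pair of
    // x registers as (lo, hi). `get_i128`/`set_i128` are defined earlier in
    // this file; presumably they pack the halves roughly like this
    // (illustrative sketch only, with `lo` and `hi` as u64 register values):
    //
    //     let value = ((u128::from(hi) << 64) | u128::from(lo)) as i128;
    //
    // Addition and subtraction wrap on overflow, and the widening multiplies
    // compute the full 64x64 -> 128-bit product (signed or unsigned), so the
    // `wrapping_mul` there can never actually wrap.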
5330    fn xadd128(
5331        &mut self,
5332        dst_lo: XReg,
5333        dst_hi: XReg,
5334        lhs_lo: XReg,
5335        lhs_hi: XReg,
5336        rhs_lo: XReg,
5337        rhs_hi: XReg,
5338    ) -> ControlFlow<Done> {
5339        let lhs = self.get_i128(lhs_lo, lhs_hi);
5340        let rhs = self.get_i128(rhs_lo, rhs_hi);
5341        let result = lhs.wrapping_add(rhs);
5342        self.set_i128(dst_lo, dst_hi, result);
5343        ControlFlow::Continue(())
5344    }
5345
5346    fn xsub128(
5347        &mut self,
5348        dst_lo: XReg,
5349        dst_hi: XReg,
5350        lhs_lo: XReg,
5351        lhs_hi: XReg,
5352        rhs_lo: XReg,
5353        rhs_hi: XReg,
5354    ) -> ControlFlow<Done> {
5355        let lhs = self.get_i128(lhs_lo, lhs_hi);
5356        let rhs = self.get_i128(rhs_lo, rhs_hi);
5357        let result = lhs.wrapping_sub(rhs);
5358        self.set_i128(dst_lo, dst_hi, result);
5359        ControlFlow::Continue(())
5360    }
5361
5362    fn xwidemul64_s(
5363        &mut self,
5364        dst_lo: XReg,
5365        dst_hi: XReg,
5366        lhs: XReg,
5367        rhs: XReg,
5368    ) -> ControlFlow<Done> {
5369        let lhs = self.state[lhs].get_i64();
5370        let rhs = self.state[rhs].get_i64();
5371        let result = i128::from(lhs).wrapping_mul(i128::from(rhs));
5372        self.set_i128(dst_lo, dst_hi, result);
5373        ControlFlow::Continue(())
5374    }
5375
5376    fn xwidemul64_u(
5377        &mut self,
5378        dst_lo: XReg,
5379        dst_hi: XReg,
5380        lhs: XReg,
5381        rhs: XReg,
5382    ) -> ControlFlow<Done> {
5383        let lhs = self.state[lhs].get_u64();
5384        let rhs = self.state[rhs].get_u64();
5385        let result = u128::from(lhs).wrapping_mul(u128::from(rhs));
5386        self.set_i128(dst_lo, dst_hi, result as i128);
5387        ControlFlow::Continue(())
5388    }
5389}