wasmtime/runtime/vm/instance/allocator/pooling/memory_pool.rs

1//! Implements a memory pool using a single allocated memory slab.
2//!
3//! The pooling instance allocator maps one large slab of memory in advance and
4//! allocates WebAssembly memories from this slab--a [`MemoryPool`]. Each
5//! WebAssembly memory is allocated in its own slot (see uses of `index` and
6//! [`SlotId`] in this module):
7//!
8//! ```text
9//! ┌──────┬──────┬──────┬──────┬──────┐
10//! │Slot 0│Slot 1│Slot 2│Slot 3│......│
11//! └──────┴──────┴──────┴──────┴──────┘
12//! ```
13//!
14//! Diving deeper, we note that a [`MemoryPool`] protects Wasmtime from
15//! out-of-bounds memory accesses by inserting inaccessible guard regions
16//! between memory slots. These guard regions are configured to raise a signal
17//! if they are accessed--a WebAssembly out-of-bounds (OOB) memory access. The
18//! [`MemoryPool`] documentation has a more detailed chart but one can think of
19//! memory slots being laid out like the following:
20//!
21//! ```text
22//! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
23//! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
24//! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
25//! ```
26//!
27//! But we can be more efficient about guard regions: with memory protection
28//! keys (MPK) enabled, the interleaved guard regions can be smaller. If we
29//! surround a memory with memories from other instances and each instance is
30//! protected by different protection keys, the guard region can be smaller AND
31//! the pool will still raise a signal on an OOB access. This complicates how we
32//! lay out memory slots: we must store memories from the same instance in the
33//! same "stripe". Each stripe is protected by a different protection key.
34//!
35//! This concept, dubbed [ColorGuard] in the original paper, relies on careful
36//! calculation of the memory sizes to prevent any "overlapping access" (see
37//! [`calculate`]): there are limited protection keys available (15) so the next
38//! memory using the same key must be at least as far away as the guard region
39//! we would insert otherwise. This ends up looking like the following, where a
40//! store for instance 0 (`I0`) "stripes" two memories (`M1` and `M2`) with the
41//! same protection key 1 and far enough apart to signal an OOB access:
42//!
43//! ```text
44//! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
45//! │.....│I0:M1│.....│.....│.<enough slots>.│I0:M2│.....│.....│
46//! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
47//! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
48//! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
49//! ```
50//!
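//! As a rough, illustrative example (not Wasmtime's exact defaults): suppose
//! each memory needs a 6 GiB "faulting region" after its base (say, a 4 GiB
//! reservation plus a 2 GiB guard) and each memory may grow to at most 2 GiB.
//! With 3 stripes, each slot only needs to be 2 GiB, because the next two
//! slots belong to other keys and fault just like a `PROT_NONE` guard would:
//! the distance between two slots of the same stripe is 2 GiB * 3 = 6 GiB.
//!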
51//! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf
52
53use super::{
54    MemoryAllocationIndex,
55    index_allocator::{MemoryInModule, ModuleAffinityIndexAllocator, SlotId},
56};
57use crate::prelude::*;
58use crate::runtime::vm::{
59    CompiledModuleId, InstanceAllocationRequest, InstanceLimits, Memory, MemoryBase,
60    MemoryImageSlot, Mmap, MmapOffset, PoolingInstanceAllocatorConfig, mmap::AlignedLength,
61};
62use crate::{
63    Enabled,
64    runtime::vm::mpk::{self, ProtectionKey, ProtectionMask},
65    vm::HostAlignedByteCount,
66};
67use std::mem;
68use std::sync::atomic::{AtomicUsize, Ordering};
69use std::sync::{Arc, Mutex};
70use wasmtime_environ::{DefinedMemoryIndex, Module, Tunables};
71
72/// A set of allocator slots.
73///
74/// The allocated slots can be split by striping them: e.g., with two stripe
75/// colors 0 and 1, we would allocate all even slots using stripe 0 and all odd
76/// slots using stripe 1.
77///
78/// This is helpful for the use of protection keys: (a) if a request comes to
79/// allocate multiple instances, we can allocate them all from the same stripe
80/// and (b) if a store wants to allocate more from the same stripe it can.
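///
/// For example (illustrative), a pool of six slots split across two stripes
/// would be assigned as follows:
///
/// ```text
/// slot:   0 1 2 3 4 5
/// stripe: 0 1 0 1 0 1
/// ```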
81#[derive(Debug)]
82struct Stripe {
83    allocator: ModuleAffinityIndexAllocator,
84    pkey: Option<ProtectionKey>,
85}
86
87/// Represents a pool of WebAssembly linear memories.
88///
89/// A linear memory is divided into accessible pages and guard pages. A memory
90/// pool contains linear memories: each memory occupies a slot in an
91/// allocated slab (i.e., `mapping`):
92///
93/// ```text
94///          layout.max_memory_bytes                 layout.slot_bytes
95///                    |                                   |
96///              ◄─────┴────►                  ◄───────────┴──────────►
97/// ┌───────────┬────────────┬───────────┐     ┌───────────┬───────────┬───────────┐
98/// | PROT_NONE |            | PROT_NONE | ... |           | PROT_NONE | PROT_NONE |
99/// └───────────┴────────────┴───────────┘     └───────────┴───────────┴───────────┘
100/// |           |◄──────────────────┬─────────────────────────────────► ◄────┬────►
101/// |           |                   |                                        |
102/// mapping     |            `layout.num_slots` memories         layout.post_slab_guard_bytes
103///             |
104///   layout.pre_slab_guard_bytes
105/// ```
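///
/// The base address of slot `i` within the slab is computed as (see
/// `get_base`):
///
/// ```text
/// mapping + layout.pre_slab_guard_bytes + i * layout.slot_bytes
/// ```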
106#[derive(Debug)]
107pub struct MemoryPool {
108    mapping: Arc<Mmap<AlignedLength>>,
109    /// This memory pool is stripe-aware. If using memory protection keys, this
110    /// will contain one stripe per available key; otherwise, a single stripe
111    /// with an empty key.
112    stripes: Vec<Stripe>,
113
114    /// If using a copy-on-write allocation scheme, the slot management. We
115    /// dynamically transfer ownership of a slot to a Memory when in use.
116    image_slots: Vec<Mutex<ImageSlot>>,
117
118    /// A description of the various memory sizes used in allocating the
119    /// `mapping` slab.
120    layout: SlabLayout,
121
122    /// The maximum number of memories that a single core module instance may
123    /// use.
124    ///
125    /// NB: this is needed for validation but does not affect the pool's size.
126    memories_per_instance: usize,
127
128    /// How much linear memory, in bytes, to keep resident after resetting for
129    /// use with the next instance. This much memory will be `memset` to zero
130    /// when a linear memory is deallocated.
131    ///
132    /// Memory exceeding this amount in the wasm linear memory will be released
133    /// with `madvise` back to the kernel.
134    ///
135    /// Only applicable on Linux.
136    pub(super) keep_resident: HostAlignedByteCount,
137
138    /// Keep track of protection keys handed out to initialized stores; this
139    /// allows us to round-robin the assignment of stores to stripes.
140    next_available_pkey: AtomicUsize,
141}
142
143/// The state of memory for each slot in this pool.
144#[derive(Debug)]
145enum ImageSlot {
146    /// This slot is guaranteed to be entirely unmapped.
147    ///
148    /// This is the initial state of all slots.
149    Unmapped,
150
151    /// The state of this slot is unknown.
152    ///
153    /// This encompasses a number of situations such as:
154    ///
155    /// * The slot is currently in use.
156    /// * An attempt to use the slot was made, but allocation failed.
157    /// * The slot was used but not deallocated properly.
158    ///
159    /// All of these situations are lumped into this one variant indicating
160    /// that, at a base level, nothing is known about this slot. Using a
161    /// slot in this state first requires resetting all memory in this slot by
162    /// mapping anonymous memory on top of the entire slot.
163    Unknown,
164
165    /// This slot was previously used and `MemoryImageSlot` maintains the state
166    /// about what this slot was last configured as.
167    ///
168    /// Future use of this slot will use `MemoryImageSlot` to continue to
169    /// re-instantiate and reuse images and such. This state is entered after
170    /// an allocated slot is successfully deallocated.
171    PreviouslyUsed(MemoryImageSlot),
172}
173
174impl MemoryPool {
175    /// Create a new `MemoryPool`.
176    pub fn new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self> {
177        if u64::try_from(config.limits.max_memory_size).unwrap() > tunables.memory_reservation {
178            bail!(
179                "maximum memory size of {:#x} bytes exceeds the configured \
180                 memory reservation of {:#x} bytes",
181                config.limits.max_memory_size,
182                tunables.memory_reservation
183            );
184        }
185        let pkeys = match config.memory_protection_keys {
186            Enabled::Auto => {
187                if mpk::is_supported() {
188                    mpk::keys(config.max_memory_protection_keys)
189                } else {
190                    &[]
191                }
192            }
193            Enabled::Yes => {
194                if mpk::is_supported() {
195                    mpk::keys(config.max_memory_protection_keys)
196                } else {
197                    bail!("mpk is not supported on this system")
198                }
199            }
200            Enabled::No => &[],
201        };
202
203        // This is a tricky bit of global state: when creating a memory pool
204        // that uses memory protection keys, we ensure here that any host code
205        // will have access to all keys (i.e., stripes). It's only when we enter
206        // the WebAssembly guest code (see `StoreInner::call_hook`) that we
207        // enforce which keys/stripes can be accessed. Be forewarned about the
208        // assumptions here:
209        // - we expect this "allow all" configuration to reset the default
210        //   process state (only allow key 0) _before_ any memories are accessed
211        // - and we expect no other code (e.g., host-side code) to modify this
212        //   global MPK configuration
213        if !pkeys.is_empty() {
214            mpk::allow(ProtectionMask::all());
215        }
216
217        // Create a slab layout and allocate it as a completely inaccessible
218        // region to start--`PROT_NONE`.
219        let constraints = SlabConstraints::new(&config.limits, tunables, pkeys.len())?;
220        let layout = calculate(&constraints)?;
221        log::debug!(
222            "creating memory pool: {constraints:?} -> {layout:?} (total: {})",
223            layout.total_slab_bytes()?
224        );
225        let mut mapping =
226            Mmap::accessible_reserved(HostAlignedByteCount::ZERO, layout.total_slab_bytes()?)
227                .context("failed to create memory pool mapping")?;
228
229        // Then, stripe the memory with the available protection keys. This is
230        // unnecessary if there is only one stripe color.
231        if layout.num_stripes >= 2 {
232            let mut cursor = layout.pre_slab_guard_bytes;
233            let pkeys = &pkeys[..layout.num_stripes];
234            for i in 0..constraints.num_slots {
235                let pkey = &pkeys[i % pkeys.len()];
236                let region = unsafe {
237                    mapping.slice_mut(
238                        cursor.byte_count()..cursor.byte_count() + layout.slot_bytes.byte_count(),
239                    )
240                };
241                pkey.protect(region)?;
242                cursor = cursor
243                    .checked_add(layout.slot_bytes)
244                    .context("cursor + slot_bytes overflows")?;
245            }
246            debug_assert_eq!(
247                cursor
248                    .checked_add(layout.post_slab_guard_bytes)
249                    .context("cursor + post_slab_guard_bytes overflows")?,
250                layout.total_slab_bytes()?
251            );
252        }
253
254        let image_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(ImageSlot::Unmapped))
255            .take(constraints.num_slots)
256            .collect();
257
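        // Distribute the slots across the stripes: each stripe gets
        // `num_slots / num_stripes` slots and the first `num_slots %
        // num_stripes` stripes get one extra. For example, 5 slots over 2
        // stripes gives stripe 0 three slots and stripe 1 two slots.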
258        let create_stripe = |i| {
259            let num_slots = constraints.num_slots / layout.num_stripes
260                + usize::from(constraints.num_slots % layout.num_stripes > i);
261            let allocator = ModuleAffinityIndexAllocator::new(
262                num_slots.try_into().unwrap(),
263                config.max_unused_warm_slots,
264            );
265            Stripe {
266                allocator,
267                pkey: pkeys.get(i).cloned(),
268            }
269        };
270
271        debug_assert!(layout.num_stripes > 0);
272        let stripes: Vec<_> = (0..layout.num_stripes).map(create_stripe).collect();
273
274        let pool = Self {
275            stripes,
276            mapping: Arc::new(mapping),
277            image_slots,
278            layout,
279            memories_per_instance: usize::try_from(config.limits.max_memories_per_module).unwrap(),
280            keep_resident: HostAlignedByteCount::new_rounded_up(
281                config.linear_memory_keep_resident,
282            )?,
283            next_available_pkey: AtomicUsize::new(0),
284        };
285
286        Ok(pool)
287    }
288
289    /// Return a protection key that stores can use for requesting new memories.
290    pub fn next_available_pkey(&self) -> Option<ProtectionKey> {
291        let index = self.next_available_pkey.fetch_add(1, Ordering::SeqCst) % self.stripes.len();
292        debug_assert!(
293            self.stripes.len() < 2 || self.stripes[index].pkey.is_some(),
294            "if we are using stripes, we cannot have an empty protection key"
295        );
296        self.stripes[index].pkey
297    }
298
299    /// Validate whether this memory pool supports the given module.
300    pub fn validate_memories(&self, module: &Module) -> Result<()> {
301        let memories = module.num_defined_memories();
302        if memories > self.memories_per_instance {
303            bail!(
304                "defined memories count of {} exceeds the per-instance limit of {}",
305                memories,
306                self.memories_per_instance,
307            );
308        }
309
310        for (i, memory) in module.memories.iter().skip(module.num_imported_memories) {
311            self.validate_memory(memory).with_context(|| {
312                format!(
313                    "memory index {} is unsupported in this pooling allocator configuration",
314                    i.as_u32()
315                )
316            })?;
317        }
318        Ok(())
319    }
320
321    /// Validate one memory for this pool.
322    pub fn validate_memory(&self, memory: &wasmtime_environ::Memory) -> Result<()> {
323        let min = memory.minimum_byte_size().with_context(|| {
324            format!("memory has a minimum byte size that cannot be represented in a u64")
325        })?;
326        if min > u64::try_from(self.layout.max_memory_bytes.byte_count()).unwrap() {
327            bail!(
328                "memory has a minimum byte size of {} which exceeds the limit of {} bytes",
329                min,
330                self.layout.max_memory_bytes,
331            );
332        }
333        if memory.shared {
334            // FIXME(#4244): since the pooling allocator owns the memory
335            // allocation (which is torn down with the instance), that
336            // can't be used with shared memory where threads or the host
337            // might persist the memory beyond the lifetime of the instance
338            // itself.
339            bail!("memory is shared which is not supported in the pooling allocator");
340        }
341        Ok(())
342    }
343
344    /// Are zero slots in use right now?
345    pub fn is_empty(&self) -> bool {
346        self.stripes.iter().all(|s| s.allocator.is_empty())
347    }
348
349    /// Allocate a single memory for the given instance allocation request.
350    pub async fn allocate(
351        &self,
352        request: &mut InstanceAllocationRequest<'_, '_>,
353        ty: &wasmtime_environ::Memory,
354        memory_index: Option<DefinedMemoryIndex>,
355    ) -> Result<(MemoryAllocationIndex, Memory)> {
356        let tunables = request.store.engine().tunables();
357        let stripe_index = if let Some(pkey) = request.store.get_pkey() {
358            pkey.as_stripe()
359        } else {
360            debug_assert!(self.stripes.len() < 2);
361            0
362        };
363
364        let striped_allocation_index = self.stripes[stripe_index]
365            .allocator
366            .alloc(memory_index.and_then(|mem_idx| {
367                request
368                    .runtime_info
369                    .unique_id()
370                    .map(|id| MemoryInModule(id, mem_idx))
371            }))
372            .map(|slot| StripedAllocationIndex(u32::try_from(slot.index()).unwrap()))
373            .ok_or_else(|| {
374                super::PoolConcurrencyLimitError::new(
375                    self.stripes[stripe_index].allocator.len(),
376                    format!("memory stripe {stripe_index}"),
377                )
378            })?;
379        let mut guard = DeallocateIndexGuard {
380            pool: self,
381            stripe_index,
382            striped_allocation_index,
383            active: true,
384        };
385
386        let allocation_index =
387            striped_allocation_index.as_unstriped_slot_index(stripe_index, self.stripes.len());
388
389        // Double-check that the runtime requirements of the memory are
390        // satisfied by the configuration of this pooling allocator. This
391        // should be returned as an error through `validate_memories`
392        // but double-check here to be sure.
393        assert!(
394            tunables.memory_reservation + tunables.memory_guard_size
395                <= u64::try_from(self.layout.bytes_to_next_stripe_slot().byte_count()).unwrap()
396        );
397
398        let base = self.get_base(allocation_index);
399        let base_capacity = self.layout.max_memory_bytes;
400
401        let mut slot = self.take_memory_image_slot(allocation_index)?;
402        let image = match memory_index {
403            Some(memory_index) => request.runtime_info.memory_image(memory_index)?,
404            None => None,
405        };
406        let initial_size = ty
407            .minimum_byte_size()
408            .expect("min size checked in validation");
409
410        // If instantiation fails, we can propagate the error
411        // upward and drop the slot. This will cause the Drop
412        // handler to attempt to map the range with PROT_NONE
413        // memory, to reserve the space while releasing any
414        // stale mappings. The next use of this slot will then
415        // create a new slot that will try to map over
416        // this, returning errors as well if the mapping
417        // errors persist. The unmap-on-drop is best effort;
418        // if it fails, then we can still soundly continue
419        // using the rest of the pool and allowing the rest of
420        // the process to continue, because we never perform a
421        // mmap that would leave an open space for someone
422        // else to come in and map something.
423        let initial_size = usize::try_from(initial_size).unwrap();
424        slot.instantiate(initial_size, image, ty, tunables)?;
425
426        let memory = Memory::new_static(
427            ty,
428            tunables,
429            MemoryBase::Mmap(base),
430            base_capacity.byte_count(),
431            slot,
432            request.limiter.as_deref_mut(),
433        )
434        .await?;
435        guard.active = false;
436        return Ok((allocation_index, memory));
437
438        struct DeallocateIndexGuard<'a> {
439            pool: &'a MemoryPool,
440            stripe_index: usize,
441            striped_allocation_index: StripedAllocationIndex,
442            active: bool,
443        }
444
445        impl Drop for DeallocateIndexGuard<'_> {
446            fn drop(&mut self) {
447                if !self.active {
448                    return;
449                }
450                self.pool.stripes[self.stripe_index]
451                    .allocator
452                    .free(SlotId(self.striped_allocation_index.0), 0);
453            }
454        }
455    }
456
457    /// Deallocate a previously-allocated memory.
458    ///
459    /// # Safety
460    ///
461    /// The memory must have been previously allocated from this pool and
462    /// assigned the given index, must currently be in an allocated state, and
463    /// must never be used again.
464    ///
465    /// The caller must have already called `clear_and_remain_ready` on the
466    /// memory's image and flushed any enqueued decommits for this memory.
467    pub unsafe fn deallocate(
468        &self,
469        allocation_index: MemoryAllocationIndex,
470        image: MemoryImageSlot,
471        bytes_resident: usize,
472    ) {
473        self.return_memory_image_slot(allocation_index, image);
474
475        let (stripe_index, striped_allocation_index) =
476            StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len());
477        self.stripes[stripe_index]
478            .allocator
479            .free(SlotId(striped_allocation_index.0), bytes_resident);
480    }
481
482    /// Purge everything related to `module`.
483    pub fn purge_module(&self, module: CompiledModuleId) {
484        // This primarily means clearing out all of its memory images present in
485        // the virtual address space. Go through the index allocator for slots
486        // affine to `module` and reset them, freeing up the index when we're
487        // done.
488        //
489        // Note that this is only called when the specified `module` won't be
490        // allocated further (the module is being dropped) so this shouldn't hit
491        // any sort of infinite loop since this should be the final operation
492        // working with `module`.
493        //
494        // TODO: We are given a module id, but key affinity by pair of module id
495        // and defined memory index. We are missing any defined memory index or
496        // count of how many memories the module defines here. Therefore, we
497        // probe up to the maximum number of memories per instance. This is fine
498        // because that maximum is generally relatively small. If this method
499        // somehow ever gets hot because of unnecessary probing, we should
500        // either pass in the actual number of defined memories for the given
501        // module to this method, or keep a side table of all slots that are
502        // associated with a module (not just module and memory). The latter
503        // would require care to make sure that its maintenance wouldn't be too
504        // expensive for normal allocation/free operations.
505        for stripe in &self.stripes {
506            for i in 0..self.memories_per_instance {
507                use wasmtime_environ::EntityRef;
508                let memory_index = DefinedMemoryIndex::new(i);
509                while let Some(id) = stripe
510                    .allocator
511                    .alloc_affine_and_clear_affinity(module, memory_index)
512                {
513                    // Attempt to acquire the `MemoryImageSlot` state for this
514                    // slot, and then if we have that try to remove the image,
515                    // and then if all that succeeds put the slot back in.
516                    //
517                    // If anything fails then the slot will be in an "unknown"
518                    // state which means that on next use it'll be remapped with
519                    // anonymous memory.
520                    let index = MemoryAllocationIndex(id.0);
521                    if let Ok(mut slot) = self.take_memory_image_slot(index) {
522                        if slot.remove_image().is_ok() {
523                            self.return_memory_image_slot(index, slot);
524                        }
525                    }
526
527                    stripe.allocator.free(id, 0);
528                }
529            }
530        }
531    }
532
533    fn get_base(&self, allocation_index: MemoryAllocationIndex) -> MmapOffset {
534        assert!(allocation_index.index() < self.layout.num_slots);
535        let offset = self
536            .layout
537            .slot_bytes
538            .checked_mul(allocation_index.index())
539            .and_then(|c| c.checked_add(self.layout.pre_slab_guard_bytes))
540            .expect("slot_bytes * index + pre_slab_guard_bytes overflows");
541        self.mapping.offset(offset).expect("offset is in bounds")
542    }
543
544    /// Take ownership of the given image slot.
545    ///
546    /// This method is used when a `MemoryAllocationIndex` has been allocated
547    /// and the state of the slot needs to be acquired. This will lazily
548    /// allocate a `MemoryImageSlot` which describes the current (and possibly
549    /// prior) state of the slot.
550    ///
551    /// During deallocation this structure is passed back to
552    /// `return_memory_image_slot`.
553    ///
554    /// Note that this is a fallible method because using a slot might require
555    /// resetting the memory that was previously there, and that reset may not
556    /// succeed. If it fails then this slot cannot be used at this time.
558    fn take_memory_image_slot(
559        &self,
560        allocation_index: MemoryAllocationIndex,
561    ) -> Result<MemoryImageSlot> {
562        let (maybe_slot, needs_reset) = {
563            let mut slot = self.image_slots[allocation_index.index()].lock().unwrap();
564            match mem::replace(&mut *slot, ImageSlot::Unknown) {
565                ImageSlot::Unmapped => (None, false),
566                ImageSlot::Unknown => (None, true),
567                ImageSlot::PreviouslyUsed(state) => (Some(state), false),
568            }
569        };
570        let mut slot = maybe_slot.unwrap_or_else(|| {
571            MemoryImageSlot::create(
572                self.get_base(allocation_index),
573                HostAlignedByteCount::ZERO,
574                self.layout.max_memory_bytes.byte_count(),
575            )
576        });
577
578        // For `Unknown` slots it means that `slot` is brand new and isn't
579        // actually tracking the state of the previous slot, so reset it
580        // entirely with anonymous memory to wipe the slate clean and start
581        // from zero. This should only happen if allocation of the previous
582        // slot failed, for example.
583        if needs_reset {
584            slot.reset_with_anon_memory()?;
585        }
586        Ok(slot)
587    }
588
589    /// Return ownership of the given image slot.
590    fn return_memory_image_slot(
591        &self,
592        allocation_index: MemoryAllocationIndex,
593        slot: MemoryImageSlot,
594    ) {
595        assert!(!slot.is_dirty());
596
597        let prev = mem::replace(
598            &mut *self.image_slots[allocation_index.index()].lock().unwrap(),
599            ImageSlot::PreviouslyUsed(slot),
600        );
601        assert!(matches!(prev, ImageSlot::Unknown));
602    }
603
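    /// Returns the total number of unused warm slots across all stripes.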
604    pub fn unused_warm_slots(&self) -> u32 {
605        self.stripes
606            .iter()
607            .map(|i| i.allocator.unused_warm_slots())
608            .sum()
609    }
610
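    /// Returns the number of bytes kept resident in currently-unused slots,
    /// summed across all stripes.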
611    pub fn unused_bytes_resident(&self) -> usize {
612        self.stripes
613            .iter()
614            .map(|i| i.allocator.unused_bytes_resident())
615            .sum()
616    }
617}
618
619/// The index of a memory allocation within a single stripe.
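///
/// For example (illustrative): with 3 stripes, unstriped slot index 7 lives in
/// stripe `7 % 3 = 1` at striped index `7 / 3 = 2`; converting back yields
/// `2 * 3 + 1 = 7`.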
620#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
621pub struct StripedAllocationIndex(u32);
622
623impl StripedAllocationIndex {
624    fn from_unstriped_slot_index(
625        index: MemoryAllocationIndex,
626        num_stripes: usize,
627    ) -> (usize, Self) {
628        let stripe_index = index.index() % num_stripes;
629        let num_stripes: u32 = num_stripes.try_into().unwrap();
630        let index_within_stripe = Self(index.0 / num_stripes);
631        (stripe_index, index_within_stripe)
632    }
633
634    fn as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex {
635        let num_stripes: u32 = num_stripes.try_into().unwrap();
636        let stripe: u32 = stripe.try_into().unwrap();
637        MemoryAllocationIndex(self.0 * num_stripes + stripe)
638    }
639}
640
641#[derive(Clone, Debug)]
642struct SlabConstraints {
643    /// Essentially, the `memory_reservation`: this is an assumption that the
644    /// runtime and JIT compiler make about how much space will be guarded
645    /// between slots.
646    expected_slot_bytes: HostAlignedByteCount,
647    /// The maximum size of any memory in the pool. Always a non-zero multiple
648    /// of the page size.
649    max_memory_bytes: HostAlignedByteCount,
650    num_slots: usize,
651    num_pkeys_available: usize,
652    guard_bytes: HostAlignedByteCount,
653    guard_before_slots: bool,
654}
655
656impl SlabConstraints {
657    fn new(
658        limits: &InstanceLimits,
659        tunables: &Tunables,
660        num_pkeys_available: usize,
661    ) -> Result<Self> {
662        // `memory_reservation` is the configured number of bytes for a
663        // static memory slot (see `Config::memory_reservation`); even
664        // if the memory never grows to this size (e.g., it has a lower memory
665        // maximum), codegen will assume that this unused memory is mapped
666        // `PROT_NONE`. Typically `memory_reservation` is 4GiB which helps
667        // elide most bounds checks. `MemoryPool` must respect this bound,
668        // though not explicitly: if we can achieve the same effect via
669        // MPK-protected stripes, the slot size can be lower than the
670        // `memory_reservation`.
671        let expected_slot_bytes =
672            HostAlignedByteCount::new_rounded_up_u64(tunables.memory_reservation)
673                .context("memory reservation is too large")?;
674
675        // Page-align the maximum size of memory since that's the granularity that
676        // permissions are going to be controlled at.
677        let max_memory_bytes = HostAlignedByteCount::new_rounded_up(limits.max_memory_size)
678            .context("maximum size of memory is too large")?;
679
680        let guard_bytes = HostAlignedByteCount::new_rounded_up_u64(tunables.memory_guard_size)
681            .context("guard region is too large")?;
682
683        let num_slots = limits
684            .total_memories
685            .try_into()
686            .context("too many memories")?;
687
688        let constraints = SlabConstraints {
689            max_memory_bytes,
690            num_slots,
691            expected_slot_bytes,
692            num_pkeys_available,
693            guard_bytes,
694            guard_before_slots: tunables.guard_before_linear_memory,
695        };
696        Ok(constraints)
697    }
698}
699
700#[derive(Debug)]
701struct SlabLayout {
702    /// The total number of slots available in the memory pool slab.
703    num_slots: usize,
704    /// The size of each slot in the memory pool; this contains the maximum
705    /// memory size (i.e., from WebAssembly or Wasmtime configuration) plus any
706    /// guard region after the memory to catch OOB access. On these guard
707    /// regions, note that:
708    /// - users can configure how aggressively (or not) to elide bounds checks
709    ///   via `Config::memory_guard_size` (see also:
710    ///   `memory_and_guard_size`)
711    /// - memory protection keys can compress the size of the guard region by
712    ///   placing slots from a different key (i.e., a stripe) in the guard
713    ///   region; this means the slot itself can be smaller and we can allocate
714    ///   more of them.
715    slot_bytes: HostAlignedByteCount,
716    /// The maximum size that can become accessible, in bytes, for each linear
717    /// memory. Guaranteed to be a whole number of Wasm pages.
718    max_memory_bytes: HostAlignedByteCount,
719    /// If necessary, the number of bytes to reserve as a guard region at the
720    /// beginning of the slab.
721    pre_slab_guard_bytes: HostAlignedByteCount,
722    /// Like `pre_slab_guard_bytes`, but at the end of the slab.
723    post_slab_guard_bytes: HostAlignedByteCount,
724    /// The number of stripes needed in the slab layout.
725    num_stripes: usize,
726}
727
728impl SlabLayout {
729    /// Return the total size of the slab, using the final layout (where `n =
730    /// num_slots`):
731    ///
732    /// ```text
733    /// ┌────────────────────┬──────┬──────┬───┬──────┬─────────────────────┐
734    /// │pre_slab_guard_bytes│slot 1│slot 2│...│slot n│post_slab_guard_bytes│
735    /// └────────────────────┴──────┴──────┴───┴──────┴─────────────────────┘
736    /// ```
737    fn total_slab_bytes(&self) -> Result<HostAlignedByteCount> {
738        self.slot_bytes
739            .checked_mul(self.num_slots)
740            .and_then(|c| c.checked_add(self.pre_slab_guard_bytes))
741            .and_then(|c| c.checked_add(self.post_slab_guard_bytes))
742            .context("total size of memory reservation exceeds addressable memory")
743    }
744
745    /// Returns the number of bytes from the beginning of one slot to the start
746    /// of the next slot in the same stripe--this is the striped equivalent of
747    /// `memory_reservation`. Recall that between slots of the same stripe we
748    /// will see a slot from every other stripe.
749    ///
750    /// For example, in a 3-stripe pool, this function measures the distance
751    /// from the beginning of slot 1 to slot 4, which are of the same stripe:
752    ///
753    /// ```text
754    ///  ◄────────────────────►
755    /// ┌────────┬──────┬──────┬────────┬───┐
756    /// │*slot 1*│slot 2│slot 3│*slot 4*│...|
757    /// └────────┴──────┴──────┴────────┴───┘
758    /// ```
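    ///
    /// Illustrative numbers: with `slot_bytes` of 2 GiB and `num_stripes` of 3,
    /// this returns 6 GiB.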
759    fn bytes_to_next_stripe_slot(&self) -> HostAlignedByteCount {
760        self.slot_bytes
761            .checked_mul(self.num_stripes)
762            .expect("constructor checks that self.slot_bytes * self.num_stripes is in bounds")
763    }
764}
765
766fn calculate(constraints: &SlabConstraints) -> Result<SlabLayout> {
767    let SlabConstraints {
768        max_memory_bytes,
769        num_slots,
770        expected_slot_bytes,
771        num_pkeys_available,
772        guard_bytes,
773        guard_before_slots,
774    } = *constraints;
775
776    // If the user specifies a guard region, we always need to allocate a
777    // `PROT_NONE` region for it before any memory slots. Recall that we can
778    // avoid bounds checks for loads and stores with immediates up to
779    // `guard_bytes`, but we rely on Wasmtime to emit bounds checks for any
780    // accesses greater than this.
781    let pre_slab_guard_bytes = if guard_before_slots {
782        guard_bytes
783    } else {
784        HostAlignedByteCount::ZERO
785    };
786
787    // To calculate the slot size, we start with the default configured size and
788    // attempt to chip away at this via MPK protection. Note here how we begin
789    // to define a slot as "all of the memory and guard region."
790    let faulting_region_bytes = expected_slot_bytes
791        .max(max_memory_bytes)
792        .checked_add(guard_bytes)
793        .context("faulting region is too large")?;
794
795    let (num_stripes, slot_bytes) = if guard_bytes == 0 || max_memory_bytes == 0 || num_slots == 0 {
796        // In the uncommon case where the memory/guard regions are empty or we
797        // don't need any slots, we will not need any stripes: we just lay out
798        // the slots back-to-back using a single stripe.
799        (1, faulting_region_bytes.byte_count())
800    } else if num_pkeys_available < 2 {
801        // If we do not have enough protection keys to stripe the memory, we do
802        // the same. We can't elide any of the guard bytes because we aren't
803        // overlapping guard regions with other stripes...
804        (1, faulting_region_bytes.byte_count())
805    } else {
806        // ...but if we can create at least two stripes, we can use another
807        // stripe (i.e., a different pkey) as this slot's guard region--this
808        // reduces the guard bytes each slot has to allocate. We must make
809        // sure, though, that if the size of that other stripe(s) does not
810        // fully cover `guard_bytes`, we keep those around to prevent OOB
811        // access.
812
813        // We first calculate the number of stripes we need: we want to
814        // minimize this so that there is less chance of a single store
815        // running out of slots with its stripe--we need at least two,
816        // though. But this is not just an optimization; we need to handle
817        // the case when there are fewer slots than stripes. E.g., if our
818        // pool is configured with only three slots (`num_memory_slots =
819        // 3`), we will run into failures if we attempt to set up more than
820        // three stripes.
821        let needed_num_stripes = faulting_region_bytes
822            .checked_div(max_memory_bytes)
823            .expect("if condition above implies max_memory_bytes is non-zero")
824            + usize::from(
825                faulting_region_bytes
826                    .checked_rem(max_memory_bytes)
827                    .expect("if condition above implies max_memory_bytes is non-zero")
828                    != 0,
829            );
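        // For example (illustrative): a 6 GiB faulting region with 2 GiB
        // memories needs `6 / 2 = 3` stripes; a non-zero remainder would
        // require one extra stripe.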
830        assert!(needed_num_stripes > 0);
831        let num_stripes = num_pkeys_available.min(needed_num_stripes).min(num_slots);
832
833        // Next, we try to reduce the slot size by "overlapping" the stripes: we
834        // can make slot `n` smaller since we know that slot `n+1` and following
835        // are in different stripes and will look just like `PROT_NONE` memory.
836        // Recall that codegen expects a guarantee that at least
837        // `faulting_region_bytes` will catch OOB accesses via segfaults.
838        let needed_slot_bytes = faulting_region_bytes
839            .byte_count()
840            .checked_div(num_stripes)
841            .unwrap_or(faulting_region_bytes.byte_count())
842            .max(max_memory_bytes.byte_count());
843        assert!(needed_slot_bytes >= max_memory_bytes.byte_count());
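        // Continuing the illustrative example above: a 6 GiB faulting region
        // split across 3 stripes needs only 2 GiB per slot, since the next two
        // slots (which belong to other stripes) provide the remaining 4 GiB of
        // faulting region before this stripe's next slot.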
844
845        (num_stripes, needed_slot_bytes)
846    };
847
848    // The page-aligned slot size; equivalent to `memory_and_guard_size`.
849    let slot_bytes =
850        HostAlignedByteCount::new_rounded_up(slot_bytes).context("slot size is too large")?;
851
852    // We may need another guard region (like `pre_slab_guard_bytes`) at the end
853    // of our slab to maintain our `faulting_region_bytes` guarantee. We could
854    // be conservative and just create it as large as `faulting_region_bytes`,
855    // but because we know that the last slot's `slot_bytes` make up the first
856    // part of that region, we reduce the final guard region by that much.
857    let post_slab_guard_bytes = faulting_region_bytes.saturating_sub(slot_bytes);
858
859    // Check that we haven't exceeded the slab we can calculate given the limits
860    // of `usize`.
861    let layout = SlabLayout {
862        num_slots,
863        slot_bytes,
864        max_memory_bytes,
865        pre_slab_guard_bytes,
866        post_slab_guard_bytes,
867        num_stripes,
868    };
869    layout.total_slab_bytes()?;
870    Ok(layout)
873}
874
875#[cfg(test)]
876mod tests {
877    use super::*;
878    use proptest::prelude::*;
879
880    const WASM_PAGE_SIZE: u32 = wasmtime_environ::Memory::DEFAULT_PAGE_SIZE;
881
882    #[cfg(target_pointer_width = "64")]
883    #[test]
884    fn test_memory_pool() -> Result<()> {
885        let pool = MemoryPool::new(
886            &PoolingInstanceAllocatorConfig {
887                limits: InstanceLimits {
888                    total_memories: 5,
889                    max_tables_per_module: 0,
890                    max_memories_per_module: 3,
891                    table_elements: 0,
892                    max_memory_size: WASM_PAGE_SIZE as usize,
893                    ..Default::default()
894                },
895                ..Default::default()
896            },
897            &Tunables {
898                memory_reservation: WASM_PAGE_SIZE as u64,
899                memory_guard_size: 0,
900                ..Tunables::default_host()
901            },
902        )?;
903
904        assert_eq!(pool.layout.slot_bytes, WASM_PAGE_SIZE as usize);
905        assert_eq!(pool.layout.num_slots, 5);
906        assert_eq!(pool.layout.max_memory_bytes, WASM_PAGE_SIZE as usize);
907
908        let base = pool.mapping.as_ptr() as usize;
909
910        for i in 0..5 {
911            let index = MemoryAllocationIndex(i);
912            let ptr = pool.get_base(index).as_mut_ptr();
913            assert_eq!(
914                ptr as usize - base,
915                i as usize * pool.layout.slot_bytes.byte_count()
916            );
917        }
918
919        Ok(())
920    }
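
    // A small added sanity check (a sketch, not part of the original suite):
    // striped and unstriped slot indices should round-trip for a handful of
    // stripe counts.
    #[test]
    fn test_striped_index_round_trip() {
        for num_stripes in 1..=4 {
            for raw in 0..16u32 {
                let (stripe, striped) = StripedAllocationIndex::from_unstriped_slot_index(
                    MemoryAllocationIndex(raw),
                    num_stripes,
                );
                let unstriped = striped.as_unstriped_slot_index(stripe, num_stripes);
                assert_eq!(unstriped.index(), raw as usize);
            }
        }
    }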
921
922    #[test]
923    #[cfg_attr(miri, ignore)]
924    fn test_pooling_allocator_striping() {
925        if !mpk::is_supported() {
926            println!("skipping `test_pooling_allocator_striping` test; mpk is not supported");
927            return;
928        }
929
930        // Force the use of MPK.
931        let config = PoolingInstanceAllocatorConfig {
932            memory_protection_keys: Enabled::Yes,
933            ..PoolingInstanceAllocatorConfig::default()
934        };
935        let pool = MemoryPool::new(&config, &Tunables::default_host()).unwrap();
936        assert!(pool.stripes.len() >= 2);
937
938        let max_memory_slots = config.limits.total_memories;
939        dbg!(pool.stripes[0].allocator.num_empty_slots());
940        dbg!(pool.stripes[1].allocator.num_empty_slots());
941        let available_memory_slots: usize = pool
942            .stripes
943            .iter()
944            .map(|s| s.allocator.num_empty_slots())
945            .sum();
946        assert_eq!(
947            max_memory_slots,
948            u32::try_from(available_memory_slots).unwrap()
949        );
950    }
951
952    #[test]
953    fn check_known_layout_calculations() {
954        for num_pkeys_available in 0..16 {
955            for num_memory_slots in [0, 1, 10, 64] {
956                for expected_slot_bytes in [0, 1 << 30 /* 1GB */, 4 << 30 /* 4GB */] {
957                    let expected_slot_bytes =
958                        HostAlignedByteCount::new(expected_slot_bytes).unwrap();
959                    for max_memory_bytes in
960                        [0, 1 * WASM_PAGE_SIZE as usize, 10 * WASM_PAGE_SIZE as usize]
961                    {
962                        // Note new rather than new_rounded_up here -- for now,
963                        // WASM_PAGE_SIZE is 64KiB, which is a multiple of the
964                        // host page size on all platforms.
965                        let max_memory_bytes = HostAlignedByteCount::new(max_memory_bytes).unwrap();
966                        for guard_bytes in [0, 2 << 30 /* 2GB */] {
967                            let guard_bytes = HostAlignedByteCount::new(guard_bytes).unwrap();
968                            for guard_before_slots in [true, false] {
969                                let constraints = SlabConstraints {
970                                    max_memory_bytes,
971                                    num_slots: num_memory_slots,
972                                    expected_slot_bytes,
973                                    num_pkeys_available,
974                                    guard_bytes,
975                                    guard_before_slots,
976                                };
977                                match calculate(&constraints) {
978                                    Ok(layout) => {
979                                        assert_slab_layout_invariants(constraints, layout)
980                                    }
981                                    Err(e) => {
982                                        // Only allow failure on 32-bit
983                                        // platforms where the calculation
984                                        // exceeded the size of the address
985                                        // space
986                                        assert!(
987                                            cfg!(target_pointer_width = "32")
988                                                && e.to_string()
989                                                    .contains("exceeds addressable memory"),
990                                            "bad error: {e:?}"
991                                        );
992                                    }
993                                }
994                            }
995                        }
996                    }
997                }
998            }
999        }
1000    }
1001
1002    proptest! {
1003        #[test]
1004        #[cfg_attr(miri, ignore)]
1005        fn check_random_layout_calculations(c in constraints()) {
1006            if let Ok(l) = calculate(&c) {
1007                assert_slab_layout_invariants(c, l);
1008            }
1009        }
1010    }
1011
1012    fn constraints() -> impl Strategy<Value = SlabConstraints> {
1013        (
1014            any::<HostAlignedByteCount>(),
1015            any::<usize>(),
1016            any::<HostAlignedByteCount>(),
1017            any::<usize>(),
1018            any::<HostAlignedByteCount>(),
1019            any::<bool>(),
1020        )
1021            .prop_map(
1022                |(
1023                    max_memory_bytes,
1024                    num_memory_slots,
1025                    expected_slot_bytes,
1026                    num_pkeys_available,
1027                    guard_bytes,
1028                    guard_before_slots,
1029                )| {
1030                    SlabConstraints {
1031                        max_memory_bytes,
1032                        num_slots: num_memory_slots,
1033                        expected_slot_bytes,
1034                        num_pkeys_available,
1035                        guard_bytes,
1036                        guard_before_slots,
1037                    }
1038                },
1039            )
1040    }
1041
1042    fn assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout) {
1043        // Check that all the sizes add up.
1044        assert_eq!(
1045            s.total_slab_bytes().unwrap(),
1046            s.pre_slab_guard_bytes
1047                .checked_add(s.slot_bytes.checked_mul(c.num_slots).unwrap())
1048                .and_then(|c| c.checked_add(s.post_slab_guard_bytes))
1049                .unwrap(),
1050            "the slab size does not add up: {c:?} => {s:?}"
1051        );
1052        assert!(
1053            s.slot_bytes >= s.max_memory_bytes,
1054            "slot is not big enough: {c:?} => {s:?}"
1055        );
1056
1057        // The HostAlignedByteCount newtype wrapper ensures that the various
1058        // byte values are page-aligned.
1059
1060        // Check that we use no more or less stripes than needed.
1061        assert!(s.num_stripes >= 1, "not enough stripes: {c:?} => {s:?}");
1062        if c.num_pkeys_available == 0 || c.num_slots == 0 {
1063            assert_eq!(
1064                s.num_stripes, 1,
1065                "expected exactly one stripe: {c:?} => {s:?}"
1066            );
1067        } else {
1068            assert!(
1069                s.num_stripes <= c.num_pkeys_available,
1070                "layout has more stripes than available pkeys: {c:?} => {s:?}"
1071            );
1072            assert!(
1073                s.num_stripes <= c.num_slots,
1074                "layout has more stripes than memory slots: {c:?} => {s:?}"
1075            );
1076        }
1077
1078        // Check that we use the minimum number of stripes/protection keys.
1079        // - if the next MPK-protected slot is bigger or the same as the
1080        //   required guard region, we only need two stripes
1081        // - if the next slot is smaller than the guard region, we only need
1082        //   enough stripes to add up to at least that guard region size.
1083        if c.num_pkeys_available > 1 && !c.max_memory_bytes.is_zero() {
1084            assert!(
1085                s.num_stripes <= (c.guard_bytes.checked_div(c.max_memory_bytes).unwrap() + 2),
1086                "calculated more stripes than needed: {c:?} => {s:?}"
1087            );
1088        }
1089
1090        // Check that the memory-striping will not allow OOB access.
1091        // - we may have reduced the slot size from `expected_slot_bytes` to
1092        //   `slot_bytes` assuming MPK striping; we check that our guaranteed
1093        //   "faulting region" is respected
1094        // - the last slot won't have MPK striping after it; we check that the
1095        //   `post_slab_guard_bytes` accounts for this
1096        assert!(
1097            s.bytes_to_next_stripe_slot()
1098                >= c.expected_slot_bytes
1099                    .max(c.max_memory_bytes)
1100                    .checked_add(c.guard_bytes)
1101                    .unwrap(),
1102            "faulting region not large enough: {c:?} => {s:?}"
1103        );
1104        assert!(
1105            s.slot_bytes.checked_add(s.post_slab_guard_bytes).unwrap() >= c.expected_slot_bytes,
1106            "last slot may allow OOB access: {c:?} => {s:?}"
1107        );
1108    }
1109}