wasmtime/runtime/vm/instance/allocator/pooling/memory_pool.rs
1//! Implements a memory pool using a single allocated memory slab.
2//!
3//! The pooling instance allocator maps one large slab of memory in advance and
4//! allocates WebAssembly memories from this slab--a [`MemoryPool`]. Each
5//! WebAssembly memory is allocated in its own slot (see uses of `index` and
6//! [`SlotId`] in this module):
7//!
8//! ```text
9//! ┌──────┬──────┬──────┬──────┬──────┐
10//! │Slot 0│Slot 1│Slot 2│Slot 3│......│
11//! └──────┴──────┴──────┴──────┴──────┘
12//! ```
13//!
14//! Diving deeper, we note that a [`MemoryPool`] protects Wasmtime from
15//! out-of-bounds memory accesses by inserting inaccessible guard regions
16//! between memory slots. These guard regions are configured to raise a signal
17//! if they are accessed--a WebAssembly out-of-bounds (OOB) memory access. The
18//! [`MemoryPool`] documentation has a more detailed chart but one can think of
19//! memory slots being laid out like the following:
20//!
21//! ```text
22//! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
23//! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
24//! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
25//! ```
26//!
27//! But we can be more efficient about guard regions: with memory protection
28//! keys (MPK) enabled, the interleaved guard regions can be smaller. If we
29//! surround a memory with memories from other instances and each instance is
30//! protected by different protection keys, the guard region can be smaller AND
31//! the pool will still raise a signal on an OOB access. This complicates how we
32//! lay out memory slots: we must store memories from the same instance in the
33//! same "stripe". Each stripe is protected by a different protection key.
34//!
35//! This concept, dubbed [ColorGuard] in the original paper, relies on careful
36//! calculation of the memory sizes to prevent any "overlapping access" (see
37//! [`calculate`]): there are limited protection keys available (15) so the next
38//! memory using the same key must be at least as far away as the guard region
39//! we would insert otherwise. This ends up looking like the following, where a
//! store for instance 0 (`I0`) "stripes" two memories (`M1` and `M2`) with the
41//! same protection key 1 and far enough apart to signal an OOB access:
42//!
43//! ```text
44//! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
45//! │.....│I0:M1│.....│.....│.<enough slots>.│I0:M2│.....│.....│
46//! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
47//! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
48//! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
49//! ```
50//!
51//! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf
52
53use super::{
54 MemoryAllocationIndex,
55 index_allocator::{MemoryInModule, ModuleAffinityIndexAllocator, SlotId},
56};
57use crate::prelude::*;
58use crate::runtime::vm::{
59 CompiledModuleId, InstanceAllocationRequest, InstanceLimits, Memory, MemoryBase,
60 MemoryImageSlot, Mmap, MmapOffset, PoolingInstanceAllocatorConfig, mmap::AlignedLength,
61};
62use crate::{
63 Enabled,
64 runtime::vm::mpk::{self, ProtectionKey, ProtectionMask},
65 vm::HostAlignedByteCount,
66};
67use std::mem;
68use std::sync::atomic::{AtomicUsize, Ordering};
69use std::sync::{Arc, Mutex};
70use wasmtime_environ::{DefinedMemoryIndex, MemoryKind, MemoryTunables, Module, Tunables};
71
72/// A set of allocator slots.
73///
74/// The allocated slots can be split by striping them: e.g., with two stripe
75/// colors 0 and 1, we would allocate all even slots using stripe 0 and all odd
76/// slots using stripe 1.
77///
78/// This is helpful for the use of protection keys: (a) if a request comes to
79/// allocate multiple instances, we can allocate them all from the same stripe
80/// and (b) if a store wants to allocate more from the same stripe it can.
81#[derive(Debug)]
82struct Stripe {
83 allocator: ModuleAffinityIndexAllocator,
84 pkey: Option<ProtectionKey>,
85}
86
87/// Represents a pool of WebAssembly linear memories.
88///
89/// A linear memory is divided into accessible pages and guard pages. A memory
90/// pool contains linear memories: each memory occupies a slot in an
91/// allocated slab (i.e., `mapping`):
92///
93/// ```text
94/// layout.max_memory_bytes layout.slot_bytes
95/// | |
96/// ◄─────┴────► ◄───────────┴──────────►
97/// ┌───────────┬────────────┬───────────┐ ┌───────────┬───────────┬───────────┐
98/// | PROT_NONE | | PROT_NONE | ... | | PROT_NONE | PROT_NONE |
99/// └───────────┴────────────┴───────────┘ └───────────┴───────────┴───────────┘
100/// | |◄──────────────────┬─────────────────────────────────► ◄────┬────►
101/// | | | |
102/// mapping | `layout.num_slots` memories layout.post_slab_guard_size
103/// |
104/// layout.pre_slab_guard_size
105/// ```
106#[derive(Debug)]
107pub struct MemoryPool {
108 mapping: Arc<Mmap<AlignedLength>>,
109 /// This memory pool is stripe-aware. If using memory protection keys, this
110 /// will contain one stripe per available key; otherwise, a single stripe
111 /// with an empty key.
112 stripes: Vec<Stripe>,
113
114 /// If using a copy-on-write allocation scheme, the slot management. We
115 /// dynamically transfer ownership of a slot to a Memory when in use.
116 image_slots: Vec<Mutex<ImageSlot>>,
117
118 /// A description of the various memory sizes used in allocating the
119 /// `mapping` slab.
120 layout: SlabLayout,
121
122 /// The maximum number of memories that a single core module instance may
123 /// use.
124 ///
125 /// NB: this is needed for validation but does not affect the pool's size.
126 memories_per_instance: usize,
127
128 /// How much linear memory, in bytes, to keep resident after resetting for
129 /// use with the next instance. This much memory will be `memset` to zero
130 /// when a linear memory is deallocated.
131 ///
132 /// Memory exceeding this amount in the wasm linear memory will be released
133 /// with `madvise` back to the kernel.
134 ///
135 /// Only applicable on Linux.
136 pub(super) keep_resident: HostAlignedByteCount,
137
138 /// Keep track of protection keys handed out to initialized stores; this
139 /// allows us to round-robin the assignment of stores to stripes.
140 next_available_pkey: AtomicUsize,
141}
142
143/// The state of memory for each slot in this pool.
144#[derive(Debug)]
145enum ImageSlot {
146 /// This slot is guaranteed to be entirely unmapped.
147 ///
148 /// This is the initial state of all slots.
149 Unmapped,
150
151 /// The state of this slot is unknown.
152 ///
153 /// This encompasses a number of situations such as:
154 ///
155 /// * The slot is currently in use.
156 /// * The slot was attempted to be in use, but allocation failed.
157 /// * The slot was used but not deallocated properly.
158 ///
159 /// All of these situations are lumped into this one variant indicating
160 /// that, at a base level, no knowledge is known about this slot. Using a
161 /// slot in this state first requires resetting all memory in this slot by
162 /// mapping anonymous memory on top of the entire slot.
163 Unknown,
164
165 /// This slot was previously used and `MemoryImageSlot` maintains the state
166 /// about what this slot was last configured as.
167 ///
168 /// Future use of this slot will use `MemoryImageSlot` to continue to
169 /// re-instantiate and reuse images and such. This state is entered after
170 /// and allocated slot is successfully deallocated.
171 PreviouslyUsed(MemoryImageSlot),
172}
173
174impl MemoryPool {
175 /// Create a new `MemoryPool`.
176 pub fn new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self> {
177 if u64::try_from(config.limits.max_memory_size).unwrap() > tunables.memory_reservation {
178 bail!(
179 "maximum memory size of {:#x} bytes exceeds the configured \
180 memory reservation of {:#x} bytes",
181 config.limits.max_memory_size,
182 tunables.memory_reservation
183 );
184 }
185 let pkeys = match config.memory_protection_keys {
186 Enabled::Auto => {
187 if mpk::is_supported() {
188 mpk::keys(config.max_memory_protection_keys)
189 } else {
190 &[]
191 }
192 }
193 Enabled::Yes => {
194 if mpk::is_supported() {
195 mpk::keys(config.max_memory_protection_keys)
196 } else {
197 bail!("mpk is disabled on this system")
198 }
199 }
200 Enabled::No => &[],
201 };
202
203 // This is a tricky bit of global state: when creating a memory pool
204 // that uses memory protection keys, we ensure here that any host code
205 // will have access to all keys (i.e., stripes). It's only when we enter
206 // the WebAssembly guest code (see `StoreInner::call_hook`) that we
207 // enforce which keys/stripes can be accessed. Be forewarned about the
208 // assumptions here:
209 // - we expect this "allow all" configuration to reset the default
210 // process state (only allow key 0) _before_ any memories are accessed
211 // - and we expect no other code (e.g., host-side code) to modify this
212 // global MPK configuration
213 if !pkeys.is_empty() {
214 mpk::allow(ProtectionMask::all());
215 }
216
217 // Create a slab layout and allocate it as a completely inaccessible
218 // region to start--`PROT_NONE`.
219 let constraints = SlabConstraints::new(&config.limits, tunables, pkeys.len())?;
220 let layout = calculate(&constraints)?;
221 log::debug!(
222 "creating memory pool: {constraints:?} -> {layout:?} (total: {})",
223 layout.total_slab_bytes()?
224 );
225 let mut mapping =
226 Mmap::accessible_reserved(HostAlignedByteCount::ZERO, layout.total_slab_bytes()?)
227 .context("failed to create memory pool mapping")?;
228
229 // Then, stripe the memory with the available protection keys. This is
230 // unnecessary if there is only one stripe color.
231 if layout.num_stripes >= 2 {
232 let mut cursor = layout.pre_slab_guard_bytes;
233 let pkeys = &pkeys[..layout.num_stripes];
234 for i in 0..constraints.num_slots {
235 let pkey = &pkeys[i % pkeys.len()];
236 let region = unsafe {
237 mapping.slice_mut(
238 cursor.byte_count()..cursor.byte_count() + layout.slot_bytes.byte_count(),
239 )
240 };
241 pkey.protect(region)?;
242 cursor = cursor
243 .checked_add(layout.slot_bytes)
244 .context("cursor + slot_bytes overflows")?;
245 }
246 debug_assert_eq!(
247 cursor
248 .checked_add(layout.post_slab_guard_bytes)
249 .context("cursor + post_slab_guard_bytes overflows")?,
250 layout.total_slab_bytes()?
251 );
252 }
253
254 let image_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(ImageSlot::Unmapped))
255 .take(constraints.num_slots)
256 .collect();
257
258 let create_stripe = |i| {
259 let num_slots = constraints.num_slots / layout.num_stripes
260 + usize::from(constraints.num_slots % layout.num_stripes > i);
261 let allocator = ModuleAffinityIndexAllocator::new(
262 num_slots.try_into().unwrap(),
263 config.max_unused_warm_slots,
264 );
265 Stripe {
266 allocator,
267 pkey: pkeys.get(i).cloned(),
268 }
269 };
270
271 debug_assert!(layout.num_stripes > 0);
272 let stripes: Vec<_> = (0..layout.num_stripes).map(create_stripe).collect();
273
274 let pool = Self {
275 stripes,
276 mapping: Arc::new(mapping),
277 image_slots,
278 layout,
279 memories_per_instance: usize::try_from(config.limits.max_memories_per_module).unwrap(),
280 keep_resident: HostAlignedByteCount::new_rounded_up(
281 config.linear_memory_keep_resident,
282 )?,
283 next_available_pkey: AtomicUsize::new(0),
284 };
285
286 Ok(pool)
287 }
288
289 /// Return a protection key that stores can use for requesting new
290 pub fn next_available_pkey(&self) -> Option<ProtectionKey> {
291 let index = self.next_available_pkey.fetch_add(1, Ordering::SeqCst) % self.stripes.len();
292 debug_assert!(
293 self.stripes.len() < 2 || self.stripes[index].pkey.is_some(),
294 "if we are using stripes, we cannot have an empty protection key"
295 );
296 self.stripes[index].pkey
297 }
298
299 /// Validate whether this memory pool supports the given module.
300 pub fn validate_memories(&self, module: &Module) -> Result<()> {
301 let memories = module.num_defined_memories();
302 if memories > self.memories_per_instance {
303 bail!(
304 "defined memories count of {} exceeds the per-instance limit of {}",
305 memories,
306 self.memories_per_instance,
307 );
308 }
309
310 for (i, memory) in module.memories.iter().skip(module.num_imported_memories) {
311 self.validate_memory(memory).with_context(|| {
312 format!(
313 "memory index {} is unsupported in this pooling allocator configuration",
314 i.as_u32()
315 )
316 })?;
317 }
318 Ok(())
319 }
320
321 /// Validate one memory for this pool.
322 pub fn validate_memory(&self, memory: &wasmtime_environ::Memory) -> Result<()> {
323 let min = memory.minimum_byte_size().with_context(|| {
324 format!("memory has a minimum byte size that cannot be represented in a u64",)
325 })?;
326 if min > u64::try_from(self.layout.max_memory_bytes.byte_count()).unwrap() {
327 bail!(
328 "memory has a minimum byte size of {} which exceeds the limit of {} bytes",
329 min,
330 self.layout.max_memory_bytes,
331 );
332 }
333 if memory.shared {
334 // FIXME(#4244): since the pooling allocator owns the memory
335 // allocation (which is torn down with the instance), that
336 // can't be used with shared memory where threads or the host
337 // might persist the memory beyond the lifetime of the instance
338 // itself.
339 bail!("memory is shared which is not supported in the pooling allocator");
340 }
341 Ok(())
342 }
343
344 /// Are zero slots in use right now?
345 pub fn is_empty(&self) -> bool {
346 self.stripes.iter().all(|s| s.allocator.is_empty())
347 }
348
349 /// Allocate a single memory for the given instance allocation request.
350 pub async fn allocate(
351 &self,
352 request: &mut InstanceAllocationRequest<'_, '_>,
353 ty: &wasmtime_environ::Memory,
354 memory_index: Option<DefinedMemoryIndex>,
355 ) -> Result<(MemoryAllocationIndex, Memory)> {
356 let tunables = request.store.engine().tunables();
357 let memory_tunables = MemoryTunables::new(tunables, MemoryKind::LinearMemory);
358 let stripe_index = if let Some(pkey) = request.store.get_pkey() {
359 pkey.as_stripe()
360 } else {
361 debug_assert!(self.stripes.len() < 2);
362 0
363 };
364
365 let striped_allocation_index = self.stripes[stripe_index]
366 .allocator
367 .alloc(memory_index.and_then(|mem_idx| {
368 request
369 .runtime_info
370 .unique_id()
371 .map(|id| MemoryInModule(id, mem_idx))
372 }))
373 .map(|slot| StripedAllocationIndex(u32::try_from(slot.index()).unwrap()))
374 .ok_or_else(|| {
375 super::PoolConcurrencyLimitError::new(
376 self.stripes[stripe_index].allocator.len(),
377 format!("memory stripe {stripe_index}"),
378 )
379 })?;
380 let mut guard = DeallocateIndexGuard {
381 pool: self,
382 stripe_index,
383 striped_allocation_index,
384 active: true,
385 };
386
387 let allocation_index =
388 striped_allocation_index.as_unstriped_slot_index(stripe_index, self.stripes.len());
389
390 // Double-check that the runtime requirements of the memory are
391 // satisfied by the configuration of this pooling allocator. This
392 // should be returned as an error through `validate_memory_plans`
393 // but double-check here to be sure.
394 assert!(
395 memory_tunables.reservation() + memory_tunables.guard_size()
396 <= u64::try_from(self.layout.bytes_to_next_stripe_slot().byte_count()).unwrap()
397 );
398
399 let base = self.get_base(allocation_index);
400 let base_capacity = self.layout.max_memory_bytes;
401
402 let mut slot = self.take_memory_image_slot(allocation_index)?;
403 let image = match memory_index {
404 Some(memory_index) => request.runtime_info.memory_image(memory_index)?,
405 None => None,
406 };
407 let initial_size = ty
408 .minimum_byte_size()
409 .expect("min size checked in validation");
410
411 // If instantiation fails, we can propagate the error
412 // upward and drop the slot. This will cause the Drop
413 // handler to attempt to map the range with PROT_NONE
414 // memory, to reserve the space while releasing any
415 // stale mappings. The next use of this slot will then
416 // create a new slot that will try to map over
417 // this, returning errors as well if the mapping
418 // errors persist. The unmap-on-drop is best effort;
419 // if it fails, then we can still soundly continue
420 // using the rest of the pool and allowing the rest of
421 // the process to continue, because we never perform a
422 // mmap that would leave an open space for someone
423 // else to come in and map something.
424 let initial_size = usize::try_from(initial_size).unwrap();
425 slot.instantiate(initial_size, image, ty, &memory_tunables)?;
426
427 let memory = Memory::new_static(
428 ty,
429 &memory_tunables,
430 MemoryBase::Mmap(base),
431 base_capacity.byte_count(),
432 slot,
433 request.limiter.as_deref_mut(),
434 )
435 .await?;
436 guard.active = false;
437 return Ok((allocation_index, memory));
438
439 struct DeallocateIndexGuard<'a> {
440 pool: &'a MemoryPool,
441 stripe_index: usize,
442 striped_allocation_index: StripedAllocationIndex,
443 active: bool,
444 }
445
446 impl Drop for DeallocateIndexGuard<'_> {
447 fn drop(&mut self) {
448 if !self.active {
449 return;
450 }
451 self.pool.stripes[self.stripe_index]
452 .allocator
453 .free(SlotId(self.striped_allocation_index.0), 0);
454 }
455 }
456 }
457
458 /// Deallocate a previously-allocated memory.
459 ///
460 /// If `image` is `None` then the state of this memory's slot is left
461 /// unknown. Otherwise `image` is used to retain information about the state
462 /// of this slot.
463 ///
464 /// # Safety
465 ///
466 /// The memory must have been previously allocated from this pool and
467 /// assigned the given index, must currently be in an allocated state, and
468 /// must never be used again.
469 ///
470 /// The caller must have already called `clear_and_remain_ready` on the
471 /// memory's image and flushed any enqueued decommits for this memory. Note
472 /// that if `image` is `None` then this is not required.
473 pub unsafe fn deallocate(
474 &self,
475 allocation_index: MemoryAllocationIndex,
476 image: Option<MemoryImageSlot>,
477 bytes_resident: usize,
478 ) {
479 self.return_memory_image_slot(allocation_index, image);
480
481 let (stripe_index, striped_allocation_index) =
482 StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len());
483 self.stripes[stripe_index]
484 .allocator
485 .free(SlotId(striped_allocation_index.0), bytes_resident);
486 }
487
488 /// Purging everything related to `module`.
489 pub fn purge_module(&self, module: CompiledModuleId) {
490 // This primarily means clearing out all of its memory images present in
491 // the virtual address space. Go through the index allocator for slots
492 // affine to `module` and reset them, freeing up the index when we're
493 // done.
494 //
495 // Note that this is only called when the specified `module` won't be
496 // allocated further (the module is being dropped) so this shouldn't hit
497 // any sort of infinite loop since this should be the final operation
498 // working with `module`.
499 //
500 // TODO: We are given a module id, but key affinity by pair of module id
501 // and defined memory index. We are missing any defined memory index or
502 // count of how many memories the module defines here. Therefore, we
503 // probe up to the maximum number of memories per instance. This is fine
504 // because that maximum is generally relatively small. If this method
505 // somehow ever gets hot because of unnecessary probing, we should
506 // either pass in the actual number of defined memories for the given
507 // module to this method, or keep a side table of all slots that are
508 // associated with a module (not just module and memory). The latter
509 // would require care to make sure that its maintenance wouldn't be too
510 // expensive for normal allocation/free operations.
511 for (stripe_index, stripe) in self.stripes.iter().enumerate() {
512 for i in 0..self.memories_per_instance {
513 use wasmtime_environ::EntityRef;
514 let memory_index = DefinedMemoryIndex::new(i);
515 while let Some(id) = stripe
516 .allocator
517 .alloc_affine_and_clear_affinity(module, memory_index)
518 {
519 // Attempt to acquire the `MemoryImageSlot` state for this
520 // slot, and then if we have that try to remove the image,
521 // and then if all that succeeds put the slot back in.
522 //
523 // If anything fails then the slot will be in an "unknown"
524 // state which means that on next use it'll be remapped with
525 // anonymous memory.
526 let index = StripedAllocationIndex(id.0)
527 .as_unstriped_slot_index(stripe_index, self.stripes.len());
528 if let Ok(mut slot) = self.take_memory_image_slot(index) {
529 if slot.remove_image().is_ok() {
530 self.return_memory_image_slot(index, Some(slot));
531 }
532 }
533
534 stripe.allocator.free(id, 0);
535 }
536 }
537 }
538 }
539
540 fn get_base(&self, allocation_index: MemoryAllocationIndex) -> MmapOffset {
541 assert!(allocation_index.index() < self.layout.num_slots);
542 let offset = self
543 .layout
544 .slot_bytes
545 .checked_mul(allocation_index.index())
546 .and_then(|c| c.checked_add(self.layout.pre_slab_guard_bytes))
547 .expect("slot_bytes * index + pre_slab_guard_bytes overflows");
548 self.mapping.offset(offset).expect("offset is in bounds")
549 }
550
551 /// Take ownership of the given image slot.
552 ///
553 /// This method is used when a `MemoryAllocationIndex` has been allocated
554 /// and the state of the slot needs to be acquired. This will lazily
555 /// allocate a `MemoryImageSlot` which describes the current (and possibly
556 /// prior) state of the slot.
557 ///
558 /// During deallocation this structure is passed back to
559 /// `return_memory_image_slot`.
560 ///
561 /// Note that this is a fallible method because using a slot might require
562 /// resetting the memory that was previously there. This reset operation
563 /// is a fallible operation that may not succeed. If it fails then this
564 /// slot cannot be used at this time.
565 fn take_memory_image_slot(
566 &self,
567 allocation_index: MemoryAllocationIndex,
568 ) -> Result<MemoryImageSlot> {
569 let (maybe_slot, needs_reset) = {
570 let mut slot = self.image_slots[allocation_index.index()].lock().unwrap();
571 match mem::replace(&mut *slot, ImageSlot::Unknown) {
572 ImageSlot::Unmapped => (None, false),
573 ImageSlot::Unknown => (None, true),
574 ImageSlot::PreviouslyUsed(state) => (Some(state), false),
575 }
576 };
577 let mut slot = maybe_slot.unwrap_or_else(|| {
578 MemoryImageSlot::create(
579 self.get_base(allocation_index),
580 HostAlignedByteCount::ZERO,
581 self.layout.max_memory_bytes.byte_count(),
582 )
583 });
584
585 // For `Unknown` slots it means that `slot` is brand new and isn't
586 // actually tracking the state of the previous slot, so reset it
587 // entirely with anonymous memory to wipe the slate clean and start
588 // from zero. This should only happen if allocation of the previous
589 // slot failed, for example.
590 if needs_reset {
591 slot.reset_with_anon_memory()?;
592 }
593 Ok(slot)
594 }
595
596 /// Return ownership of the given image slot.
597 ///
598 /// If `slot` is not provided then it's reset with `Unknown` meaning a
599 /// future allocation will need to pave over it to use it.
600 fn return_memory_image_slot(
601 &self,
602 allocation_index: MemoryAllocationIndex,
603 slot: Option<MemoryImageSlot>,
604 ) {
605 let prev = mem::replace(
606 &mut *self.image_slots[allocation_index.index()].lock().unwrap(),
607 match slot {
608 Some(slot) => {
609 assert!(!slot.is_dirty());
610 ImageSlot::PreviouslyUsed(slot)
611 }
612 None => ImageSlot::Unknown,
613 },
614 );
615 assert!(matches!(prev, ImageSlot::Unknown));
616 }
617
618 pub fn unused_warm_slots(&self) -> u32 {
619 self.stripes
620 .iter()
621 .map(|i| i.allocator.unused_warm_slots())
622 .sum()
623 }
624
625 pub fn unused_bytes_resident(&self) -> usize {
626 self.stripes
627 .iter()
628 .map(|i| i.allocator.unused_bytes_resident())
629 .sum()
630 }
631}
632
/// The position of a slot within a single stripe of a memory pool.
///
/// Combined with a stripe index this identifies one slot of the whole pool;
/// see the conversions to and from `MemoryAllocationIndex` on this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct StripedAllocationIndex(u32);
636
637impl StripedAllocationIndex {
638 fn from_unstriped_slot_index(
639 index: MemoryAllocationIndex,
640 num_stripes: usize,
641 ) -> (usize, Self) {
642 let stripe_index = index.index() % num_stripes;
643 let num_stripes: u32 = num_stripes.try_into().unwrap();
644 let index_within_stripe = Self(index.0 / num_stripes);
645 (stripe_index, index_within_stripe)
646 }
647
648 fn as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex {
649 let num_stripes: u32 = num_stripes.try_into().unwrap();
650 let stripe: u32 = stripe.try_into().unwrap();
651 MemoryAllocationIndex(self.0 * num_stripes + stripe)
652 }
653}
654
655#[derive(Clone, Debug)]
656struct SlabConstraints {
657 /// Essentially, the `static_memory_bound`: this is an assumption that the
658 /// runtime and JIT compiler make about how much space will be guarded
659 /// between slots.
660 expected_slot_bytes: HostAlignedByteCount,
661 /// The maximum size of any memory in the pool. Always a non-zero multiple
662 /// of the page size.
663 max_memory_bytes: HostAlignedByteCount,
664 num_slots: usize,
665 num_pkeys_available: usize,
666 guard_bytes: HostAlignedByteCount,
667 guard_before_slots: bool,
668}
669
670impl SlabConstraints {
671 fn new(
672 limits: &InstanceLimits,
673 tunables: &Tunables,
674 num_pkeys_available: usize,
675 ) -> Result<Self> {
676 // `memory_reservation` is the configured number of bytes for a
677 // static memory slot (see `Config::memory_reservation`); even
678 // if the memory never grows to this size (e.g., it has a lower memory
679 // maximum), codegen will assume that this unused memory is mapped
680 // `PROT_NONE`. Typically `memory_reservation` is 4GiB which helps
681 // elide most bounds checks. `MemoryPool` must respect this bound,
682 // though not explicitly: if we can achieve the same effect via
683 // MPK-protected stripes, the slot size can be lower than the
684 // `memory_reservation`.
685 let expected_slot_bytes =
686 HostAlignedByteCount::new_rounded_up_u64(tunables.memory_reservation)
687 .context("memory reservation is too large")?;
688
689 // Page-align the maximum size of memory since that's the granularity that
690 // permissions are going to be controlled at.
691 let max_memory_bytes = HostAlignedByteCount::new_rounded_up(limits.max_memory_size)
692 .context("maximum size of memory is too large")?;
693
694 let guard_bytes = HostAlignedByteCount::new_rounded_up_u64(tunables.memory_guard_size)
695 .context("guard region is too large")?;
696
697 let num_slots = usize::try_from(limits.total_memories).context("too many memories")?;
698
699 let constraints = SlabConstraints {
700 max_memory_bytes,
701 num_slots,
702 expected_slot_bytes,
703 num_pkeys_available,
704 guard_bytes,
705 guard_before_slots: tunables.guard_before_linear_memory,
706 };
707 Ok(constraints)
708 }
709}
710
711#[derive(Debug)]
712struct SlabLayout {
713 /// The total number of slots available in the memory pool slab.
714 num_slots: usize,
715 /// The size of each slot in the memory pool; this contains the maximum
716 /// memory size (i.e., from WebAssembly or Wasmtime configuration) plus any
717 /// guard region after the memory to catch OOB access. On these guard
718 /// regions, note that:
719 /// - users can configure how aggressively (or not) to elide bounds checks
720 /// via `Config::memory_guard_size` (see also:
721 /// `memory_and_guard_size`)
722 /// - memory protection keys can compress the size of the guard region by
723 /// placing slots from a different key (i.e., a stripe) in the guard
724 /// region; this means the slot itself can be smaller and we can allocate
725 /// more of them.
726 slot_bytes: HostAlignedByteCount,
727 /// The maximum size that can become accessible, in bytes, for each linear
728 /// memory. Guaranteed to be a whole number of Wasm pages.
729 max_memory_bytes: HostAlignedByteCount,
730 /// If necessary, the number of bytes to reserve as a guard region at the
731 /// beginning of the slab.
732 pre_slab_guard_bytes: HostAlignedByteCount,
733 /// Like `pre_slab_guard_bytes`, but at the end of the slab.
734 post_slab_guard_bytes: HostAlignedByteCount,
735 /// The number of stripes needed in the slab layout.
736 num_stripes: usize,
737}
738
739impl SlabLayout {
740 /// Return the total size of the slab, using the final layout (where `n =
741 /// num_slots`):
742 ///
743 /// ```text
744 /// ┌────────────────────┬──────┬──────┬───┬──────┬─────────────────────┐
745 /// │pre_slab_guard_bytes│slot 1│slot 2│...│slot n│post_slab_guard_bytes│
746 /// └────────────────────┴──────┴──────┴───┴──────┴─────────────────────┘
747 /// ```
748 fn total_slab_bytes(&self) -> Result<HostAlignedByteCount> {
749 self.slot_bytes
750 .checked_mul(self.num_slots)
751 .and_then(|c| c.checked_add(self.pre_slab_guard_bytes))
752 .and_then(|c| c.checked_add(self.post_slab_guard_bytes))
753 .context("total size of memory reservation exceeds addressable memory")
754 }
755
756 /// Returns the number of Wasm bytes from the beginning of one slot to the
757 /// next slot in the same stripe--this is the striped equivalent of
758 /// `static_memory_bound`. Recall that between slots of the same stripe we
759 /// will see a slot from every other stripe.
760 ///
761 /// For example, in a 3-stripe pool, this function measures the distance
762 /// from the beginning of slot 1 to slot 4, which are of the same stripe:
763 ///
764 /// ```text
765 /// ◄────────────────────►
766 /// ┌────────┬──────┬──────┬────────┬───┐
767 /// │*slot 1*│slot 2│slot 3│*slot 4*│...|
768 /// └────────┴──────┴──────┴────────┴───┘
769 /// ```
770 fn bytes_to_next_stripe_slot(&self) -> HostAlignedByteCount {
771 self.slot_bytes
772 .checked_mul(self.num_stripes)
773 .expect("constructor checks that self.slot_bytes * self.num_stripes is in bounds")
774 }
775}
776
777fn calculate(constraints: &SlabConstraints) -> Result<SlabLayout> {
778 let SlabConstraints {
779 max_memory_bytes,
780 num_slots,
781 expected_slot_bytes,
782 num_pkeys_available,
783 guard_bytes,
784 guard_before_slots,
785 } = *constraints;
786
787 // If the user specifies a guard region, we always need to allocate a
788 // `PROT_NONE` region for it before any memory slots. Recall that we can
789 // avoid bounds checks for loads and stores with immediates up to
790 // `guard_bytes`, but we rely on Wasmtime to emit bounds checks for any
791 // accesses greater than this.
792 let pre_slab_guard_bytes = if guard_before_slots {
793 guard_bytes
794 } else {
795 HostAlignedByteCount::ZERO
796 };
797
798 // To calculate the slot size, we start with the default configured size and
799 // attempt to chip away at this via MPK protection. Note here how we begin
800 // to define a slot as "all of the memory and guard region."
801 let faulting_region_bytes = expected_slot_bytes
802 .max(max_memory_bytes)
803 .checked_add(guard_bytes)
804 .context("faulting region is too large")?;
805
806 let (num_stripes, slot_bytes) = if guard_bytes == 0 || max_memory_bytes == 0 || num_slots == 0 {
807 // In the uncommon case where the memory/guard regions are empty or we don't need any slots , we
808 // will not need any stripes: we just lay out the slots back-to-back
809 // using a single stripe.
810 (1, faulting_region_bytes.byte_count())
811 } else if num_pkeys_available < 2 {
812 // If we do not have enough protection keys to stripe the memory, we do
813 // the same. We can't elide any of the guard bytes because we aren't
814 // overlapping guard regions with other stripes...
815 (1, faulting_region_bytes.byte_count())
816 } else {
817 // ...but if we can create at least two stripes, we can use another
818 // stripe (i.e., a different pkey) as this slot's guard region--this
819 // reduces the guard bytes each slot has to allocate. We must make
820 // sure, though, that if the size of that other stripe(s) does not
821 // fully cover `guard_bytes`, we keep those around to prevent OOB
822 // access.
823
824 // We first calculate the number of stripes we need: we want to
825 // minimize this so that there is less chance of a single store
826 // running out of slots with its stripe--we need at least two,
827 // though. But this is not just an optimization; we need to handle
828 // the case when there are fewer slots than stripes. E.g., if our
829 // pool is configured with only three slots (`num_memory_slots =
830 // 3`), we will run into failures if we attempt to set up more than
831 // three stripes.
832 let needed_num_stripes = faulting_region_bytes
833 .checked_div(max_memory_bytes)
834 .expect("if condition above implies max_memory_bytes is non-zero")
835 + usize::from(
836 faulting_region_bytes
837 .checked_rem(max_memory_bytes)
838 .expect("if condition above implies max_memory_bytes is non-zero")
839 != 0,
840 );
841 assert!(needed_num_stripes > 0);
842 let num_stripes = num_pkeys_available.min(needed_num_stripes).min(num_slots);
843
844 // Next, we try to reduce the slot size by "overlapping" the stripes: we
845 // can make slot `n` smaller since we know that slot `n+1` and following
846 // are in different stripes and will look just like `PROT_NONE` memory.
847 // Recall that codegen expects a guarantee that at least
848 // `faulting_region_bytes` will catch OOB accesses via segfaults.
849 let needed_slot_bytes = faulting_region_bytes
850 .byte_count()
851 .checked_div(num_stripes)
852 .unwrap_or(faulting_region_bytes.byte_count())
853 .max(max_memory_bytes.byte_count());
854 assert!(needed_slot_bytes >= max_memory_bytes.byte_count());
855
856 (num_stripes, needed_slot_bytes)
857 };
858
859 // The page-aligned slot size; equivalent to `memory_and_guard_size`.
860 let slot_bytes =
861 HostAlignedByteCount::new_rounded_up(slot_bytes).context("slot size is too large")?;
862
863 // We may need another guard region (like `pre_slab_guard_bytes`) at the end
864 // of our slab to maintain our `faulting_region_bytes` guarantee. We could
865 // be conservative and just create it as large as `faulting_region_bytes`,
866 // but because we know that the last slot's `slot_bytes` make up the first
867 // part of that region, we reduce the final guard region by that much.
868 let post_slab_guard_bytes = faulting_region_bytes.saturating_sub(slot_bytes);
869
870 // Check that we haven't exceeded the slab we can calculate given the limits
871 // of `usize`.
872 let layout = SlabLayout {
873 num_slots,
874 slot_bytes,
875 max_memory_bytes,
876 pre_slab_guard_bytes,
877 post_slab_guard_bytes,
878 num_stripes,
879 };
880 match layout.total_slab_bytes() {
881 Ok(_) => Ok(layout),
882 Err(e) => Err(e),
883 }
884}
885
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// The default WebAssembly page size, used below as a byte count.
    const WASM_PAGE_SIZE: u32 = wasmtime_environ::Memory::DEFAULT_PAGE_SIZE;

    /// A pool of five one-page memories without guard regions must place its
    /// slots back-to-back, starting at the base of the mapping.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_memory_pool() -> Result<()> {
        let config = PoolingInstanceAllocatorConfig {
            limits: InstanceLimits {
                total_memories: 5,
                max_tables_per_module: 0,
                max_memories_per_module: 3,
                table_elements: 0,
                max_memory_size: WASM_PAGE_SIZE as usize,
                ..Default::default()
            },
            ..Default::default()
        };
        let tunables = Tunables {
            memory_reservation: WASM_PAGE_SIZE as u64,
            memory_guard_size: 0,
            ..Tunables::default_host()
        };
        let pool = MemoryPool::new(&config, &tunables)?;

        let page_bytes = WASM_PAGE_SIZE as usize;
        assert_eq!(pool.layout.slot_bytes, page_bytes);
        assert_eq!(pool.layout.num_slots, 5);
        assert_eq!(pool.layout.max_memory_bytes, page_bytes);

        // Each slot's base must sit exactly `slot * slot_bytes` past the start
        // of the mapping.
        let slab_start = pool.mapping.as_ptr() as usize;
        let stride = pool.layout.slot_bytes.byte_count();
        for slot in 0..5 {
            let slot_base = pool.get_base(MemoryAllocationIndex(slot)).as_mut_ptr();
            assert_eq!(slot_base as usize - slab_start, slot as usize * stride);
        }

        Ok(())
    }

    /// With MPK forced on, the pool must be split into at least two stripes
    /// whose empty slots add up to the configured number of memories.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_pooling_allocator_striping() {
        if !mpk::is_supported() {
            println!("skipping `test_pooling_allocator_striping` test; mpk is not supported");
            return;
        }

        // Force the use of MPK.
        let config = PoolingInstanceAllocatorConfig {
            memory_protection_keys: Enabled::Yes,
            ..PoolingInstanceAllocatorConfig::default()
        };
        let tunables = Tunables::default_host();
        let pool = MemoryPool::new(&config, &tunables).unwrap();
        assert!(pool.stripes.len() >= 2);

        let max_memory_slots = config.limits.total_memories;
        dbg!(pool.stripes[0].allocator.num_empty_slots());
        dbg!(pool.stripes[1].allocator.num_empty_slots());

        // Tally the free slots across every stripe.
        let mut available_memory_slots: usize = 0;
        for stripe in pool.stripes.iter() {
            available_memory_slots += stripe.allocator.num_empty_slots();
        }
        assert_eq!(
            max_memory_slots,
            u32::try_from(available_memory_slots).unwrap()
        );
    }

    /// Runs `calculate` over a grid of hand-picked constraint values and
    /// checks the invariants of every layout it produces.
    #[test]
    fn check_known_layout_calculations() {
        /// Calculates the layout for one combination and validates the result.
        fn check(constraints: SlabConstraints) {
            match calculate(&constraints) {
                Ok(layout) => assert_slab_layout_invariants(constraints, layout),
                // Failure is acceptable only on 32-bit platforms, and only
                // when the calculation exceeded the size of the address
                // space.
                Err(e) => assert!(
                    cfg!(target_pointer_width = "32")
                        && e.to_string().contains("exceeds addressable memory"),
                    "bad error: {e:?}"
                ),
            }
        }

        let page = WASM_PAGE_SIZE as usize;
        for num_pkeys_available in 0..16 {
            for num_slots in [0, 1, 10, 64] {
                for slot_size in [0, 1 << 30 /* 1GB */, 4 << 30 /* 4GB */] {
                    let expected_slot_bytes = HostAlignedByteCount::new(slot_size).unwrap();
                    for memory_size in [0, page, 10 * page] {
                        // `new` is enough here (no `new_rounded_up`): for now
                        // WASM_PAGE_SIZE is 64KiB, a multiple of the host
                        // page size on all platforms.
                        let max_memory_bytes = HostAlignedByteCount::new(memory_size).unwrap();
                        for guard_size in [0, 2 << 30 /* 2GB */] {
                            let guard_bytes = HostAlignedByteCount::new(guard_size).unwrap();
                            for guard_before_slots in [true, false] {
                                check(SlabConstraints {
                                    max_memory_bytes,
                                    num_slots,
                                    expected_slot_bytes,
                                    num_pkeys_available,
                                    guard_bytes,
                                    guard_before_slots,
                                });
                            }
                        }
                    }
                }
            }
        }
    }

    proptest! {
        /// Any layout that `calculate` accepts for random constraints must
        /// satisfy the layout invariants; rejected constraints are ignored.
        #[test]
        #[cfg_attr(miri, ignore)]
        fn check_random_layout_calculations(c in constraints()) {
            match calculate(&c) {
                Ok(layout) => assert_slab_layout_invariants(c, layout),
                Err(_) => {}
            }
        }
    }

    /// Strategy producing arbitrary [`SlabConstraints`], one independent
    /// arbitrary value per field.
    fn constraints() -> impl Strategy<Value = SlabConstraints> {
        let fields = (
            any::<HostAlignedByteCount>(),
            any::<usize>(),
            any::<HostAlignedByteCount>(),
            any::<usize>(),
            any::<HostAlignedByteCount>(),
            any::<bool>(),
        );
        fields.prop_map(
            |(max_memory, slots, expected_slot, pkeys, guard, guard_first)| SlabConstraints {
                max_memory_bytes: max_memory,
                num_slots: slots,
                expected_slot_bytes: expected_slot,
                num_pkeys_available: pkeys,
                guard_bytes: guard,
                guard_before_slots: guard_first,
            },
        )
    }

    /// Asserts that layout `s`, calculated from constraints `c`, is
    /// internally consistent and keeps the out-of-bounds guarantees.
    fn assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout) {
        // The total must equal: leading guard + all slots + trailing guard.
        let actual_total = s.total_slab_bytes().unwrap();
        let all_slots_bytes = s.slot_bytes.checked_mul(c.num_slots).unwrap();
        let expected_total = s
            .pre_slab_guard_bytes
            .checked_add(all_slots_bytes)
            .and_then(|subtotal| subtotal.checked_add(s.post_slab_guard_bytes))
            .unwrap();
        assert_eq!(
            actual_total, expected_total,
            "the slab size does not add up: {c:?} => {s:?}"
        );
        assert!(
            s.slot_bytes >= s.max_memory_bytes,
            "slot is not big enough: {c:?} => {s:?}"
        );

        // Page alignment of the byte values needs no check here: the
        // HostAlignedByteCount newtype wrapper ensures it.

        // The stripe count must be neither too small nor too large.
        assert!(s.num_stripes >= 1, "not enough stripes: {c:?} => {s:?}");
        if c.num_pkeys_available != 0 && c.num_slots != 0 {
            assert!(
                s.num_stripes <= c.num_pkeys_available,
                "layout has more stripes than available pkeys: {c:?} => {s:?}"
            );
            assert!(
                s.num_stripes <= c.num_slots,
                "layout has more stripes than memory slots: {c:?} => {s:?}"
            );
        } else {
            assert_eq!(
                s.num_stripes, 1,
                "expected at least one stripe: {c:?} => {s:?}"
            );
        }

        // The number of stripes/protection keys must be minimal:
        // - two stripes are enough when the next MPK-protected slot is at
        //   least as large as the required guard region;
        // - otherwise only as many stripes are needed as it takes to add up
        //   to that guard region size.
        if c.num_pkeys_available > 1 && !c.max_memory_bytes.is_zero() {
            let stripe_ceiling = c.guard_bytes.checked_div(c.max_memory_bytes).unwrap() + 2;
            assert!(
                s.num_stripes <= stripe_ceiling,
                "calculated more stripes than needed: {c:?} => {s:?}"
            );
        }

        // Striping must not open the door to OOB access:
        // - the slot may have shrunk from `expected_slot_bytes` to
        //   `slot_bytes` on the assumption of MPK striping, so the guaranteed
        //   "faulting region" has to be covered before the next slot of the
        //   same stripe begins;
        // - nothing is striped after the last slot, so
        //   `post_slab_guard_bytes` has to make up for that.
        let stripe_distance = s.bytes_to_next_stripe_slot();
        let faulting_region = c
            .expected_slot_bytes
            .max(c.max_memory_bytes)
            .checked_add(c.guard_bytes)
            .unwrap();
        assert!(
            stripe_distance >= faulting_region,
            "faulting region not large enough: {c:?} => {s:?}"
        );
        let last_slot_span = s.slot_bytes.checked_add(s.post_slab_guard_bytes).unwrap();
        assert!(
            last_slot_span >= c.expected_slot_bytes,
            "last slot may allow OOB access: {c:?} => {s:?}"
        );
    }
}
1120}