wasmtime/runtime/vm/instance/allocator/pooling/memory_pool.rs
//! Implements a memory pool using a single allocated memory slab.
//!
//! The pooling instance allocator maps one large slab of memory in advance and
//! allocates WebAssembly memories from this slab--a [`MemoryPool`]. Each
//! WebAssembly memory is allocated in its own slot (see uses of `index` and
//! [`SlotId`] in this module):
//!
//! ```text
//! ┌──────┬──────┬──────┬──────┬──────┐
//! │Slot 0│Slot 1│Slot 2│Slot 3│......│
//! └──────┴──────┴──────┴──────┴──────┘
//! ```
//!
//! Diving deeper, we note that a [`MemoryPool`] protects Wasmtime from
//! out-of-bounds memory accesses by inserting inaccessible guard regions
//! between memory slots. These guard regions are configured to raise a signal
//! if they are accessed--a WebAssembly out-of-bounds (OOB) memory access. The
//! [`MemoryPool`] documentation has a more detailed chart but one can think of
//! memory slots being laid out like the following:
//!
//! ```text
//! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
//! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
//! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
//! ```
//!
//! But we can be more efficient about guard regions: with memory protection
//! keys (MPK) enabled, the interleaved guard regions can be smaller. If we
//! surround a memory with memories from other instances and each instance is
//! protected by a different protection key, the guard region can be smaller AND
//! the pool will still raise a signal on an OOB access. This complicates how we
//! lay out memory slots: we must store memories from the same instance in the
//! same "stripe". Each stripe is protected by a different protection key.
//!
//! This concept, dubbed [ColorGuard] in the original paper, relies on careful
//! calculation of the memory sizes to prevent any "overlapping access" (see
//! [`calculate`]): there are limited protection keys available (15) so the next
//! memory using the same key must be at least as far away as the guard region
//! we would insert otherwise. This ends up looking like the following, where a
//! store for instance 0 (`I0`) "stripes" two memories (`M1` and `M2`) with the
//! same protection key 1 and far enough apart to signal an OOB access:
//!
//! ```text
//! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
//! │.....│I0:M1│.....│.....│.<enough slots>.│I0:M2│.....│.....│
//! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
//! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
//! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
//! ```
//!
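//! As a concrete illustration (the numbers below are made up for this example,
//! not requirements): with a 4 GiB memory reservation per slot, a 2 GiB guard
//! region, and 1 GiB maximum memories, the "faulting region" codegen relies on
//! is `max(4 GiB, 1 GiB) + 2 GiB = 6 GiB`, and six 1 GiB stripes suffice
//! because two slots protected by the same key are always 6 GiB apart:
//!
//! ```text
//! faulting region   = max(4 GiB, 1 GiB) + 2 GiB = 6 GiB
//! stripes needed    = 6 GiB / 1 GiB             = 6
//! slot size         = max(6 GiB / 6, 1 GiB)     = 1 GiB
//! same-key distance = 6 * 1 GiB                 = 6 GiB >= 6 GiB
//! ```
//!
//! See [`calculate`] for how these numbers are actually derived.
//!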
//! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf

use super::{
    MemoryAllocationIndex,
    index_allocator::{MemoryInModule, ModuleAffinityIndexAllocator, SlotId},
};
use crate::prelude::*;
use crate::runtime::vm::{
    CompiledModuleId, InstanceAllocationRequest, InstanceLimits, Memory, MemoryBase,
    MemoryImageSlot, Mmap, MmapOffset, PoolingInstanceAllocatorConfig, mmap::AlignedLength,
};
use crate::{
    MpkEnabled,
    runtime::vm::mpk::{self, ProtectionKey, ProtectionMask},
    vm::HostAlignedByteCount,
};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use wasmtime_environ::{DefinedMemoryIndex, Module, Tunables};

/// A set of allocator slots.
///
/// The allocated slots can be split by striping them: e.g., with two stripe
/// colors 0 and 1, we would allocate all even slots using stripe 0 and all odd
/// slots using stripe 1.
///
/// This is helpful for the use of protection keys: (a) if a request comes to
/// allocate multiple instances, we can allocate them all from the same stripe
/// and (b) if a store wants to allocate more from the same stripe it can.
#[derive(Debug)]
struct Stripe {
    allocator: ModuleAffinityIndexAllocator,
    pkey: Option<ProtectionKey>,
}

/// Represents a pool of WebAssembly linear memories.
///
/// A linear memory is divided into accessible pages and guard pages. A memory
/// pool contains linear memories: each memory occupies a slot in an
/// allocated slab (i.e., `mapping`):
///
/// ```text
///         layout.max_memory_bytes                 layout.slot_bytes
///                    |                                    |
///              ◄─────┴────►                   ◄───────────┴──────────►
/// ┌───────────┬────────────┬───────────┐     ┌───────────┬───────────┬───────────┐
/// | PROT_NONE |            | PROT_NONE | ... |           | PROT_NONE | PROT_NONE |
/// └───────────┴────────────┴───────────┘     └───────────┴───────────┴───────────┘
/// |           |◄──────────────────┬─────────────────────────────────► ◄────┬────►
/// |           |                   |                                        |
/// mapping     |      `layout.num_slots` memories             layout.post_slab_guard_size
///             |
///   layout.pre_slab_guard_size
/// ```
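///
/// Concretely, the base address of slot `i` within the slab is
/// `mapping + layout.pre_slab_guard_bytes + i * layout.slot_bytes`; see
/// `get_base` below, which computes exactly this offset.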
#[derive(Debug)]
pub struct MemoryPool {
    mapping: Arc<Mmap<AlignedLength>>,
    /// This memory pool is stripe-aware. If using memory protection keys, this
    /// will contain one stripe per available key; otherwise, a single stripe
    /// with an empty key.
    stripes: Vec<Stripe>,

    /// If using a copy-on-write allocation scheme, the slot management. We
    /// dynamically transfer ownership of a slot to a Memory when in use.
    image_slots: Vec<Mutex<Option<MemoryImageSlot>>>,

    /// A description of the various memory sizes used in allocating the
    /// `mapping` slab.
    layout: SlabLayout,

    /// The maximum number of memories that a single core module instance may
    /// use.
    ///
    /// NB: this is needed for validation but does not affect the pool's size.
    memories_per_instance: usize,

    /// How much linear memory, in bytes, to keep resident after resetting for
    /// use with the next instance. This much memory will be `memset` to zero
    /// when a linear memory is deallocated.
    ///
    /// Memory exceeding this amount in the wasm linear memory will be released
    /// with `madvise` back to the kernel.
    ///
    /// Only applicable on Linux.
    pub(super) keep_resident: HostAlignedByteCount,

    /// Keep track of protection keys handed out to initialized stores; this
    /// allows us to round-robin the assignment of stores to stripes.
    next_available_pkey: AtomicUsize,
}

impl MemoryPool {
    /// Create a new `MemoryPool`.
    pub fn new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self> {
        if u64::try_from(config.limits.max_memory_size).unwrap() > tunables.memory_reservation {
            bail!(
                "maximum memory size of {:#x} bytes exceeds the configured \
                 memory reservation of {:#x} bytes",
                config.limits.max_memory_size,
                tunables.memory_reservation
            );
        }
        let pkeys = match config.memory_protection_keys {
            MpkEnabled::Auto => {
                if mpk::is_supported() {
                    mpk::keys(config.max_memory_protection_keys)
                } else {
                    &[]
                }
            }
            MpkEnabled::Enable => {
                if mpk::is_supported() {
                    mpk::keys(config.max_memory_protection_keys)
                } else {
                    bail!("mpk is disabled on this system")
                }
            }
            MpkEnabled::Disable => &[],
        };

        // This is a tricky bit of global state: when creating a memory pool
        // that uses memory protection keys, we ensure here that any host code
        // will have access to all keys (i.e., stripes). It's only when we enter
        // the WebAssembly guest code (see `StoreInner::call_hook`) that we
        // enforce which keys/stripes can be accessed. Be forewarned about the
        // assumptions here:
        // - we expect this "allow all" configuration to reset the default
        //   process state (only allow key 0) _before_ any memories are accessed
        // - and we expect no other code (e.g., host-side code) to modify this
        //   global MPK configuration
        if !pkeys.is_empty() {
            mpk::allow(ProtectionMask::all());
        }

        // Create a slab layout and allocate it as a completely inaccessible
        // region to start--`PROT_NONE`.
        let constraints = SlabConstraints::new(&config.limits, tunables, pkeys.len())?;
        let layout = calculate(&constraints)?;
        log::debug!(
            "creating memory pool: {constraints:?} -> {layout:?} (total: {})",
            layout.total_slab_bytes()?
        );
        let mut mapping =
            Mmap::accessible_reserved(HostAlignedByteCount::ZERO, layout.total_slab_bytes()?)
                .context("failed to create memory pool mapping")?;

        // Then, stripe the memory with the available protection keys. This is
        // unnecessary if there is only one stripe color.
        if layout.num_stripes >= 2 {
            let mut cursor = layout.pre_slab_guard_bytes;
            let pkeys = &pkeys[..layout.num_stripes];
            for i in 0..constraints.num_slots {
                let pkey = &pkeys[i % pkeys.len()];
                let region = unsafe {
                    mapping.slice_mut(
                        cursor.byte_count()..cursor.byte_count() + layout.slot_bytes.byte_count(),
                    )
                };
                pkey.protect(region)?;
                cursor = cursor
                    .checked_add(layout.slot_bytes)
                    .context("cursor + slot_bytes overflows")?;
            }
            debug_assert_eq!(
                cursor
                    .checked_add(layout.post_slab_guard_bytes)
                    .context("cursor + post_slab_guard_bytes overflows")?,
                layout.total_slab_bytes()?
            );
        }

        let image_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(None))
            .take(constraints.num_slots)
            .collect();

        let create_stripe = |i| {
            let num_slots = constraints.num_slots / layout.num_stripes
                + usize::from(constraints.num_slots % layout.num_stripes > i);
            let allocator = ModuleAffinityIndexAllocator::new(
                num_slots.try_into().unwrap(),
                config.max_unused_warm_slots,
            );
            Stripe {
                allocator,
                pkey: pkeys.get(i).cloned(),
            }
        };

        debug_assert!(layout.num_stripes > 0);
        let stripes: Vec<_> = (0..layout.num_stripes).map(create_stripe).collect();

        let pool = Self {
            stripes,
            mapping: Arc::new(mapping),
            image_slots,
            layout,
            memories_per_instance: usize::try_from(config.limits.max_memories_per_module).unwrap(),
            keep_resident: HostAlignedByteCount::new_rounded_up(
                config.linear_memory_keep_resident,
            )?,
            next_available_pkey: AtomicUsize::new(0),
        };

        Ok(pool)
    }

    /// Return a protection key that stores can use for requesting new
    /// memories.
    pub fn next_available_pkey(&self) -> Option<ProtectionKey> {
        let index = self.next_available_pkey.fetch_add(1, Ordering::SeqCst) % self.stripes.len();
        debug_assert!(
            self.stripes.len() < 2 || self.stripes[index].pkey.is_some(),
            "if we are using stripes, we cannot have an empty protection key"
        );
        self.stripes[index].pkey
    }

    /// Validate whether this memory pool supports the given module.
    pub fn validate_memories(&self, module: &Module) -> Result<()> {
        let memories = module.num_defined_memories();
        if memories > self.memories_per_instance {
            bail!(
                "defined memories count of {} exceeds the per-instance limit of {}",
                memories,
                self.memories_per_instance,
            );
        }

        for (i, memory) in module.memories.iter().skip(module.num_imported_memories) {
            self.validate_memory(memory).with_context(|| {
                format!(
                    "memory index {} is unsupported in this pooling allocator configuration",
                    i.as_u32()
                )
            })?;
        }
        Ok(())
    }

    /// Validate one memory for this pool.
    pub fn validate_memory(&self, memory: &wasmtime_environ::Memory) -> Result<()> {
        let min = memory.minimum_byte_size().with_context(|| {
            format!("memory has a minimum byte size that cannot be represented in a u64")
        })?;
        if min > u64::try_from(self.layout.max_memory_bytes.byte_count()).unwrap() {
            bail!(
                "memory has a minimum byte size of {} which exceeds the limit of {} bytes",
                min,
                self.layout.max_memory_bytes,
            );
        }
        if memory.shared {
            // FIXME(#4244): since the pooling allocator owns the memory
            // allocation (which is torn down with the instance), that
            // can't be used with shared memory where threads or the host
            // might persist the memory beyond the lifetime of the instance
            // itself.
            bail!("memory is shared which is not supported in the pooling allocator");
        }
        Ok(())
    }

    /// Are zero slots in use right now?
    #[allow(unused)] // some cfgs don't use this
    pub fn is_empty(&self) -> bool {
        self.stripes.iter().all(|s| s.allocator.is_empty())
    }

    /// Allocate a single memory for the given instance allocation request.
    pub fn allocate(
        &self,
        request: &mut InstanceAllocationRequest,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
        memory_index: Option<DefinedMemoryIndex>,
    ) -> Result<(MemoryAllocationIndex, Memory)> {
        let stripe_index = if let Some(pkey) = &request.pkey {
            pkey.as_stripe()
        } else {
            debug_assert!(self.stripes.len() < 2);
            0
        };

        let striped_allocation_index = self.stripes[stripe_index]
            .allocator
            .alloc(memory_index.and_then(|mem_idx| {
                request
                    .runtime_info
                    .unique_id()
                    .map(|id| MemoryInModule(id, mem_idx))
            }))
            .map(|slot| StripedAllocationIndex(u32::try_from(slot.index()).unwrap()))
            .ok_or_else(|| {
                super::PoolConcurrencyLimitError::new(
                    self.stripes[stripe_index].allocator.len(),
                    format!("memory stripe {stripe_index}"),
                )
            })?;
        let allocation_index =
            striped_allocation_index.as_unstriped_slot_index(stripe_index, self.stripes.len());

        match (|| {
            // Double-check that the runtime requirements of the memory are
            // satisfied by the configuration of this pooling allocator. This
            // should be returned as an error through `validate_memories`
            // but double-check here to be sure.
            assert!(
                tunables.memory_reservation + tunables.memory_guard_size
                    <= u64::try_from(self.layout.bytes_to_next_stripe_slot().byte_count()).unwrap()
            );

            let base = self.get_base(allocation_index);
            let base_capacity = self.layout.max_memory_bytes;

            let mut slot = self.take_memory_image_slot(allocation_index);
            let image = match memory_index {
                Some(memory_index) => request.runtime_info.memory_image(memory_index)?,
                None => None,
            };
            let initial_size = ty
                .minimum_byte_size()
                .expect("min size checked in validation");

            // If instantiation fails, we can propagate the error
            // upward and drop the slot. This will cause the Drop
            // handler to attempt to map the range with PROT_NONE
            // memory, to reserve the space while releasing any
            // stale mappings. The next use of this slot will then
            // create a new slot that will try to map over
            // this, returning errors as well if the mapping
            // errors persist. The unmap-on-drop is best effort;
            // if it fails, then we can still soundly continue
            // using the rest of the pool and allowing the rest of
            // the process to continue, because we never perform a
            // mmap that would leave an open space for someone
            // else to come in and map something.
            let initial_size = usize::try_from(initial_size).unwrap();
            slot.instantiate(initial_size, image, ty, tunables)?;

            Memory::new_static(
                ty,
                tunables,
                MemoryBase::Mmap(base),
                base_capacity.byte_count(),
                slot,
                unsafe { &mut *request.store.get().unwrap() },
            )
        })() {
            Ok(memory) => Ok((allocation_index, memory)),
            Err(e) => {
                self.stripes[stripe_index]
                    .allocator
                    .free(SlotId(striped_allocation_index.0));
                Err(e)
            }
        }
    }

    /// Deallocate a previously-allocated memory.
    ///
    /// # Safety
    ///
    /// The memory must have been previously allocated from this pool and
    /// assigned the given index, must currently be in an allocated state, and
    /// must never be used again.
    ///
    /// The caller must have already called `clear_and_remain_ready` on the
    /// memory's image and flushed any enqueued decommits for this memory.
    pub unsafe fn deallocate(
        &self,
        allocation_index: MemoryAllocationIndex,
        image: MemoryImageSlot,
    ) {
        self.return_memory_image_slot(allocation_index, image);

        let (stripe_index, striped_allocation_index) =
            StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len());
        self.stripes[stripe_index]
            .allocator
            .free(SlotId(striped_allocation_index.0));
    }

    /// Purge everything related to `module`.
    pub fn purge_module(&self, module: CompiledModuleId) {
        // This primarily means clearing out all of its memory images present in
        // the virtual address space. Go through the index allocator for slots
        // affine to `module` and reset them, freeing up the index when we're
        // done.
        //
        // Note that this is only called when the specified `module` won't be
        // allocated further (the module is being dropped) so this shouldn't hit
        // any sort of infinite loop since this should be the final operation
        // working with `module`.
        //
        // TODO: We are given a module id, but key affinity by pair of module id
        // and defined memory index. We are missing any defined memory index or
        // count of how many memories the module defines here. Therefore, we
        // probe up to the maximum number of memories per instance. This is fine
        // because that maximum is generally relatively small. If this method
        // somehow ever gets hot because of unnecessary probing, we should
        // either pass in the actual number of defined memories for the given
        // module to this method, or keep a side table of all slots that are
        // associated with a module (not just module and memory). The latter
        // would require care to make sure that its maintenance wouldn't be too
        // expensive for normal allocation/free operations.
        for stripe in &self.stripes {
            for i in 0..self.memories_per_instance {
                use wasmtime_environ::EntityRef;
                let memory_index = DefinedMemoryIndex::new(i);
                while let Some(id) = stripe
                    .allocator
                    .alloc_affine_and_clear_affinity(module, memory_index)
                {
                    // Clear the image from the slot and, if successful, return it back
                    // to our state. Note that on failure here the whole slot will get
                    // paved over with an anonymous mapping.
                    let index = MemoryAllocationIndex(id.0);
                    let mut slot = self.take_memory_image_slot(index);
                    if slot.remove_image().is_ok() {
                        self.return_memory_image_slot(index, slot);
                    }

                    stripe.allocator.free(id);
                }
            }
        }
    }

    fn get_base(&self, allocation_index: MemoryAllocationIndex) -> MmapOffset {
        assert!(allocation_index.index() < self.layout.num_slots);
        let offset = self
            .layout
            .slot_bytes
            .checked_mul(allocation_index.index())
            .and_then(|c| c.checked_add(self.layout.pre_slab_guard_bytes))
            .expect("slot_bytes * index + pre_slab_guard_bytes overflows");
        self.mapping.offset(offset).expect("offset is in bounds")
    }

    /// Take ownership of the given image slot. Must be returned via
    /// `return_memory_image_slot` when the instance is done using it.
    fn take_memory_image_slot(&self, allocation_index: MemoryAllocationIndex) -> MemoryImageSlot {
        let maybe_slot = self.image_slots[allocation_index.index()]
            .lock()
            .unwrap()
            .take();

        maybe_slot.unwrap_or_else(|| {
            MemoryImageSlot::create(
                self.get_base(allocation_index),
                HostAlignedByteCount::ZERO,
                self.layout.max_memory_bytes.byte_count(),
            )
        })
    }

    /// Return ownership of the given image slot.
    fn return_memory_image_slot(
        &self,
        allocation_index: MemoryAllocationIndex,
        slot: MemoryImageSlot,
    ) {
        assert!(!slot.is_dirty());
        *self.image_slots[allocation_index.index()].lock().unwrap() = Some(slot);
    }
}

impl Drop for MemoryPool {
    fn drop(&mut self) {
        // Clear the `clear_no_drop` flag (i.e., ask to *not* clear on
        // drop) for all slots, and then drop them here. This is
        // valid because the one `Mmap` that covers the whole region
        // can just do its one munmap.
        for mut slot in std::mem::take(&mut self.image_slots) {
            if let Some(slot) = slot.get_mut().unwrap() {
                slot.no_clear_on_drop();
            }
        }
    }
}

/// The index of a memory allocation within an `InstanceAllocator`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct StripedAllocationIndex(u32);

impl StripedAllocationIndex {
    fn from_unstriped_slot_index(
        index: MemoryAllocationIndex,
        num_stripes: usize,
    ) -> (usize, Self) {
        let stripe_index = index.index() % num_stripes;
        let num_stripes: u32 = num_stripes.try_into().unwrap();
        let index_within_stripe = Self(index.0 / num_stripes);
        (stripe_index, index_within_stripe)
    }

    fn as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex {
        let num_stripes: u32 = num_stripes.try_into().unwrap();
        let stripe: u32 = stripe.try_into().unwrap();
        MemoryAllocationIndex(self.0 * num_stripes + stripe)
    }
}

#[derive(Clone, Debug)]
struct SlabConstraints {
    /// Essentially, the `static_memory_bound`: this is an assumption that the
    /// runtime and JIT compiler make about how much space will be guarded
    /// between slots.
    expected_slot_bytes: HostAlignedByteCount,
    /// The maximum size of any memory in the pool. Always a non-zero multiple
    /// of the page size.
    max_memory_bytes: HostAlignedByteCount,
    num_slots: usize,
    num_pkeys_available: usize,
    guard_bytes: HostAlignedByteCount,
    guard_before_slots: bool,
}

impl SlabConstraints {
    fn new(
        limits: &InstanceLimits,
        tunables: &Tunables,
        num_pkeys_available: usize,
    ) -> Result<Self> {
        // `memory_reservation` is the configured number of bytes for a
        // static memory slot (see `Config::memory_reservation`); even
        // if the memory never grows to this size (e.g., it has a lower memory
        // maximum), codegen will assume that this unused memory is mapped
        // `PROT_NONE`. Typically `memory_reservation` is 4GiB which helps
        // elide most bounds checks. `MemoryPool` must respect this bound,
        // though not explicitly: if we can achieve the same effect via
        // MPK-protected stripes, the slot size can be lower than the
        // `memory_reservation`.
        let expected_slot_bytes =
            HostAlignedByteCount::new_rounded_up_u64(tunables.memory_reservation)
                .context("memory reservation is too large")?;

        // Page-align the maximum size of memory since that's the granularity that
        // permissions are going to be controlled at.
        let max_memory_bytes = HostAlignedByteCount::new_rounded_up(limits.max_memory_size)
            .context("maximum size of memory is too large")?;

        let guard_bytes = HostAlignedByteCount::new_rounded_up_u64(tunables.memory_guard_size)
            .context("guard region is too large")?;

        let num_slots = limits
            .total_memories
            .try_into()
            .context("too many memories")?;

        let constraints = SlabConstraints {
            max_memory_bytes,
            num_slots,
            expected_slot_bytes,
            num_pkeys_available,
            guard_bytes,
            guard_before_slots: tunables.guard_before_linear_memory,
        };
        Ok(constraints)
    }
}

#[derive(Debug)]
struct SlabLayout {
    /// The total number of slots available in the memory pool slab.
    num_slots: usize,
    /// The size of each slot in the memory pool; this contains the maximum
    /// memory size (i.e., from WebAssembly or Wasmtime configuration) plus any
    /// guard region after the memory to catch OOB access. On these guard
    /// regions, note that:
    /// - users can configure how aggressively (or not) to elide bounds checks
    ///   via `Config::memory_guard_size` (see also:
    ///   `memory_and_guard_size`)
    /// - memory protection keys can compress the size of the guard region by
    ///   placing slots from a different key (i.e., a stripe) in the guard
    ///   region; this means the slot itself can be smaller and we can allocate
    ///   more of them.
    slot_bytes: HostAlignedByteCount,
    /// The maximum size that can become accessible, in bytes, for each linear
    /// memory. Guaranteed to be a whole number of Wasm pages.
    max_memory_bytes: HostAlignedByteCount,
    /// If necessary, the number of bytes to reserve as a guard region at the
    /// beginning of the slab.
    pre_slab_guard_bytes: HostAlignedByteCount,
    /// Like `pre_slab_guard_bytes`, but at the end of the slab.
    post_slab_guard_bytes: HostAlignedByteCount,
    /// The number of stripes needed in the slab layout.
    num_stripes: usize,
}

impl SlabLayout {
    /// Return the total size of the slab, using the final layout (where `n =
    /// num_slots`):
    ///
    /// ```text
    /// ┌────────────────────┬──────┬──────┬───┬──────┬─────────────────────┐
    /// │pre_slab_guard_bytes│slot 1│slot 2│...│slot n│post_slab_guard_bytes│
    /// └────────────────────┴──────┴──────┴───┴──────┴─────────────────────┘
    /// ```
    fn total_slab_bytes(&self) -> Result<HostAlignedByteCount> {
        self.slot_bytes
            .checked_mul(self.num_slots)
            .and_then(|c| c.checked_add(self.pre_slab_guard_bytes))
            .and_then(|c| c.checked_add(self.post_slab_guard_bytes))
            .context("total size of memory reservation exceeds addressable memory")
    }

    /// Returns the number of Wasm bytes from the beginning of one slot to the
    /// next slot in the same stripe--this is the striped equivalent of
    /// `static_memory_bound`. Recall that between slots of the same stripe we
    /// will see a slot from every other stripe.
    ///
    /// For example, in a 3-stripe pool, this function measures the distance
    /// from the beginning of slot 1 to slot 4, which are of the same stripe:
    ///
    /// ```text
    ///  ◄────────────────────►
    /// ┌────────┬──────┬──────┬────────┬───┐
    /// │*slot 1*│slot 2│slot 3│*slot 4*│...|
    /// └────────┴──────┴──────┴────────┴───┘
    /// ```
    fn bytes_to_next_stripe_slot(&self) -> HostAlignedByteCount {
        self.slot_bytes
            .checked_mul(self.num_stripes)
            .expect("constructor checks that self.slot_bytes * self.num_stripes is in bounds")
    }
}

fn calculate(constraints: &SlabConstraints) -> Result<SlabLayout> {
    let SlabConstraints {
        max_memory_bytes,
        num_slots,
        expected_slot_bytes,
        num_pkeys_available,
        guard_bytes,
        guard_before_slots,
    } = *constraints;

    // If the user specifies a guard region, we always need to allocate a
    // `PROT_NONE` region for it before any memory slots. Recall that we can
    // avoid bounds checks for loads and stores with immediates up to
    // `guard_bytes`, but we rely on Wasmtime to emit bounds checks for any
    // accesses greater than this.
    let pre_slab_guard_bytes = if guard_before_slots {
        guard_bytes
    } else {
        HostAlignedByteCount::ZERO
    };

    // To calculate the slot size, we start with the default configured size and
    // attempt to chip away at this via MPK protection. Note here how we begin
    // to define a slot as "all of the memory and guard region."
    let faulting_region_bytes = expected_slot_bytes
        .max(max_memory_bytes)
        .checked_add(guard_bytes)
        .context("faulting region is too large")?;

    let (num_stripes, slot_bytes) = if guard_bytes == 0 || max_memory_bytes == 0 || num_slots == 0 {
        // In the uncommon case where the memory/guard regions are empty or we
        // don't need any slots, we will not need any stripes: we just lay out
        // the slots back-to-back using a single stripe.
        (1, faulting_region_bytes.byte_count())
    } else if num_pkeys_available < 2 {
        // If we do not have enough protection keys to stripe the memory, we do
        // the same. We can't elide any of the guard bytes because we aren't
        // overlapping guard regions with other stripes...
        (1, faulting_region_bytes.byte_count())
    } else {
        // ...but if we can create at least two stripes, we can use another
        // stripe (i.e., a different pkey) as this slot's guard region--this
        // reduces the guard bytes each slot has to allocate. We must make
        // sure, though, that if the size of that other stripe(s) does not
        // fully cover `guard_bytes`, we keep those around to prevent OOB
        // access.

        // We first calculate the number of stripes we need: we want to
        // minimize this so that there is less chance of a single store
        // running out of slots with its stripe--we need at least two,
        // though. But this is not just an optimization; we need to handle
        // the case when there are fewer slots than stripes. E.g., if our
        // pool is configured with only three slots (`num_memory_slots =
        // 3`), we will run into failures if we attempt to set up more than
        // three stripes.
        let needed_num_stripes = faulting_region_bytes
            .checked_div(max_memory_bytes)
            .expect("if condition above implies max_memory_bytes is non-zero")
            + usize::from(
                faulting_region_bytes
                    .checked_rem(max_memory_bytes)
                    .expect("if condition above implies max_memory_bytes is non-zero")
                    != 0,
            );
        assert!(needed_num_stripes > 0);
        let num_stripes = num_pkeys_available.min(needed_num_stripes).min(num_slots);

        // Next, we try to reduce the slot size by "overlapping" the stripes: we
        // can make slot `n` smaller since we know that slot `n+1` and following
        // are in different stripes and will look just like `PROT_NONE` memory.
        // Recall that codegen expects a guarantee that at least
        // `faulting_region_bytes` will catch OOB accesses via segfaults.
        let needed_slot_bytes = faulting_region_bytes
            .byte_count()
            .checked_div(num_stripes)
            .unwrap_or(faulting_region_bytes.byte_count())
            .max(max_memory_bytes.byte_count());
        assert!(needed_slot_bytes >= max_memory_bytes.byte_count());

        (num_stripes, needed_slot_bytes)
    };

    // The page-aligned slot size; equivalent to `memory_and_guard_size`.
    let slot_bytes =
        HostAlignedByteCount::new_rounded_up(slot_bytes).context("slot size is too large")?;

    // We may need another guard region (like `pre_slab_guard_bytes`) at the end
    // of our slab to maintain our `faulting_region_bytes` guarantee. We could
    // be conservative and just create it as large as `faulting_region_bytes`,
    // but because we know that the last slot's `slot_bytes` make up the first
    // part of that region, we reduce the final guard region by that much.
    let post_slab_guard_bytes = faulting_region_bytes.saturating_sub(slot_bytes);

    // Check that we haven't exceeded the slab we can calculate given the limits
    // of `usize`.
    let layout = SlabLayout {
        num_slots,
        slot_bytes,
        max_memory_bytes,
        pre_slab_guard_bytes,
        post_slab_guard_bytes,
        num_stripes,
    };
    match layout.total_slab_bytes() {
        Ok(_) => Ok(layout),
        Err(e) => Err(e),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    const WASM_PAGE_SIZE: u32 = wasmtime_environ::Memory::DEFAULT_PAGE_SIZE;

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_memory_pool() -> Result<()> {
        let pool = MemoryPool::new(
            &PoolingInstanceAllocatorConfig {
                limits: InstanceLimits {
                    total_memories: 5,
                    max_tables_per_module: 0,
                    max_memories_per_module: 3,
                    table_elements: 0,
                    max_memory_size: WASM_PAGE_SIZE as usize,
                    ..Default::default()
                },
                ..Default::default()
            },
            &Tunables {
                memory_reservation: WASM_PAGE_SIZE as u64,
                memory_guard_size: 0,
                ..Tunables::default_host()
            },
        )?;

        assert_eq!(pool.layout.slot_bytes, WASM_PAGE_SIZE as usize);
        assert_eq!(pool.layout.num_slots, 5);
        assert_eq!(pool.layout.max_memory_bytes, WASM_PAGE_SIZE as usize);

        let base = pool.mapping.as_ptr() as usize;

        for i in 0..5 {
            let index = MemoryAllocationIndex(i);
            let ptr = pool.get_base(index).as_mut_ptr();
            assert_eq!(
                ptr as usize - base,
                i as usize * pool.layout.slot_bytes.byte_count()
            );
        }

        Ok(())
    }
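
    // A small additional check, as a sketch of the validation at the top of
    // `MemoryPool::new`: a configured maximum memory size larger than the
    // engine's memory reservation should be rejected up front. The particular
    // sizes below are arbitrary, chosen only to trigger that check.
    #[test]
    fn test_rejects_memory_larger_than_reservation() {
        let config = PoolingInstanceAllocatorConfig {
            limits: InstanceLimits {
                max_memory_size: 2 * WASM_PAGE_SIZE as usize,
                ..Default::default()
            },
            ..Default::default()
        };
        let tunables = Tunables {
            memory_reservation: WASM_PAGE_SIZE as u64,
            memory_guard_size: 0,
            ..Tunables::default_host()
        };
        let err = MemoryPool::new(&config, &tunables).unwrap_err();
        assert!(err.to_string().contains("exceeds the configured"));
    }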

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_pooling_allocator_striping() {
        if !mpk::is_supported() {
            println!("skipping `test_pooling_allocator_striping` test; mpk is not supported");
            return;
        }

        // Force the use of MPK.
        let config = PoolingInstanceAllocatorConfig {
            memory_protection_keys: MpkEnabled::Enable,
            ..PoolingInstanceAllocatorConfig::default()
        };
        let pool = MemoryPool::new(&config, &Tunables::default_host()).unwrap();
        assert!(pool.stripes.len() >= 2);

        let max_memory_slots = config.limits.total_memories;
        dbg!(pool.stripes[0].allocator.num_empty_slots());
        dbg!(pool.stripes[1].allocator.num_empty_slots());
        let available_memory_slots: usize = pool
            .stripes
            .iter()
            .map(|s| s.allocator.num_empty_slots())
            .sum();
        assert_eq!(
            max_memory_slots,
            u32::try_from(available_memory_slots).unwrap()
        );
    }
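
    // A sketch of the striping index arithmetic used by
    // `StripedAllocationIndex`: with three stripes, unstriped slot indices
    // interleave the stripes round-robin (slot 0 -> stripe 0, slot 1 ->
    // stripe 1, slot 2 -> stripe 2, slot 3 -> stripe 0, ...), so splitting an
    // index into a (stripe, index-within-stripe) pair and recombining it
    // should round-trip. The stripe count of three is arbitrary.
    #[test]
    fn test_striped_index_round_trip() {
        let num_stripes = 3;
        for unstriped in 0..9u32 {
            let index = MemoryAllocationIndex(unstriped);
            let (stripe, striped) =
                StripedAllocationIndex::from_unstriped_slot_index(index, num_stripes);
            assert_eq!(stripe, unstriped as usize % num_stripes);
            assert_eq!(
                striped.as_unstriped_slot_index(stripe, num_stripes).index(),
                unstriped as usize
            );
        }
    }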

    #[test]
    fn check_known_layout_calculations() {
        for num_pkeys_available in 0..16 {
            for num_memory_slots in [0, 1, 10, 64] {
                for expected_slot_bytes in [0, 1 << 30 /* 1GB */, 4 << 30 /* 4GB */] {
                    let expected_slot_bytes =
                        HostAlignedByteCount::new(expected_slot_bytes).unwrap();
                    for max_memory_bytes in
                        [0, 1 * WASM_PAGE_SIZE as usize, 10 * WASM_PAGE_SIZE as usize]
                    {
                        // Note new rather than new_rounded_up here -- for now,
                        // WASM_PAGE_SIZE is 64KiB, which is a multiple of the
                        // host page size on all platforms.
                        let max_memory_bytes = HostAlignedByteCount::new(max_memory_bytes).unwrap();
                        for guard_bytes in [0, 2 << 30 /* 2GB */] {
                            let guard_bytes = HostAlignedByteCount::new(guard_bytes).unwrap();
                            for guard_before_slots in [true, false] {
                                let constraints = SlabConstraints {
                                    max_memory_bytes,
                                    num_slots: num_memory_slots,
                                    expected_slot_bytes,
                                    num_pkeys_available,
                                    guard_bytes,
                                    guard_before_slots,
                                };
                                match calculate(&constraints) {
                                    Ok(layout) => {
                                        assert_slab_layout_invariants(constraints, layout)
                                    }
                                    Err(e) => {
                                        // Only allow failure on 32-bit
                                        // platforms where the calculation
                                        // exceeded the size of the address
                                        // space
                                        assert!(
                                            cfg!(target_pointer_width = "32")
                                                && e.to_string()
                                                    .contains("exceeds addressable memory"),
                                            "bad error: {e:?}"
                                        );
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
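
    // A concrete spot-check of the striping arithmetic in `calculate`,
    // mirroring the worked example in the module documentation. The numbers
    // here (4 GiB expected slot, 2 GiB guard, 1 GiB maximum memories, 15 keys)
    // are illustrative assumptions, not requirements.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn check_example_striped_layout() -> Result<()> {
        let constraints = SlabConstraints {
            expected_slot_bytes: HostAlignedByteCount::new(4 << 30).unwrap(),
            max_memory_bytes: HostAlignedByteCount::new(1 << 30).unwrap(),
            num_slots: 32,
            num_pkeys_available: 15,
            guard_bytes: HostAlignedByteCount::new(2 << 30).unwrap(),
            guard_before_slots: true,
        };
        let layout = calculate(&constraints)?;

        // The faulting region is `max(4 GiB, 1 GiB) + 2 GiB = 6 GiB`, so six
        // 1 GiB stripes cover it: each slot shrinks to 1 GiB while two slots
        // of the same stripe stay 6 GiB apart.
        assert_eq!(layout.num_stripes, 6);
        assert_eq!(layout.slot_bytes.byte_count(), 1 << 30);
        assert_eq!(layout.bytes_to_next_stripe_slot().byte_count(), 6 << 30);

        // The last slot covers 1 GiB of the required 6 GiB faulting region, so
        // only 5 GiB of trailing guard is needed.
        assert_eq!(layout.pre_slab_guard_bytes.byte_count(), 2 << 30);
        assert_eq!(layout.post_slab_guard_bytes.byte_count(), 5 << 30);
        Ok(())
    }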

    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]
        fn check_random_layout_calculations(c in constraints()) {
            if let Ok(l) = calculate(&c) {
                assert_slab_layout_invariants(c, l);
            }
        }
    }

    fn constraints() -> impl Strategy<Value = SlabConstraints> {
        (
            any::<HostAlignedByteCount>(),
            any::<usize>(),
            any::<HostAlignedByteCount>(),
            any::<usize>(),
            any::<HostAlignedByteCount>(),
            any::<bool>(),
        )
            .prop_map(
                |(
                    max_memory_bytes,
                    num_memory_slots,
                    expected_slot_bytes,
                    num_pkeys_available,
                    guard_bytes,
                    guard_before_slots,
                )| {
                    SlabConstraints {
                        max_memory_bytes,
                        num_slots: num_memory_slots,
                        expected_slot_bytes,
                        num_pkeys_available,
                        guard_bytes,
                        guard_before_slots,
                    }
                },
            )
    }

    fn assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout) {
        // Check that all the sizes add up.
        assert_eq!(
            s.total_slab_bytes().unwrap(),
            s.pre_slab_guard_bytes
                .checked_add(s.slot_bytes.checked_mul(c.num_slots).unwrap())
                .and_then(|c| c.checked_add(s.post_slab_guard_bytes))
                .unwrap(),
            "the slab size does not add up: {c:?} => {s:?}"
        );
        assert!(
            s.slot_bytes >= s.max_memory_bytes,
            "slot is not big enough: {c:?} => {s:?}"
        );

        // The HostAlignedByteCount newtype wrapper ensures that the various
        // byte values are page-aligned.

        // Check that we use no more or fewer stripes than needed.
        assert!(s.num_stripes >= 1, "not enough stripes: {c:?} => {s:?}");
        if c.num_pkeys_available == 0 || c.num_slots == 0 {
            assert_eq!(
                s.num_stripes, 1,
                "expected exactly one stripe: {c:?} => {s:?}"
            );
        } else {
            assert!(
                s.num_stripes <= c.num_pkeys_available,
                "layout has more stripes than available pkeys: {c:?} => {s:?}"
            );
            assert!(
                s.num_stripes <= c.num_slots,
                "layout has more stripes than memory slots: {c:?} => {s:?}"
            );
        }

        // Check that we use the minimum number of stripes/protection keys.
        // - if the next MPK-protected slot is bigger or the same as the
        //   required guard region, we only need two stripes
        // - if the next slot is smaller than the guard region, we only need
        //   enough stripes to add up to at least that guard region size.
        if c.num_pkeys_available > 1 && !c.max_memory_bytes.is_zero() {
            assert!(
                s.num_stripes <= (c.guard_bytes.checked_div(c.max_memory_bytes).unwrap() + 2),
                "calculated more stripes than needed: {c:?} => {s:?}"
            );
        }

        // Check that the memory-striping will not allow OOB access.
        // - we may have reduced the slot size from `expected_slot_bytes` to
        //   `slot_bytes` assuming MPK striping; we check that our guaranteed
        //   "faulting region" is respected
        // - the last slot won't have MPK striping after it; we check that the
        //   `post_slab_guard_bytes` accounts for this
        assert!(
            s.bytes_to_next_stripe_slot()
                >= c.expected_slot_bytes
                    .max(c.max_memory_bytes)
                    .checked_add(c.guard_bytes)
                    .unwrap(),
            "faulting region not large enough: {c:?} => {s:?}"
        );
        assert!(
            s.slot_bytes.checked_add(s.post_slab_guard_bytes).unwrap() >= c.expected_slot_bytes,
            "last slot may allow OOB access: {c:?} => {s:?}"
        );
    }
}