wasmtime/runtime/vm/cow.rs
1//! Copy-on-write initialization support: creation of backing images for
2//! modules, and logic to support mapping these backing images into memory.
3
4// `MemoryImageSource` is an empty enum on some platforms which triggers some
5// warnings
6#![cfg_attr(any(not(unix), miri), allow(unreachable_patterns))]
7
8use super::sys::DecommitBehavior;
9use crate::prelude::*;
10use crate::runtime::vm::sys::vm::{self, MemoryImageSource};
11use crate::runtime::vm::{host_page_size, HostAlignedByteCount, MmapOffset, MmapVec};
12use alloc::sync::Arc;
13use core::ops::Range;
14use core::ptr;
15use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, PrimaryMap, Tunables};
16
17/// Backing images for memories in a module.
18///
19/// This is meant to be built once, when a module is first loaded/constructed,
20/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    /// One entry per defined memory in the module: `Some` if a CoW image was
    /// built for that memory, `None` if it has no static initialization image.
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}
24
25impl ModuleMemoryImages {
26 /// Get the MemoryImage for a given memory.
27 pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
28 self.memories[defined_index].as_ref()
29 }
30}
31
32/// One backing image for one memory.
#[derive(Debug, PartialEq)]
pub struct MemoryImage {
    /// The platform-specific source of this image.
    ///
    /// This might be a mapped `*.cwasm` file or on Unix it could also be a
    /// `Memfd` as an anonymous file in memory on Linux. In either case this is
    /// used as the backing-source for the CoW image.
    source: MemoryImageSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing
    /// zeroes are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: HostAlignedByteCount,

    /// Image starts this many bytes into `source`.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    ///
    /// ## Notes
    ///
    /// This currently isn't a `HostAlignedByteCount` because that's a `usize`
    /// and this, being a file offset, is a `u64`.
    source_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: HostAlignedByteCount,
}
68
impl MemoryImage {
    /// Attempts to build a `MemoryImage` for `data`, destined for offset
    /// `linear_memory_offset` within a linear memory.
    ///
    /// Returns `Ok(None)` when no platform mechanism is available to serve
    /// `data` as a CoW mapping; callers then fall back to eager
    /// initialization.
    fn new(
        page_size: u32,
        linear_memory_offset: HostAlignedByteCount,
        data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<MemoryImage>> {
        let assert_page_aligned = |val: usize| {
            assert_eq!(val % (page_size as usize), 0);
        };
        // Sanity-check that the image length is page-aligned (this panics via
        // `expect` if compilation produced a misaligned data section).
        let len = HostAlignedByteCount::new(data.len()).expect("memory image data is page-aligned");

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense of all
        // platforms we support support memory mapping copy-on-write data from
        // files, but for now this is still a Linux-specific region of Wasmtime.
        // Some work will be needed to get this file compiling for macOS and
        // Windows.
        if let Some(mmap) = mmap {
            let start = mmap.as_ptr() as usize;
            let end = start + mmap.len();
            let data_start = data.as_ptr() as usize;
            let data_end = data_start + data.len();
            assert!(start <= data_start && data_end <= end);
            assert_page_aligned(start);
            assert_page_aligned(data_start);
            assert_page_aligned(data_end);

            #[cfg(feature = "std")]
            if let Some(file) = mmap.original_file() {
                if let Some(source) = MemoryImageSource::from_file(file) {
                    return Ok(Some(MemoryImage {
                        source,
                        // Offset of `data` within the backing file, used as
                        // the file offset of the CoW mapping.
                        source_offset: u64::try_from(data_start - start).unwrap(),
                        linear_memory_offset,
                        len,
                    }));
                }
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap
        // (e.g. an anonymous memfd on Linux).
        if let Some(source) = MemoryImageSource::from_data(data)? {
            return Ok(Some(MemoryImage {
                source,
                source_offset: 0,
                linear_memory_offset,
                len,
            }));
        }

        Ok(None)
    }

    /// Maps this image into the linear memory rooted at `mmap_base`, at
    /// `self.linear_memory_offset` within it.
    ///
    /// # Safety
    ///
    /// Presumably the caller must guarantee the target range belongs to this
    /// slot and is not concurrently accessed — the contract is delegated to
    /// `MmapOffset::map_image_at`; confirm there.
    unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
        mmap_base.map_image_at(
            &self.source,
            self.source_offset,
            self.linear_memory_offset,
            self.len,
        )
    }

    /// Replaces this image's pages (at `base + linear_memory_offset`) with
    /// anonymous zero memory.
    ///
    /// # Safety
    ///
    /// `base` must be the base of the linear memory this image was mapped
    /// into; contract delegated to `MemoryImageSource::remap_as_zeros_at`.
    unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
        self.source.remap_as_zeros_at(
            base.add(self.linear_memory_offset.byte_count()),
            self.len.byte_count(),
        )?;
        Ok(())
    }
}
150
impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
    ///
    /// Returns `Ok(None)` when CoW images cannot be used for this module
    /// (non-static initialization, imported memories, or data that can't be
    /// turned into an image).
    pub fn new(
        module: &Module,
        wasm_data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<ModuleMemoryImages>> {
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::runtime::vm::host_page_size();
        let page_size = u32::try_from(page_size).unwrap();
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no initialization for this memory known then we don't
            // need an image for the memory so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            // Get the image for this wasm module as a subslice of `wasm_data`,
            // and then use that to try to create the `MemoryImage`. If this
            // creation fails then we fail creating `ModuleMemoryImages` since
            // this memory couldn't be represented.
            let data = &wasm_data[init.data.start as usize..init.data.end as usize];
            if module.memories[memory_index]
                .minimum_byte_size()
                .map_or(false, |mem_initial_len| {
                    init.offset + u64::try_from(data.len()).unwrap() > mem_initial_len
                })
            {
                // The image is rounded up to multiples of the host OS page
                // size. But if Wasm is using a custom page size, the Wasm page
                // size might be smaller than the host OS page size, and that
                // rounding might have made the image larger than the Wasm
                // memory's initial length. This is *probably* okay, since the
                // rounding would have just introduced new runs of zeroes in the
                // image, but out of an abundance of caution we don't generate
                // CoW images in this scenario.
                return Ok(None);
            }

            // A static init offset that doesn't fit in `usize` can't be
            // expressed as an in-memory offset on this host; skip CoW.
            let offset_usize = match usize::try_from(init.offset) {
                Ok(offset) => offset,
                Err(_) => return Ok(None),
            };
            let offset = HostAlignedByteCount::new(offset_usize)
                .expect("memory init offset is a multiple of the host page size");
            let image = match MemoryImage::new(page_size, offset, data, mmap)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}
226
227/// Slot management of a copy-on-write image which can be reused for the pooling
228/// allocator.
229///
230/// This data structure manages a slot of linear memory, primarily in the
231/// pooling allocator, which optionally has a contiguous memory image in the
232/// middle of it. Pictorially this data structure manages a virtual memory
233/// region that looks like:
234///
235/// ```text
236/// +--------------------+-------------------+--------------+--------------+
237/// | anonymous | optional | anonymous | PROT_NONE |
238/// | zero | memory | zero | memory |
239/// | memory | image | memory | |
240/// +--------------------+-------------------+--------------+--------------+
241/// | <------+---------->
242/// |<-----+------------> \
243/// | \ image.len
244/// | \
245/// | image.linear_memory_offset
246/// |
247/// \
248/// self.base is this virtual address
249///
250/// <------------------+------------------------------------------------>
251/// \
252/// static_size
253///
254/// <------------------+---------------------------------->
255/// \
256/// accessible
257/// ```
258///
259/// When a `MemoryImageSlot` is created it's told what the `static_size` and
260/// `accessible` limits are. Initially there is assumed to be no image in linear
261/// memory.
262///
263/// When `MemoryImageSlot::instantiate` is called then the method will perform
264/// a "synchronization" to take the image from its prior state to the new state
265/// for the image specified. The first instantiation for example will mmap the
266/// heap image into place. Upon reuse of a slot nothing happens except possibly
267/// shrinking `self.accessible`. When a new image is used then the old image is
268/// mapped to anonymous zero memory and then the new image is mapped in place.
269///
270/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
271/// is dirty then it is assumed that any memory beneath `self.accessible` could
272/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
273/// the `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
274/// its original state to mark `dirty = false`. This is done by resetting all
275/// anonymous memory back to zero and the image itself back to its initial
276/// contents.
277///
278/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
279/// syscall will release the physical pages back to the OS but retain the
280/// original mappings, effectively resetting everything back to its initial
281/// state. Non-linux platforms will replace all memory below `self.accessible`
282/// with a fresh zero'd mmap, meaning that reuse is effectively not supported.
#[derive(Debug)]
pub struct MemoryImageSlot {
    /// The mmap and offset within it that contains the linear memory for this
    /// slot.
    base: MmapOffset,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some` the image describes where it's located within this slot's
    /// linear memory.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size in
    /// the case of dynamic memories in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    ///
    /// Also note that this is always page-aligned.
    accessible: HostAlignedByteCount,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,

    /// Whether this MemoryImageSlot is responsible for mapping anonymous
    /// memory (to hold the reservation while overwriting mappings
    /// specific to this slot) in place when it is dropped. Defaults to
    /// `true`; disabled via `no_clear_on_drop` when the caller will clean
    /// up the address space itself.
    clear_on_drop: bool,
}
327
impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    ///
    /// The new slot starts with no image, not dirty, and `clear_on_drop`
    /// enabled.
    pub(crate) fn create(
        base: MmapOffset,
        accessible: HostAlignedByteCount,
        static_size: usize,
    ) -> Self {
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }
351
    /// Inform the MemoryImageSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    ///
    /// The default (set in `create`) is to clear on drop.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }
359
360 pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
361 let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
362 assert!(size_bytes <= self.static_size);
363 assert!(size_bytes_aligned.byte_count() <= self.static_size);
364
365 // If the heap limit already addresses accessible bytes then no syscalls
366 // are necessary since the data is already mapped into the process and
367 // waiting to go.
368 //
369 // This is used for "dynamic" memories where memory is not always
370 // decommitted during recycling (but it's still always reset).
371 if size_bytes_aligned <= self.accessible {
372 return Ok(());
373 }
374
375 // Otherwise use `mprotect` to make the new pages read/write.
376 self.set_protection(self.accessible..size_bytes_aligned, true)?;
377 self.accessible = size_bytes_aligned;
378
379 Ok(())
380 }
381
    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `ty` and `tunables` parameters
    /// describe the memory type and engine configuration, which determine
    /// whether protections must be tightened when shrinking.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to setup the initial
    ///   image.
    ///
    /// Panics if the slot is dirty or `initial_size_bytes` exceeds
    /// `static_size`.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(initial_size_bytes <= self.static_size);
        let initial_size_bytes_page_aligned =
            HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        if self.image.as_ref() != maybe_image {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes_page_aligned {
            self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard
        // pages), then we need to reset memory protections. Put another way,
        // the only time it is safe to not reset protections is when we are
        // using dynamic memory without any guard pages.
        let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
        if initial_size_bytes_page_aligned < self.accessible
            && (tunables.memory_guard_size > 0
                || ty.can_elide_bounds_check(tunables, host_page_size_log2))
        {
            self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible.byte_count());
        assert!(initial_size_bytes_page_aligned <= self.accessible);
        if self.image.as_ref() != maybe_image {
            if let Some(image) = maybe_image.as_ref() {
                // The image must fit entirely within the initial heap size.
                assert!(
                    image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .unwrap()
                        .byte_count()
                        <= initial_size_bytes
                );
                if !image.len.is_zero() {
                    unsafe {
                        image.map_at(&self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }
477
    /// Removes the CoW image from this slot, if any, by remapping its pages
    /// as anonymous zero memory; afterwards `self.image` is `None`.
    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            // SAFETY: the image was previously mapped at its
            // `linear_memory_offset` within this slot's base mapping, so
            // remapping that same range is presumed sound — contract
            // delegated to `MemoryImage::remap_as_zeros_at`.
            unsafe {
                image.remap_as_zeros_at(self.base.as_mut_ptr())?;
            }
            self.image = None;
        }
        Ok(())
    }
487
    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero where the rest of it will be reset or released with `madvise`.
    ///
    /// Panics if the slot is not currently dirty; on success the slot is
    /// marked clean again.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn clear_and_remain_ready(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        assert!(self.dirty);

        // SAFETY: the slot owns its mapping and is dirty, so resetting its
        // contents is presumed to have no outstanding borrowers — contract
        // delegated to `reset_all_memory_contents`.
        unsafe {
            self.reset_all_memory_contents(keep_resident, decommit)?;
        }

        self.dirty = false;
        Ok(())
    }
510
    /// Resets all memory contents below `self.accessible`, dispatching on the
    /// platform's decommit behavior.
    ///
    /// # Safety
    ///
    /// Callers must guarantee no live references into this slot's memory;
    /// contract delegated to the platform reset helpers called below.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_all_memory_contents(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        match vm::decommit_behavior() {
            DecommitBehavior::Zero => {
                // If we're not on Linux then there's no generic platform way to
                // reset memory back to its original state, so instead reset memory
                // back to entirely zeros with an anonymous backing.
                //
                // Additionally the previous image, if any, is dropped here
                // since it's no longer applicable to this mapping.
                self.reset_with_anon_memory()
            }
            DecommitBehavior::RestoreOriginalMapping => {
                self.reset_with_original_mapping(keep_resident, decommit);
                Ok(())
            }
        }
    }
533
    /// Resets memory contents by `memset`-ing up to `keep_resident` bytes to
    /// zero and decommitting (restoring the original mapping of) the rest,
    /// carefully skipping around the CoW image so its pages stay mapped.
    ///
    /// # Safety
    ///
    /// Callers must ensure there are no outstanding references into this
    /// slot's memory while it is rewritten and decommitted.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_with_original_mapping(
        &mut self,
        keep_resident: HostAlignedByteCount,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        match &self.image {
            Some(image) => {
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` then
                    // memory looks something like this:
                    //
                    //               up to `keep_resident` bytes
                    //                          |
                    //          +--------------------------+  remaining_memset
                    //          |                          | /
                    //          <--------------> <------->
                    //
                    //                              image_end
                    // 0        linear_memory_offset   |             accessible
                    // |                |               |                  |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+--------------+---------+--------+
                    //
                    //  <------+-------> <-----+-----> <---+---> <--+--->
                    //         |               |           |        |
                    //         |               |           |        |
                    //   memset (1)            /           |   madvise (4)
                    //                  madvise (2)        /
                    //                                    /
                    //                              memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that are
                    // `memset` manually to zero. Note that `memset (3)` may be
                    // zero bytes large. Furthermore `madvise (4)` may also be
                    // zero bytes large.

                    let image_end = image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .expect("image is in bounds");
                    let mem_after_image = self
                        .accessible
                        .checked_sub(image_end)
                        .expect("image_end falls before self.accessible");
                    let excess = keep_resident
                        .checked_sub(image.linear_memory_offset)
                        .expect(
                            "if statement checks that keep_resident > image.linear_memory_offset",
                        );
                    let remaining_memset = excess.min(mem_after_image);

                    // This is memset (1)
                    ptr::write_bytes(
                        self.base.as_mut_ptr(),
                        0u8,
                        image.linear_memory_offset.byte_count(),
                    );

                    // This is madvise (2)
                    self.restore_original_mapping(
                        image.linear_memory_offset,
                        image.len,
                        &mut decommit,
                    );

                    // This is memset (3)
                    ptr::write_bytes(
                        self.base.as_mut_ptr().add(image_end.byte_count()),
                        0u8,
                        remaining_memset.byte_count(),
                    );

                    // This is madvise (4)
                    self.restore_original_mapping(
                        image_end
                            .checked_add(remaining_memset)
                            .expect("image_end + remaining_memset is in bounds"),
                        mem_after_image
                            .checked_sub(remaining_memset)
                            .expect("remaining_memset defined to be <= mem_after_image"),
                        &mut decommit,
                    );
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0             keep_resident                   accessible
                    // |                |                                 |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory      |  image   |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    //  <------+-------> <-------------+----------------->
                    //         |                       |
                    //         |                       |
                    //   memset (1)               madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping resident.
                    // Note that the memset may be zero bytes here.

                    // This is memset (1)
                    ptr::write_bytes(self.base.as_mut_ptr(), 0u8, keep_resident.byte_count());

                    // This is madvise (2)
                    self.restore_original_mapping(
                        keep_resident,
                        self.accessible
                            .checked_sub(keep_resident)
                            .expect("keep_resident is a subset of accessible memory"),
                        decommit,
                    );
                };
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.accessible);
                ptr::write_bytes(self.base.as_mut_ptr(), 0u8, size_to_memset.byte_count());
                self.restore_original_mapping(
                    size_to_memset,
                    self.accessible
                        .checked_sub(size_to_memset)
                        .expect("size_to_memset is defined to be <= self.accessible"),
                    decommit,
                );
            }
        }
    }
669
670 #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
671 unsafe fn restore_original_mapping(
672 &self,
673 base: HostAlignedByteCount,
674 len: HostAlignedByteCount,
675 mut decommit: impl FnMut(*mut u8, usize),
676 ) {
677 assert!(base.checked_add(len).unwrap() <= self.accessible);
678 if len == 0 {
679 return;
680 }
681
682 assert_eq!(
683 vm::decommit_behavior(),
684 DecommitBehavior::RestoreOriginalMapping
685 );
686 decommit(
687 self.base.as_mut_ptr().add(base.byte_count()),
688 len.byte_count(),
689 );
690 }
691
    /// Changes the protection of `range` (slot-relative, page-aligned byte
    /// offsets) to read/write when `readwrite` is true, or inaccessible
    /// otherwise. Empty ranges are a no-op.
    ///
    /// Panics if `range.start > range.end` or `range.end` exceeds
    /// `self.static_size`.
    fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
        let len = range
            .end
            .checked_sub(range.start)
            .expect("range.start <= range.end");
        assert!(range.end.byte_count() <= self.static_size);
        if len.is_zero() {
            return Ok(());
        }

        // TODO: use Mmap to change memory permissions instead of these free
        // functions.
        unsafe {
            let start = self.base.as_mut_ptr().add(range.start.byte_count());
            if readwrite {
                vm::expose_existing_mapping(start, len.byte_count())?;
            } else {
                vm::hide_existing_mapping(start, len.byte_count())?;
            }
        }

        Ok(())
    }
715
    /// Returns whether a CoW image is currently mapped into this slot.
    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    /// Returns whether this slot has been instantiated and not yet reset via
    /// `clear_and_remain_ready`.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }
724
    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    ///
    /// Afterwards the slot has no image and zero accessible bytes.
    fn reset_with_anon_memory(&mut self) -> Result<()> {
        // A zero-sized slot has nothing mapped; double-check the bookkeeping
        // agrees and return early.
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        // SAFETY: this slot owns `static_size` bytes at `base`, so replacing
        // that whole range is presumed sound — contract delegated to
        // `vm::erase_existing_mapping`.
        unsafe {
            vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
        }

        self.image = None;
        self.accessible = HostAlignedByteCount::ZERO;

        Ok(())
    }
}
744
impl Drop for MemoryImageSlot {
    /// Wipes the slot's mappings on drop (unless `clear_on_drop` is false).
    fn drop(&mut self) {
        // The MemoryImageSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemoryImageSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemoryImageSlot will not be returned to the MemoryPool, so a new
        // MemoryImageSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemoryImageSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Instead the result is unwrapped here to
        // trigger a panic if something goes wrong. Otherwise if this
        // reset-the-mapping fails then on reuse it might be possible, depending
        // on precisely where errors happened, that stale memory could get
        // leaked through.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemoryImageSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            self.reset_with_anon_memory().unwrap();
        }
    }
}
783
784#[cfg(all(test, target_os = "linux", not(miri)))]
785mod test {
786 use super::*;
787 use crate::runtime::vm::mmap::{AlignedLength, Mmap};
788 use crate::runtime::vm::sys::vm::decommit_pages;
789 use crate::runtime::vm::{host_page_size, HostAlignedByteCount};
790 use std::sync::Arc;
791 use wasmtime_environ::{IndexType, Limits, Memory};
792
793 fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
794 // offset must be a multiple of the page size.
795 let linear_memory_offset =
796 HostAlignedByteCount::new(offset).expect("offset is page-aligned");
797 // The image length is rounded up to the nearest page size
798 let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();
799
800 Ok(MemoryImage {
801 source: MemoryImageSource::from_data(data)?.unwrap(),
802 len: image_len,
803 source_offset: 0,
804 linear_memory_offset,
805 })
806 }
807
808 fn dummy_memory() -> Memory {
809 Memory {
810 idx_type: IndexType::I32,
811 limits: Limits { min: 0, max: None },
812 shared: false,
813 page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
814 }
815 }
816
817 fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
818 let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
819 Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
820 }
821
    /// Presents a part of an mmap as a mutable slice within a callback.
    ///
    /// The callback ensures that the reference no longer lives after the
    /// function is done.
    ///
    /// # Safety
    ///
    /// The caller must ensure that during this function call, the only way
    /// this region of memory is accessed (read from or written to) is via the
    /// reference. Making the callback `'static` goes some way towards ensuring
    /// that, but it's still possible to squirrel away a reference into global
    /// state. So don't do that.
    unsafe fn with_slice_mut(
        mmap: &Arc<Mmap<AlignedLength>>,
        range: Range<usize>,
        f: impl FnOnce(&mut [u8]) + 'static,
    ) {
        let ptr = mmap.as_ptr().cast_mut();
        // SAFETY: the caller promises exclusive access to `range` within the
        // mmap for the duration of this call (see above).
        let slice = unsafe {
            core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
        };
        f(slice);
    }
845
    /// Exercises the full instantiate → grow → clear → re-instantiate cycle
    /// of a `MemoryImageSlot` with no backing image, verifying that memory is
    /// zeroed, writable, and re-zeroed after clearing.
    #[test]
    fn instantiate_no_image() {
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(memfd.is_dirty());

        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        unsafe {
            with_slice_mut(&mmap, 0..65536, |slice| {
                assert_eq!(0, slice[0]);
                assert_eq!(0, slice[65535]);
                slice[1024] = 42;
                assert_eq!(42, slice[1024]);
            });
        }

        // grow the heap; previously-written data must survive and new pages
        // must read as zero
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = unsafe { mmap.slice(0..1 << 20) };
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(0, slice[1024]);
    }
892
    #[test]
    fn instantiate_image() {
        // Image offsets are page-aligned, so the image data below is placed
        // one page into the heap.
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        // The image bytes must be visible; write one of them to dirty the
        // CoW page so the following clear has something to undo.
        unsafe {
            with_slice_mut(&mmap, 0..65536, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
            });
        }

        // Clear and re-instantiate same image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        // The dirty write of `5` above must have been discarded: the
        // pristine image bytes are back.
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Clear and re-instantiate no image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        // With no image, the formerly-image region reads as zeroes.
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);

        // Clear and re-instantiate image again
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        // Also use a different (larger) initial heap size for this image.
        memfd
            .instantiate(128 << 10, Some(&image2), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);

        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
    }
983
    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        // Exercises `clear_and_remain_ready` across a range of keep-resident
        // sizes, including sizes larger than the 4 MiB slot itself.
        // NOTE(review): the name suggests the first `amt_to_memset` bytes are
        // reset in-place (memset) while the rest goes through the decommit
        // callback — confirm against `clear_and_remain_ready`'s docs.
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();

        // Test basics with the image, varying the image's page-aligned offset
        // within the heap as well as the keep-resident amount.
        for image_off in [0, page_size, page_size * 2] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
                let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
                memfd
                    .instantiate(64 << 10, Some(&image), &ty, &tunables)
                    .unwrap();
                assert!(memfd.has_image());

                unsafe {
                    with_slice_mut(&mmap, 0..64 << 10, move |slice| {
                        // Bytes adjacent to the image are zero, the image
                        // bytes are present, and a write lands (dirtying the
                        // CoW page so the clear below has work to do).
                        if image_off > 0 {
                            assert_eq!(slice[image_off - 1], 0);
                        }
                        assert_eq!(slice[image_off + 5], 0);
                        assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                        slice[image_off] = 5;
                        assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                    })
                };

                memfd
                    .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                        decommit_pages(ptr, len).unwrap()
                    })
                    .unwrap();
            }
        }

        // Test without an image: every instantiation must observe an
        // all-zero heap regardless of the previous keep-resident amount.
        for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
            let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
            memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();

            unsafe {
                with_slice_mut(&mmap, 0..64 << 10, |slice| {
                    // Sample one byte per KiB: read zero, then dirty it for
                    // the next iteration's clear to reset.
                    for chunk in slice.chunks_mut(1024) {
                        assert_eq!(chunk[0], 0);
                        chunk[0] = 5;
                    }
                });
            }
            memfd
                .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();
        }
    }
1048
    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        // `memory_reservation: 0` with a nonzero
        // `memory_reservation_for_growth` configures a dynamic-style memory:
        // there's no fixed reservation, but some room past the initial size
        // is left for in-place growth.
        let tunables = Tunables {
            memory_reservation: 0,
            memory_reservation_for_growth: 200,
            ..Tunables::default_miri()
        };

        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        // Image data lives one page into the heap.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                // Image bytes visible; dirty one so the clear below has
                // something to undo.
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        // After the clear the pages are still mapped readable and the dirty
        // write of `5` has been rolled back to the original image bytes.
        let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Re-instantiate make sure it preserves memory. Grow a bit and set data
        // beyond the initial size.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                // Grown region starts zeroed and is writable.
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }
1144}