wasmtime/runtime/vm/cow.rs
1//! Copy-on-write initialization support: creation of backing images for
2//! modules, and logic to support mapping these backing images into memory.
3
4use super::sys::DecommitBehavior;
5use crate::prelude::*;
6use crate::runtime::vm::sys::vm::{self, MemoryImageSource};
7use crate::runtime::vm::{HostAlignedByteCount, MmapOffset, MmapVec, host_page_size};
8use alloc::sync::Arc;
9use core::ops::Range;
10use core::ptr;
11use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, PrimaryMap, Tunables};
12
13/// Backing images for memories in a module.
14///
15/// This is meant to be built once, when a module is first loaded/constructed,
16/// and then used many times for instantiation.
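///
/// A minimal usage sketch, assuming a compiled `module`, its `wasm_data`
/// blob, an optional backing `mmap`, and a `defined_index` are already in
/// hand (these names are illustrative, not part of this API):
///
/// ```ignore
/// let images = ModuleMemoryImages::new(&module, wasm_data, Some(&mmap))?
///     .expect("static memory initialization is available");
/// if let Some(image) = images.get_memory_image(defined_index) {
///     // `image` can now back copy-on-write instantiation of this memory.
/// }
/// ```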
17pub struct ModuleMemoryImages {
18 memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
19}
20
21impl ModuleMemoryImages {
22 /// Get the MemoryImage for a given memory.
23 pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
24 self.memories[defined_index].as_ref()
25 }
26}
27
28/// One backing image for one memory.
29#[derive(Debug, PartialEq)]
30pub struct MemoryImage {
31 /// The platform-specific source of this image.
32 ///
33 /// This might be a mapped `*.cwasm` file or, on Linux, a `Memfd` (an
34 /// anonymous file held in memory). In either case this is used as the
35 /// backing source for the CoW image.
36 source: MemoryImageSource,
37
38 /// Length of image, in bytes.
39 ///
40 /// Note that initial memory size may be larger; leading and trailing zeroes
41 /// are truncated (handled by backing fd).
42 ///
43 /// Must be a multiple of the system page size.
44 len: HostAlignedByteCount,
45
46 /// Image starts this many bytes into `source`.
47 ///
48 /// This is 0 for anonymous-backed memfd files and is the offset of the
49 /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
50 ///
51 /// Must be a multiple of the system page size.
52 ///
53 /// ## Notes
54 ///
55 /// This currently isn't a `HostAlignedByteCount` because that's a usize and
56 /// this, being a file offset, is a u64.
57 source_offset: u64,
58
59 /// Image starts this many bytes into heap space.
60 ///
61 /// Must be a multiple of the system page size.
62 linear_memory_offset: HostAlignedByteCount,
63}
64
65impl MemoryImage {
66 fn new(
67 page_size: u32,
68 linear_memory_offset: HostAlignedByteCount,
69 data: &[u8],
70 mmap: Option<&MmapVec>,
71 ) -> Result<Option<MemoryImage>> {
72 let assert_page_aligned = |val: usize| {
73 assert_eq!(val % (page_size as usize), 0);
74 };
75 // Sanity-check that various parameters are page-aligned.
76 let len = HostAlignedByteCount::new(data.len()).expect("memory image data is page-aligned");
77
78 // If a backing `mmap` is present then `data` should be a sub-slice of
79 // the `mmap`. The sanity-checks here double-check that. Additionally
80 // compilation should have ensured that the `data` section is
81 // page-aligned within `mmap`, so that's also all double-checked here.
82 //
83 // Finally if the `mmap` itself comes from a backing file on disk, such
84 // as a `*.cwasm` file, then that's a valid source of data for the
85 // memory image so we simply return referencing that.
86 //
87 // Note that this path is platform-agnostic in the sense that all
88 // platforms we support can memory-map copy-on-write data from
89 // files, but for now this is still a Linux-specific region of Wasmtime.
90 // Some work will be needed to get this file compiling for macOS and
91 // Windows.
92 if let Some(mmap) = mmap {
93 let start = mmap.as_ptr() as usize;
94 let end = start + mmap.len();
95 let data_start = data.as_ptr() as usize;
96 let data_end = data_start + data.len();
97 assert!(start <= data_start && data_end <= end);
98 assert_page_aligned(start);
99 assert_page_aligned(data_start);
100 assert_page_aligned(data_end);
101
102 #[cfg(feature = "std")]
103 if let Some(file) = mmap.original_file() {
104 if let Some(source) = MemoryImageSource::from_file(file) {
105 return Ok(Some(MemoryImage {
106 source,
107 source_offset: u64::try_from(data_start - start).unwrap(),
108 linear_memory_offset,
109 len,
110 }));
111 }
112 }
113 }
114
115 // If `mmap` doesn't come from a file then platform-specific mechanisms
116 // may be used to place the data in a form that's amenable to an mmap.
117 if let Some(source) = MemoryImageSource::from_data(data)? {
118 return Ok(Some(MemoryImage {
119 source,
120 source_offset: 0,
121 linear_memory_offset,
122 len,
123 }));
124 }
125
126 Ok(None)
127 }
128
129 unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
130 mmap_base.map_image_at(
131 &self.source,
132 self.source_offset,
133 self.linear_memory_offset,
134 self.len,
135 )
136 }
137
138 unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
139 self.source.remap_as_zeros_at(
140 base.add(self.linear_memory_offset.byte_count()),
141 self.len.byte_count(),
142 )?;
143 Ok(())
144 }
145}
146
147impl ModuleMemoryImages {
148 /// Create a new `ModuleMemoryImages` for the given module. This can be
149 /// passed in as part of an `InstanceAllocationRequest` to speed up
150 /// instantiation and execution by using copy-on-write-backed memories.
151 pub fn new(
152 module: &Module,
153 wasm_data: &[u8],
154 mmap: Option<&MmapVec>,
155 ) -> Result<Option<ModuleMemoryImages>> {
156 let map = match &module.memory_initialization {
157 MemoryInitialization::Static { map } => map,
158 _ => return Ok(None),
159 };
160 let mut memories = PrimaryMap::with_capacity(map.len());
161 let page_size = crate::runtime::vm::host_page_size();
162 let page_size = u32::try_from(page_size).unwrap();
163 for (memory_index, init) in map {
164 // mmap-based initialization only works for defined memories with a
165 // known starting point of all zeros, so bail out if the memory is
166 // imported.
167 let defined_memory = match module.defined_memory_index(memory_index) {
168 Some(idx) => idx,
169 None => return Ok(None),
170 };
171
172 // If there's no known initialization for this memory then we don't
173 // need an image for the memory so push `None` and move on.
174 let init = match init {
175 Some(init) => init,
176 None => {
177 memories.push(None);
178 continue;
179 }
180 };
181
182 // Get the image for this wasm module as a subslice of `wasm_data`,
183 // and then use that to try to create the `MemoryImage`. If this
184 // creation fails then we fail creating `ModuleMemoryImages` since this
185 // memory couldn't be represented.
186 let data = &wasm_data[init.data.start as usize..init.data.end as usize];
187 if module.memories[memory_index]
188 .minimum_byte_size()
189 .map_or(false, |mem_initial_len| {
190 init.offset + u64::try_from(data.len()).unwrap() > mem_initial_len
191 })
192 {
193 // The image is rounded up to multiples of the host OS page
194 // size. But if Wasm is using a custom page size, the Wasm page
195 // size might be smaller than the host OS page size, and that
196 // rounding might have made the image larger than the Wasm
197 // memory's initial length. This is *probably* okay, since the
198 // rounding would have just introduced new runs of zeroes in the
199 // image, but out of an abundance of caution we don't generate
200 // CoW images in this scenario.
201 return Ok(None);
202 }
203
204 let offset_usize = match usize::try_from(init.offset) {
205 Ok(offset) => offset,
206 Err(_) => return Ok(None),
207 };
208 let offset = HostAlignedByteCount::new(offset_usize)
209 .expect("memory init offset is a multiple of the host page size");
210 let image = match MemoryImage::new(page_size, offset, data, mmap)? {
211 Some(image) => image,
212 None => return Ok(None),
213 };
214
215 let idx = memories.push(Some(Arc::new(image)));
216 assert_eq!(idx, defined_memory);
217 }
218
219 Ok(Some(ModuleMemoryImages { memories }))
220 }
221}
222
223/// Slot management of a copy-on-write image which can be reused for the pooling
224/// allocator.
225///
226/// This data structure manages a slot of linear memory, primarily in the
227/// pooling allocator, which optionally has a contiguous memory image in the
228/// middle of it. Pictorially this data structure manages a virtual memory
229/// region that looks like:
230///
231/// ```text
232/// +--------------------+-------------------+--------------+--------------+
233/// | anonymous | optional | anonymous | PROT_NONE |
234/// | zero | memory | zero | memory |
235/// | memory | image | memory | |
236/// +--------------------+-------------------+--------------+--------------+
237/// | <------+---------->
238/// |<-----+------------> \
239/// | \ image.len
240/// | \
241/// | image.linear_memory_offset
242/// |
243/// \
244/// self.base is this virtual address
245///
246/// <------------------+------------------------------------------------>
247/// \
248/// static_size
249///
250/// <------------------+---------------------------------->
251/// \
252/// accessible
253/// ```
254///
255/// When a `MemoryImageSlot` is created it's told what the `static_size` and
256/// `accessible` limits are. Initially there is assumed to be no image in linear
257/// memory.
258///
259/// When `MemoryImageSlot::instantiate` is called then the method will perform
260/// a "synchronization" to take the image from its prior state to the new state
261/// for the image specified. The first instantiation for example will mmap the
262/// heap image into place. Upon reuse of a slot nothing happens except possibly
263/// shrinking `self.accessible`. When a new image is used then the old image is
264/// mapped to anonymous zero memory and then the new image is mapped in place.
265///
266/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
267/// is dirty then it is assumed that any memory beneath `self.accessible` could
268/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
269/// `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
270/// its original state and marks `dirty = false`. This is done by resetting all
271/// anonymous memory back to zero and the image itself back to its initial
272/// contents.
273///
274/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
275/// syscall will release the physical pages back to the OS but retain the
276/// original mappings, effectively resetting everything back to its initial
277/// state. Non-Linux platforms will replace all memory below `self.accessible`
278/// with a fresh zeroed mmap, meaning that reuse is effectively not supported.
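///
/// ## Example lifecycle (sketch)
///
/// A hedged sketch of how a slot is typically driven; `base`, `static_size`,
/// `image`, `ty`, `tunables`, `keep_resident`, and `decommit` are assumed to
/// already exist in the caller:
///
/// ```ignore
/// let mut slot = MemoryImageSlot::create(base, HostAlignedByteCount::ZERO, static_size);
/// // Map the CoW image (if any) and make the initial heap read/write.
/// slot.instantiate(64 << 10, Some(&image), &ty, &tunables)?;
/// // ... the guest runs and may dirty pages below `accessible` ...
/// slot.clear_and_remain_ready(keep_resident, decommit)?;
/// // The slot is clean again and may be reused for the next instantiation.
/// ```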
279#[derive(Debug)]
280pub struct MemoryImageSlot {
281 /// The mmap and offset within it that contains the linear memory for this
282 /// slot.
283 base: MmapOffset,
284
285 /// The maximum static memory size which `self.accessible` can grow to.
286 static_size: usize,
287
288 /// An optional image that is currently being used in this linear memory.
289 ///
290 /// This can be `None` in which case memory is originally all zeros. When
291 /// `Some` the image describes where it's located within linear memory.
292 image: Option<Arc<MemoryImage>>,
293
294 /// The size of the heap that is readable and writable.
295 ///
296 /// Note that this may extend beyond the actual linear memory heap size in
297 /// the case where dynamic memories are in use. Memory accesses to memory below
298 /// `self.accessible` may still page fault as pages are lazily brought in
299 /// but the faults will always be resolved by the kernel.
300 ///
301 /// Also note that this is always page-aligned.
302 accessible: HostAlignedByteCount,
303
304 /// Whether this slot may have "dirty" pages (pages written by an
305 /// instantiation). Set by `instantiate()` and cleared by
306 /// `clear_and_remain_ready()`, and used in assertions to ensure
307 /// those methods are called properly.
308 ///
309 /// Invariant: if !dirty, then this memory slot contains a clean
310 /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
311 /// memory beyond the image up to `static_size`. The addresses
312 /// from offset 0 to `self.accessible` are R+W and set to zero or the
313 /// initial image content, as appropriate. Everything between
314 /// `self.accessible` and `self.static_size` is inaccessible.
315 dirty: bool,
316
317 /// Whether this MemoryImageSlot is responsible for mapping anonymous
318 /// memory (to hold the reservation while overwriting mappings
319 /// specific to this slot) in place when it is dropped. Default
320 /// on, unless the caller knows what they are doing.
321 clear_on_drop: bool,
322}
323
324impl MemoryImageSlot {
325 /// Create a new MemoryImageSlot. Assumes that there is an anonymous
326 /// mmap backing in the given range to start.
327 ///
328 /// The `accessible` parameter describes how much of linear memory is
329 /// already mapped as R/W with all zero-bytes. The `static_size` value is
330 /// the maximum size of this slot which `accessible` cannot grow beyond,
331 /// and all memory from `accessible` to `static_size` should be mapped as
332 /// `PROT_NONE` backed by zero-bytes.
333 pub(crate) fn create(
334 base: MmapOffset,
335 accessible: HostAlignedByteCount,
336 static_size: usize,
337 ) -> Self {
338 MemoryImageSlot {
339 base,
340 static_size,
341 accessible,
342 image: None,
343 dirty: false,
344 clear_on_drop: true,
345 }
346 }
347
348 /// Inform the MemoryImageSlot that it should *not* clear the underlying
349 /// address space when dropped. This should be used only when the
350 /// caller will clear or reuse the address space in some other
351 /// way.
352 pub(crate) fn no_clear_on_drop(&mut self) {
353 self.clear_on_drop = false;
354 }
355
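    /// Grows the readable/writable portion of this slot's heap so that at
    /// least `size_bytes` (rounded up to the host page size) are accessible.
    ///
    /// A hedged usage sketch, assuming `slot` has already been instantiated:
    ///
    /// ```ignore
    /// slot.set_heap_limit(128 << 10)?;
    /// ```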
356 pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
357 let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
358 assert!(size_bytes <= self.static_size);
359 assert!(size_bytes_aligned.byte_count() <= self.static_size);
360
361 // If the heap limit already addresses accessible bytes then no syscalls
362 // are necessary since the data is already mapped into the process and
363 // waiting to go.
364 //
365 // This is used for "dynamic" memories where memory is not always
366 // decommitted during recycling (but it's still always reset).
367 if size_bytes_aligned <= self.accessible {
368 return Ok(());
369 }
370
371 // Otherwise use `mprotect` to make the new pages read/write.
372 self.set_protection(self.accessible..size_bytes_aligned, true)?;
373 self.accessible = size_bytes_aligned;
374
375 Ok(())
376 }
377
378 /// Prepares this slot for the instantiation of a new instance with the
379 /// provided linear memory image.
380 ///
381 /// The `initial_size_bytes` parameter indicates the required initial size
382 /// of the heap for the instance. The `maybe_image` is an optional initial
383 /// image for linear memory to contain. The `ty` and `tunables` parameters
384 /// describe the memory's type and the way compiled code will be accessing it.
385 ///
386 /// The purpose of this method is to take a previously pristine slot
387 /// (`!self.dirty`) and transform its prior state into state necessary for
388 /// the given parameters. This could include, for example:
389 ///
390 /// * More memory may be made read/write if `initial_size_bytes` is larger
391 /// than `self.accessible`.
392 /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
393 /// `self.accessible` is larger than `initial_size_bytes`.
394 /// * If no image was previously in place or if the wrong image was
395 /// previously in place then `mmap` may be used to setup the initial
396 /// image.
397 pub(crate) fn instantiate(
398 &mut self,
399 initial_size_bytes: usize,
400 maybe_image: Option<&Arc<MemoryImage>>,
401 ty: &wasmtime_environ::Memory,
402 tunables: &Tunables,
403 ) -> Result<()> {
404 assert!(!self.dirty);
405 assert!(
406 initial_size_bytes <= self.static_size,
407 "initial_size_bytes <= self.static_size failed: \
408 initial_size_bytes={initial_size_bytes}, self.static_size={}",
409 self.static_size
410 );
411 let initial_size_bytes_page_aligned =
412 HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;
413
414 // First order of business is to blow away the previous linear memory
415 // image if it doesn't match the image specified here. If one is
416 // detected then it's reset with anonymous memory which means that all
417 // of memory up to `self.accessible` will now be read/write and zero.
418 //
419 // Note that this is intentionally a "small mmap" which only covers the
420 // extent of the prior initialization image in order to preserve
421 // resident memory that might come before or after the image.
422 if self.image.as_ref() != maybe_image {
423 self.remove_image()?;
424 }
425
426 // The next order of business is to ensure that `self.accessible` is
427 // appropriate. First up is to grow the read/write portion of memory if
428 // it's not large enough to accommodate `initial_size_bytes`.
429 if self.accessible < initial_size_bytes_page_aligned {
430 self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
431 self.accessible = initial_size_bytes_page_aligned;
432 }
433
434 // If (1) the accessible region is not in its initial state, and (2) the
435 // memory relies on virtual memory at all (i.e. has offset guard
436 // pages), then we need to reset memory protections. Put another way,
437 // the only time it is safe to not reset protections is when we are
438 // using dynamic memory without any guard pages.
439 let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
440 if initial_size_bytes_page_aligned < self.accessible
441 && (tunables.memory_guard_size > 0
442 || ty.can_elide_bounds_check(tunables, host_page_size_log2))
443 {
444 self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
445 self.accessible = initial_size_bytes_page_aligned;
446 }
447
448 // Now that memory is sized appropriately the final operation is to
449 // place the new image into linear memory. Note that this operation is
450 // skipped if `self.image` matches `maybe_image`.
451 assert!(initial_size_bytes <= self.accessible.byte_count());
452 assert!(initial_size_bytes_page_aligned <= self.accessible);
453 if self.image.as_ref() != maybe_image {
454 if let Some(image) = maybe_image.as_ref() {
455 assert!(
456 image
457 .linear_memory_offset
458 .checked_add(image.len)
459 .unwrap()
460 .byte_count()
461 <= initial_size_bytes
462 );
463 if !image.len.is_zero() {
464 unsafe {
465 image.map_at(&self.base)?;
466 }
467 }
468 }
469 self.image = maybe_image.cloned();
470 }
471
472 // Flag ourselves as `dirty` which means that the next operation on this
473 // slot is required to be `clear_and_remain_ready`.
474 self.dirty = true;
475
476 Ok(())
477 }
478
479 pub(crate) fn remove_image(&mut self) -> Result<()> {
480 if let Some(image) = &self.image {
481 unsafe {
482 image.remap_as_zeros_at(self.base.as_mut_ptr())?;
483 }
484 self.image = None;
485 }
486 Ok(())
487 }
488
489 /// Resets this linear memory slot back to a "pristine state".
490 ///
491 /// This will reset the memory back to its original contents on Linux or
492 /// reset the contents back to zero on other platforms. The `keep_resident`
493 /// argument is the maximum amount of memory to keep resident in this
494 /// process's memory on Linux. Up to that much memory will be `memset` to
495 /// zero while the rest of it will be reset or released with `madvise`.
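    ///
    /// A hedged example of a typical call, using the platform's
    /// `decommit_pages` primitive (as the tests in this file do) as the
    /// decommit callback:
    ///
    /// ```ignore
    /// slot.clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
    ///     decommit_pages(ptr, len).unwrap()
    /// })?;
    /// ```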
496 #[allow(dead_code, reason = "only used in some cfgs")]
497 pub(crate) fn clear_and_remain_ready(
498 &mut self,
499 keep_resident: HostAlignedByteCount,
500 decommit: impl FnMut(*mut u8, usize),
501 ) -> Result<()> {
502 assert!(self.dirty);
503
504 unsafe {
505 self.reset_all_memory_contents(keep_resident, decommit)?;
506 }
507
508 self.dirty = false;
509 Ok(())
510 }
511
512 #[allow(dead_code, reason = "only used in some cfgs")]
513 unsafe fn reset_all_memory_contents(
514 &mut self,
515 keep_resident: HostAlignedByteCount,
516 decommit: impl FnMut(*mut u8, usize),
517 ) -> Result<()> {
518 match vm::decommit_behavior() {
519 DecommitBehavior::Zero => {
520 // If we're not on Linux then there's no generic platform way to
521 // reset memory back to its original state, so instead reset memory
522 // back to entirely zeros with an anonymous backing.
523 //
524 // Additionally the previous image, if any, is dropped here
525 // since it's no longer applicable to this mapping.
526 self.reset_with_anon_memory()
527 }
528 DecommitBehavior::RestoreOriginalMapping => {
529 self.reset_with_original_mapping(keep_resident, decommit);
530 Ok(())
531 }
532 }
533 }
534
535 #[allow(dead_code, reason = "only used in some cfgs")]
536 unsafe fn reset_with_original_mapping(
537 &mut self,
538 keep_resident: HostAlignedByteCount,
539 mut decommit: impl FnMut(*mut u8, usize),
540 ) {
541 match &self.image {
542 Some(image) => {
543 if image.linear_memory_offset < keep_resident {
544 // If the image starts below the `keep_resident` then
545 // memory looks something like this:
546 //
547 // up to `keep_resident` bytes
548 // |
549 // +--------------------------+ remaining_memset
550 // | | /
551 // <--------------> <------->
552 //
553 // image_end
554 // 0 linear_memory_offset | accessible
555 // | | | |
556 // +----------------+--------------+---------+--------+
557 // | dirty memory | image | dirty memory |
558 // +----------------+--------------+---------+--------+
559 //
560 // <------+-------> <-----+-----> <---+---> <--+--->
561 // | | | |
562 // | | | |
563 // memset (1) / | madvise (4)
564 // madvise (2)  /
565 // /
566 // memset (3)
567 //
568 //
569 // In this situation there are two disjoint regions that are
570 // `memset` manually to zero. Note that `memset (3)` may be
571 // zero bytes large. Furthermore `madvise (4)` may also be
572 // zero bytes large.
573
574 let image_end = image
575 .linear_memory_offset
576 .checked_add(image.len)
577 .expect("image is in bounds");
578 let mem_after_image = self
579 .accessible
580 .checked_sub(image_end)
581 .expect("image_end falls before self.accessible");
582 let excess = keep_resident
583 .checked_sub(image.linear_memory_offset)
584 .expect(
585 "if statement checks that keep_resident > image.linear_memory_offset",
586 );
587 let remaining_memset = excess.min(mem_after_image);
588
589 // This is memset (1)
590 ptr::write_bytes(
591 self.base.as_mut_ptr(),
592 0u8,
593 image.linear_memory_offset.byte_count(),
594 );
595
596 // This is madvise (2)
597 self.restore_original_mapping(
598 image.linear_memory_offset,
599 image.len,
600 &mut decommit,
601 );
602
603 // This is memset (3)
604 ptr::write_bytes(
605 self.base.as_mut_ptr().add(image_end.byte_count()),
606 0u8,
607 remaining_memset.byte_count(),
608 );
609
610 // This is madvise (4)
611 self.restore_original_mapping(
612 image_end
613 .checked_add(remaining_memset)
614 .expect("image_end + remaining_memset is in bounds"),
615 mem_after_image
616 .checked_sub(remaining_memset)
617 .expect("remaining_memset defined to be <= mem_after_image"),
618 &mut decommit,
619 );
620 } else {
621 // If the image starts after the `keep_resident` threshold
622 // then we memset the start of linear memory and then use
623 // madvise below for the rest of it, including the image.
624 //
625 // 0 keep_resident accessible
626 // | | |
627 // +----------------+---+----------+------------------+
628 // | dirty memory | image | dirty memory |
629 // +----------------+---+----------+------------------+
630 //
631 // <------+-------> <-------------+----------------->
632 // | |
633 // | |
634 // memset (1) madvise (2)
635 //
636 // Here only a single memset is necessary since the image
637 // started after the threshold which we're keeping resident.
638 // Note that the memset may be zero bytes here.
639
640 // This is memset (1)
641 ptr::write_bytes(self.base.as_mut_ptr(), 0u8, keep_resident.byte_count());
642
643 // This is madvise (2)
644 self.restore_original_mapping(
645 keep_resident,
646 self.accessible
647 .checked_sub(keep_resident)
648 .expect("keep_resident is a subset of accessible memory"),
649 decommit,
650 );
651 };
652 }
653
654 // If there's no memory image for this slot then memset the first
655 // bytes in the memory back to zero while using `madvise` to purge
656 // the rest.
657 None => {
658 let size_to_memset = keep_resident.min(self.accessible);
659 ptr::write_bytes(self.base.as_mut_ptr(), 0u8, size_to_memset.byte_count());
660 self.restore_original_mapping(
661 size_to_memset,
662 self.accessible
663 .checked_sub(size_to_memset)
664 .expect("size_to_memset is defined to be <= self.accessible"),
665 decommit,
666 );
667 }
668 }
669 }
670
671 #[allow(dead_code, reason = "only used in some cfgs")]
672 unsafe fn restore_original_mapping(
673 &self,
674 base: HostAlignedByteCount,
675 len: HostAlignedByteCount,
676 mut decommit: impl FnMut(*mut u8, usize),
677 ) {
678 assert!(base.checked_add(len).unwrap() <= self.accessible);
679 if len == 0 {
680 return;
681 }
682
683 assert_eq!(
684 vm::decommit_behavior(),
685 DecommitBehavior::RestoreOriginalMapping
686 );
687 decommit(
688 self.base.as_mut_ptr().add(base.byte_count()),
689 len.byte_count(),
690 );
691 }
692
693 fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
694 let len = range
695 .end
696 .checked_sub(range.start)
697 .expect("range.start <= range.end");
698 assert!(range.end.byte_count() <= self.static_size);
699 if len.is_zero() {
700 return Ok(());
701 }
702
703 // TODO: use Mmap to change memory permissions instead of these free
704 // functions.
705 unsafe {
706 let start = self.base.as_mut_ptr().add(range.start.byte_count());
707 if readwrite {
708 vm::expose_existing_mapping(start, len.byte_count())?;
709 } else {
710 vm::hide_existing_mapping(start, len.byte_count())?;
711 }
712 }
713
714 Ok(())
715 }
716
717 pub(crate) fn has_image(&self) -> bool {
718 self.image.is_some()
719 }
720
721 #[allow(dead_code, reason = "only used in some cfgs")]
722 pub(crate) fn is_dirty(&self) -> bool {
723 self.dirty
724 }
725
726 /// Map anonymous zeroed memory across the whole slot,
727 /// inaccessible. Used both during instantiate and during drop.
728 fn reset_with_anon_memory(&mut self) -> Result<()> {
729 if self.static_size == 0 {
730 assert!(self.image.is_none());
731 assert_eq!(self.accessible, 0);
732 return Ok(());
733 }
734
735 unsafe {
736 vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
737 }
738
739 self.image = None;
740 self.accessible = HostAlignedByteCount::ZERO;
741
742 Ok(())
743 }
744}
745
746impl Drop for MemoryImageSlot {
747 fn drop(&mut self) {
748 // The MemoryImageSlot may be dropped if there is an error during
749 // instantiation: for example, if a memory-growth limiter
750 // disallows a guest from having a memory of a certain size,
751 // after we've already initialized the MemoryImageSlot.
752 //
753 // We need to return this region of the large pool mmap to a
754 // safe state (with no module-specific mappings). The
755 // MemoryImageSlot will not be returned to the MemoryPool, so a new
756 // MemoryImageSlot will be created and overwrite the mappings anyway
757 // on the slot's next use; but for safety and to avoid
758 // resource leaks it's better not to have stale mappings to a
759 // possibly-otherwise-dead module's image.
760 //
761 // To "wipe the slate clean", let's do a mmap of anonymous
762 // memory over the whole region, with PROT_NONE. Note that we
763 // *can't* simply munmap, because that leaves a hole in the
764 // middle of the pooling allocator's big memory area that some
765 // other random mmap may swoop in and take, to be trampled
766 // over by the next MemoryImageSlot later.
767 //
768 // Since we're in drop(), we can't sanely return an error if
769 // this mmap fails. Instead the result is unwrapped here to
770 // trigger a panic if something goes wrong. Otherwise, if this
771 // reset-the-mapping step failed silently, then on reuse it might be
772 // possible, depending on precisely where the error happened, for stale
773 // memory to leak through.
774 //
775 // The exception to all of this is if the `clear_on_drop` flag
776 // (which is set by default) is false. If so, the owner of
777 // this MemoryImageSlot has indicated that it will clean up in some
778 // other way.
779 if self.clear_on_drop {
780 self.reset_with_anon_memory().unwrap();
781 }
782 }
783}
784
785#[cfg(all(test, target_os = "linux", not(miri)))]
786mod test {
787 use super::*;
788 use crate::runtime::vm::mmap::{AlignedLength, Mmap};
789 use crate::runtime::vm::sys::vm::decommit_pages;
790 use crate::runtime::vm::{HostAlignedByteCount, host_page_size};
791 use std::sync::Arc;
792 use wasmtime_environ::{IndexType, Limits, Memory};
793
794 fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
795 // offset must be a multiple of the page size.
796 let linear_memory_offset =
797 HostAlignedByteCount::new(offset).expect("offset is page-aligned");
798 // The image length is rounded up to the nearest page size
799 let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();
800
801 Ok(MemoryImage {
802 source: MemoryImageSource::from_data(data)?.unwrap(),
803 len: image_len,
804 source_offset: 0,
805 linear_memory_offset,
806 })
807 }
808
809 fn dummy_memory() -> Memory {
810 Memory {
811 idx_type: IndexType::I32,
812 limits: Limits { min: 0, max: None },
813 shared: false,
814 page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
815 }
816 }
817
818 fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
819 let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
820 Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
821 }
822
823 /// Presents a part of an mmap as a mutable slice within a callback.
824 ///
825 /// The callback ensures that the reference no longer lives after the
826 /// function is done.
827 ///
828 /// # Safety
829 ///
830 /// The caller must ensure that during this function call, the only way this
831 /// region of memory is accessed (read from or written to) is via the
832 /// reference. Making the callback `'static` goes some way towards ensuring
833 /// that, but it's still possible to squirrel away a reference into global
834 /// state. So don't do that.
835 unsafe fn with_slice_mut(
836 mmap: &Arc<Mmap<AlignedLength>>,
837 range: Range<usize>,
838 f: impl FnOnce(&mut [u8]) + 'static,
839 ) {
840 let ptr = mmap.as_ptr().cast_mut();
841 let slice = unsafe {
842 core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
843 };
844 f(slice);
845 }
846
847 #[test]
848 fn instantiate_no_image() {
849 let ty = dummy_memory();
850 let tunables = Tunables {
851 memory_reservation: 4 << 30,
852 ..Tunables::default_miri()
853 };
854 // 4 MiB mmap'd area, not accessible
855 let mmap = mmap_4mib_inaccessible();
856 // Create a MemoryImageSlot on top of it
857 let mut memfd =
858 MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
859 memfd.no_clear_on_drop();
860 assert!(!memfd.is_dirty());
861 // instantiate with 64 KiB initial size
862 memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
863 assert!(memfd.is_dirty());
864
865 // We should be able to access this 64 KiB (try both ends) and
866 // it should consist of zeroes.
867 unsafe {
868 with_slice_mut(&mmap, 0..65536, |slice| {
869 assert_eq!(0, slice[0]);
870 assert_eq!(0, slice[65535]);
871 slice[1024] = 42;
872 assert_eq!(42, slice[1024]);
873 });
874 }
875
876 // grow the heap
877 memfd.set_heap_limit(128 << 10).unwrap();
878 let slice = unsafe { mmap.slice(0..1 << 20) };
879 assert_eq!(42, slice[1024]);
880 assert_eq!(0, slice[131071]);
881 // instantiate again; we should see zeroes, even as the
882 // reuse-anon-mmap-opt kicks in
883 memfd
884 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
885 decommit_pages(ptr, len).unwrap()
886 })
887 .unwrap();
888 assert!(!memfd.is_dirty());
889 memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
890 let slice = unsafe { mmap.slice(0..65536) };
891 assert_eq!(0, slice[1024]);
892 }
893
894 #[test]
895 fn instantiate_image() {
896 let page_size = host_page_size();
897 let ty = dummy_memory();
898 let tunables = Tunables {
899 memory_reservation: 4 << 30,
900 ..Tunables::default_miri()
901 };
902 // 4 MiB mmap'd area, not accessible
903 let mmap = mmap_4mib_inaccessible();
904 // Create a MemoryImageSlot on top of it
905 let mut memfd =
906 MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
907 memfd.no_clear_on_drop();
908 // Create an image with some data.
909 let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
910 // Instantiate with this image
911 memfd
912 .instantiate(64 << 10, Some(&image), &ty, &tunables)
913 .unwrap();
914 assert!(memfd.has_image());
915
916 unsafe {
917 with_slice_mut(&mmap, 0..65536, move |slice| {
918 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
919 slice[page_size] = 5;
920 });
921 }
922
923 // Clear and re-instantiate same image
924 memfd
925 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
926 decommit_pages(ptr, len).unwrap()
927 })
928 .unwrap();
929 memfd
930 .instantiate(64 << 10, Some(&image), &ty, &tunables)
931 .unwrap();
932 let slice = unsafe { mmap.slice(0..65536) };
933 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
934
935 // Clear and re-instantiate no image
936 memfd
937 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
938 decommit_pages(ptr, len).unwrap()
939 })
940 .unwrap();
941 memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
942 assert!(!memfd.has_image());
943 let slice = unsafe { mmap.slice(0..65536) };
944 assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
945
946 // Clear and re-instantiate image again
947 memfd
948 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
949 decommit_pages(ptr, len).unwrap()
950 })
951 .unwrap();
952 memfd
953 .instantiate(64 << 10, Some(&image), &ty, &tunables)
954 .unwrap();
955 let slice = unsafe { mmap.slice(0..65536) };
956 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
957
958 // Create another image with different data.
959 let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
960 memfd
961 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
962 decommit_pages(ptr, len).unwrap()
963 })
964 .unwrap();
965 memfd
966 .instantiate(128 << 10, Some(&image2), &ty, &tunables)
967 .unwrap();
968 let slice = unsafe { mmap.slice(0..65536) };
969 assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);
970
971 // Instantiate the original image again; we should notice it's
972 // a different image and not reuse the mappings.
973 memfd
974 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
975 decommit_pages(ptr, len).unwrap()
976 })
977 .unwrap();
978 memfd
979 .instantiate(64 << 10, Some(&image), &ty, &tunables)
980 .unwrap();
981 let slice = unsafe { mmap.slice(0..65536) };
982 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
983 }
984
985 #[test]
986 #[cfg(target_os = "linux")]
987 fn memset_instead_of_madvise() {
988 let page_size = host_page_size();
989 let ty = dummy_memory();
990 let tunables = Tunables {
991 memory_reservation: 100 << 16,
992 ..Tunables::default_miri()
993 };
994 let mmap = mmap_4mib_inaccessible();
995 let mut memfd =
996 MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
997 memfd.no_clear_on_drop();
998
999 // Test basics with the image
1000 for image_off in [0, page_size, page_size * 2] {
1001 let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
1002 for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
1003 let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
1004 memfd
1005 .instantiate(64 << 10, Some(&image), &ty, &tunables)
1006 .unwrap();
1007 assert!(memfd.has_image());
1008
1009 unsafe {
1010 with_slice_mut(&mmap, 0..64 << 10, move |slice| {
1011 if image_off > 0 {
1012 assert_eq!(slice[image_off - 1], 0);
1013 }
1014 assert_eq!(slice[image_off + 5], 0);
1015 assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
1016 slice[image_off] = 5;
1017 assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
1018 })
1019 };
1020
1021 memfd
1022 .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
1023 decommit_pages(ptr, len).unwrap()
1024 })
1025 .unwrap();
1026 }
1027 }
1028
1029 // Test without an image
1030 for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
1031 let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
1032 memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
1033
1034 unsafe {
1035 with_slice_mut(&mmap, 0..64 << 10, |slice| {
1036 for chunk in slice.chunks_mut(1024) {
1037 assert_eq!(chunk[0], 0);
1038 chunk[0] = 5;
1039 }
1040 });
1041 }
1042 memfd
1043 .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
1044 decommit_pages(ptr, len).unwrap()
1045 })
1046 .unwrap();
1047 }
1048 }
1049
1050 #[test]
1051 #[cfg(target_os = "linux")]
1052 fn dynamic() {
1053 let page_size = host_page_size();
1054 let ty = dummy_memory();
1055 let tunables = Tunables {
1056 memory_reservation: 0,
1057 memory_reservation_for_growth: 200,
1058 ..Tunables::default_miri()
1059 };
1060
1061 let mmap = mmap_4mib_inaccessible();
1062 let mut memfd =
1063 MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
1064 memfd.no_clear_on_drop();
1065 let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
1066 let initial = 64 << 10;
1067
1068 // Instantiate the image and test that memory remains accessible after
1069 // it's cleared.
1070 memfd
1071 .instantiate(initial, Some(&image), &ty, &tunables)
1072 .unwrap();
1073 assert!(memfd.has_image());
1074
1075 unsafe {
1076 with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
1077 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
1078 slice[page_size] = 5;
1079 assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
1080 });
1081 }
1082
1083 memfd
1084 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
1085 decommit_pages(ptr, len).unwrap()
1086 })
1087 .unwrap();
1088 let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
1089 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
1090
1091 // Re-instantiate and make sure it preserves memory. Grow a bit and set data
1092 // beyond the initial size.
1093 memfd
1094 .instantiate(initial, Some(&image), &ty, &tunables)
1095 .unwrap();
1096 assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
1097
1098 memfd.set_heap_limit(initial * 2).unwrap();
1099
1100 unsafe {
1101 with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
1102 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1103 slice[initial] = 100;
1104 assert_eq!(&[100, 0], &slice[initial..initial + 2]);
1105 });
1106 }
1107
1108 memfd
1109 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
1110 decommit_pages(ptr, len).unwrap()
1111 })
1112 .unwrap();
1113
1114 // Test that memory is still accessible, but it's been reset
1115 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1116
1117 // Instantiate again, and again memory beyond the initial size should
1118 // still be accessible. Grow into it again and make sure it works.
1119 memfd
1120 .instantiate(initial, Some(&image), &ty, &tunables)
1121 .unwrap();
1122 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1123 memfd.set_heap_limit(initial * 2).unwrap();
1124
1125 unsafe {
1126 with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
1127 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1128 slice[initial] = 100;
1129 assert_eq!(&[100, 0], &slice[initial..initial + 2]);
1130 });
1131 }
1132
1133 memfd
1134 .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
1135 decommit_pages(ptr, len).unwrap()
1136 })
1137 .unwrap();
1138
1139 // Reset the image to none and double-check everything is back to zero
1140 memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
1141 assert!(!memfd.has_image());
1142 assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
1143 assert_eq!(&[0, 0], &slice[initial..initial + 2]);
1144 }
1145}