wasmtime/runtime/vm/cow.rs

//! Copy-on-write initialization support: creation of backing images for
//! modules, and logic to support mapping these backing images into memory.

// `MemoryImageSource` is an empty enum on some platforms which triggers some
// warnings
#![cfg_attr(any(not(unix), miri), allow(unreachable_patterns))]

use super::sys::DecommitBehavior;
use crate::prelude::*;
use crate::runtime::vm::sys::vm::{self, MemoryImageSource};
use crate::runtime::vm::{host_page_size, HostAlignedByteCount, MmapOffset, MmapVec};
use alloc::sync::Arc;
use core::ops::Range;
use core::ptr;
use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, PrimaryMap, Tunables};

/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}

impl ModuleMemoryImages {
    /// Get the MemoryImage for a given memory.
    pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
        self.memories[defined_index].as_ref()
    }
}

/// One backing image for one memory.
#[derive(Debug, PartialEq)]
pub struct MemoryImage {
    /// The platform-specific source of this image.
    ///
    /// This might be a mapped `*.cwasm` file or on Unix it could also be a
    /// `Memfd` as an anonymous file in memory on Linux. In either case this is
    /// used as the backing-source for the CoW image.
    source: MemoryImageSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: HostAlignedByteCount,

    /// Image starts this many bytes into `source`.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    ///
    /// ## Notes
    ///
    /// This currently isn't a `HostAlignedByteCount` because that's a usize and
    /// this, being a file offset, is a u64.
    source_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: HostAlignedByteCount,
}

impl MemoryImage {
    fn new(
        page_size: u32,
        linear_memory_offset: HostAlignedByteCount,
        data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<MemoryImage>> {
        let assert_page_aligned = |val: usize| {
            assert_eq!(val % (page_size as usize), 0);
        };
        // Sanity-check that various parameters are page-aligned.
        let len = HostAlignedByteCount::new(data.len()).expect("memory image data is page-aligned");

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense that all
        // platforms we support can memory-map copy-on-write data from files,
        // but for now this is still a Linux-specific region of Wasmtime. Some
        // work will be needed to get this file compiling for macOS and
        // Windows.
        if let Some(mmap) = mmap {
            let start = mmap.as_ptr() as usize;
            let end = start + mmap.len();
            let data_start = data.as_ptr() as usize;
            let data_end = data_start + data.len();
            assert!(start <= data_start && data_end <= end);
            assert_page_aligned(start);
            assert_page_aligned(data_start);
            assert_page_aligned(data_end);

            #[cfg(feature = "std")]
            if let Some(file) = mmap.original_file() {
                if let Some(source) = MemoryImageSource::from_file(file) {
                    return Ok(Some(MemoryImage {
                        source,
                        source_offset: u64::try_from(data_start - start).unwrap(),
                        linear_memory_offset,
                        len,
                    }));
                }
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.
        if let Some(source) = MemoryImageSource::from_data(data)? {
            return Ok(Some(MemoryImage {
                source,
                source_offset: 0,
                linear_memory_offset,
                len,
            }));
        }

        Ok(None)
    }

    unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
        mmap_base.map_image_at(
            &self.source,
            self.source_offset,
            self.linear_memory_offset,
            self.len,
        )
    }

    unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
        self.source.remap_as_zeros_at(
            base.add(self.linear_memory_offset.byte_count()),
            self.len.byte_count(),
        )?;
        Ok(())
    }
}

impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
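    ///
    /// A minimal usage sketch (not a doctest; `module`, `wasm_data`, and
    /// `mmap` are assumed to come from module compilation/loading elsewhere in
    /// the crate):
    ///
    /// ```ignore
    /// // Build the images once per compiled module ...
    /// let images = ModuleMemoryImages::new(&module, wasm_data, Some(&mmap))?;
    /// // ... then look them up on every instantiation.
    /// if let Some(images) = &images {
    ///     let memory0 = DefinedMemoryIndex::from_u32(0);
    ///     let _image: Option<&Arc<MemoryImage>> = images.get_memory_image(memory0);
    /// }
    /// ```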
    pub fn new(
        module: &Module,
        wasm_data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<ModuleMemoryImages>> {
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::runtime::vm::host_page_size();
        let page_size = u32::try_from(page_size).unwrap();
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no initialization for this memory known then we don't
            // need an image for the memory so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            // Get the image for this wasm module as a subslice of `wasm_data`,
            // and then use that to try to create the `MemoryImage`. If this
            // creation fails then we fail creating `ModuleMemoryImages` since
            // this memory couldn't be represented.
            let data = &wasm_data[init.data.start as usize..init.data.end as usize];
            if module.memories[memory_index]
                .minimum_byte_size()
                .map_or(false, |mem_initial_len| {
                    init.offset + u64::try_from(data.len()).unwrap() > mem_initial_len
                })
            {
                // The image is rounded up to multiples of the host OS page
                // size. But if Wasm is using a custom page size, the Wasm page
                // size might be smaller than the host OS page size, and that
                // rounding might have made the image larger than the Wasm
                // memory's initial length. This is *probably* okay, since the
                // rounding would have just introduced new runs of zeroes in the
                // image, but out of an abundance of caution we don't generate
                // CoW images in this scenario.
                return Ok(None);
            }

            let offset_usize = match usize::try_from(init.offset) {
                Ok(offset) => offset,
                Err(_) => return Ok(None),
            };
            let offset = HostAlignedByteCount::new(offset_usize)
                .expect("memory init offset is a multiple of the host page size");
            let image = match MemoryImage::new(page_size, offset, data, mmap)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}

/// Slot management of a copy-on-write image which can be reused for the pooling
/// allocator.
///
/// This data structure manages a slot of linear memory, primarily in the
/// pooling allocator, which optionally has a contiguous memory image in the
/// middle of it. Pictorially this data structure manages a virtual memory
/// region that looks like:
///
/// ```text
///   +--------------------+-------------------+--------------+--------------+
///   |   anonymous        |      optional     |   anonymous  |    PROT_NONE |
///   |     zero           |       memory      |     zero     |     memory   |
///   |    memory          |       image       |    memory    |              |
///   +--------------------+-------------------+--------------+--------------+
///   |                     <------+---------->
///   |<-----+------------>         \
///   |      \                   image.len
///   |       \
///   |  image.linear_memory_offset
///   |
///   \
///  self.base is this virtual address
///
///    <------------------+------------------------------------------------>
///                        \
///                      static_size
///
///    <------------------+---------------------------------->
///                        \
///                      accessible
/// ```
///
/// When a `MemoryImageSlot` is created it's told what the `static_size` and
/// `accessible` limits are. Initially there is assumed to be no image in linear
/// memory.
///
/// When `MemoryImageSlot::instantiate` is called then the method will perform
/// a "synchronization" to take the image from its prior state to the new state
/// for the image specified. The first instantiation for example will mmap the
/// heap image into place. Upon reuse of a slot nothing happens except possibly
/// shrinking `self.accessible`. When a new image is used then the old image is
/// mapped to anonymous zero memory and then the new image is mapped in place.
///
/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
/// is dirty then it is assumed that any memory beneath `self.accessible` could
/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
/// the `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
/// its original state to mark `dirty = false`. This is done by resetting all
/// anonymous memory back to zero and the image itself back to its initial
/// contents.
///
/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
/// syscall will release the physical pages back to the OS but retain the
/// original mappings, effectively resetting everything back to its initial
/// state. Non-Linux platforms will replace all memory below `self.accessible`
/// with a fresh zeroed mmap, meaning that reuse is effectively not supported.
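///
/// A rough sketch of the intended lifecycle, following the description above
/// (not a doctest; the `base` offset, image, memory type, tunables, and the
/// `decommit_pages` callback are assumed to be provided by the pooling
/// allocator):
///
/// ```ignore
/// // One-time setup of a slot within the pool's reservation.
/// let mut slot = MemoryImageSlot::create(base, HostAlignedByteCount::ZERO, static_size);
///
/// // Per-instantiation: map the (optional) image and size the heap.
/// slot.instantiate(initial_size_bytes, Some(&image), &memory_ty, &tunables)?;
///
/// // ... the guest runs, possibly growing the heap ...
/// slot.set_heap_limit(new_size_bytes)?;
///
/// // On teardown, return the slot to a pristine state so it can be reused.
/// slot.clear_and_remain_ready(keep_resident, |ptr, len| unsafe {
///     decommit_pages(ptr, len).unwrap()
/// })?;
/// ```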
#[derive(Debug)]
pub struct MemoryImageSlot {
    /// The mmap and offset within it that contains the linear memory for this
    /// slot.
    base: MmapOffset,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some` the image describes where it's located within this slot's linear
    /// memory.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size in
    /// the case of dynamic memories in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    ///
    /// Also note that this is always page-aligned.
    accessible: HostAlignedByteCount,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,

    /// Whether this MemoryImageSlot is responsible for mapping anonymous
    /// memory (to hold the reservation while overwriting mappings
    /// specific to this slot) in place when it is dropped. Default
    /// on, unless the caller knows what they are doing.
    clear_on_drop: bool,
}

impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    pub(crate) fn create(
        base: MmapOffset,
        accessible: HostAlignedByteCount,
        static_size: usize,
    ) -> Self {
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }

    /// Inform the MemoryImageSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }

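    /// Grows the accessible (read/write) portion of this slot's linear memory
    /// to at least `size_bytes`, rounded up to the host page size.
    ///
    /// If the rounded size is already accessible this is a no-op; otherwise the
    /// newly exposed pages are made read/write in place. `size_bytes` must not
    /// exceed `self.static_size`.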
    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
        assert!(size_bytes <= self.static_size);
        assert!(size_bytes_aligned.byte_count() <= self.static_size);

        // If the heap limit already addresses accessible bytes then no syscalls
        // are necessary since the data is already mapped into the process and
        // waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes_aligned <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes_aligned, true)?;
        self.accessible = size_bytes_aligned;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `ty` and `tunables` describe the
    /// way compiled code will be accessing this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to setup the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(initial_size_bytes <= self.static_size);
        let initial_size_bytes_page_aligned =
            HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        if self.image.as_ref() != maybe_image {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes_page_aligned {
            self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard
        // pages), then we need to reset memory protections. Put another way,
        // the only time it is safe to not reset protections is when we are
        // using dynamic memory without any guard pages.
        let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
        if initial_size_bytes_page_aligned < self.accessible
            && (tunables.memory_guard_size > 0
                || ty.can_elide_bounds_check(tunables, host_page_size_log2))
        {
            self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible.byte_count());
        assert!(initial_size_bytes_page_aligned <= self.accessible);
        if self.image.as_ref() != maybe_image {
            if let Some(image) = maybe_image.as_ref() {
                assert!(
                    image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .unwrap()
                        .byte_count()
                        <= initial_size_bytes
                );
                if !image.len.is_zero() {
                    unsafe {
                        image.map_at(&self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base.as_mut_ptr())?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero, while the rest of it will be reset or released with `madvise`.
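    ///
    /// A sketch of a typical call, mirroring the tests at the bottom of this
    /// file (`decommit_pages` is the Linux helper from `sys::vm` and is assumed
    /// to be in scope):
    ///
    /// ```ignore
    /// slot.clear_and_remain_ready(keep_resident, |ptr, len| unsafe {
    ///     // Release physical pages while keeping the original CoW mapping.
    ///     decommit_pages(ptr, len).unwrap()
    /// })?;
    /// ```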
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn clear_and_remain_ready(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        assert!(self.dirty);

        unsafe {
            self.reset_all_memory_contents(keep_resident, decommit)?;
        }

        self.dirty = false;
        Ok(())
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_all_memory_contents(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        match vm::decommit_behavior() {
            DecommitBehavior::Zero => {
                // If we're not on Linux then there's no generic platform way to
                // reset memory back to its original state, so instead reset memory
                // back to entirely zeros with an anonymous backing.
                //
                // Additionally the previous image, if any, is dropped here
                // since it's no longer applicable to this mapping.
                self.reset_with_anon_memory()
            }
            DecommitBehavior::RestoreOriginalMapping => {
                self.reset_with_original_mapping(keep_resident, decommit);
                Ok(())
            }
        }
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_with_original_mapping(
        &mut self,
        keep_resident: HostAlignedByteCount,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        match &self.image {
            Some(image) => {
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` then
                    // memory looks something like this:
                    //
                    //               up to `keep_resident` bytes
                    //                          |
                    //          +--------------------------+  remaining_memset
                    //          |                          | /
                    //  <-------------->                <------->
                    //
                    //                              image_end
                    // 0        linear_memory_offset   |             accessible
                    // |                |              |                  |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+--------------+---------+--------+
                    //
                    //  <------+-------> <-----+----->  <---+---> <--+--->
                    //         |               |            |        |
                    //         |               |            |        |
                    //   memset (1)            /            |   madvise (4)
                    //                   madvise (2)       /
                    //                                    /
                    //                              memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that are
                    // `memset` manually to zero. Note that `memset (3)` may be
                    // zero bytes large. Furthermore `madvise (4)` may also be
                    // zero bytes large.

                    let image_end = image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .expect("image is in bounds");
                    let mem_after_image = self
                        .accessible
                        .checked_sub(image_end)
                        .expect("image_end falls before self.accessible");
                    let excess = keep_resident
                        .checked_sub(image.linear_memory_offset)
                        .expect(
                            "if statement checks that keep_resident > image.linear_memory_offset",
                        );
                    let remaining_memset = excess.min(mem_after_image);

                    // This is memset (1)
                    ptr::write_bytes(
                        self.base.as_mut_ptr(),
                        0u8,
                        image.linear_memory_offset.byte_count(),
                    );

                    // This is madvise (2)
                    self.restore_original_mapping(
                        image.linear_memory_offset,
                        image.len,
                        &mut decommit,
                    );

                    // This is memset (3)
                    ptr::write_bytes(
                        self.base.as_mut_ptr().add(image_end.byte_count()),
                        0u8,
                        remaining_memset.byte_count(),
                    );

                    // This is madvise (4)
                    self.restore_original_mapping(
                        image_end
                            .checked_add(remaining_memset)
                            .expect("image_end + remaining_memset is in bounds"),
                        mem_after_image
                            .checked_sub(remaining_memset)
                            .expect("remaining_memset defined to be <= mem_after_image"),
                        &mut decommit,
                    );
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0             keep_resident                   accessible
                    // |                |                                 |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory      |  image   |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    //  <------+-------> <-------------+----------------->
                    //         |                       |
                    //         |                       |
                    //   memset (1)                 madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping resident.
                    // Note that the memset may be zero bytes here.

                    // This is memset (1)
                    ptr::write_bytes(self.base.as_mut_ptr(), 0u8, keep_resident.byte_count());

                    // This is madvise (2)
                    self.restore_original_mapping(
                        keep_resident,
                        self.accessible
                            .checked_sub(keep_resident)
                            .expect("keep_resident is a subset of accessible memory"),
                        decommit,
                    );
                };
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.accessible);
                ptr::write_bytes(self.base.as_mut_ptr(), 0u8, size_to_memset.byte_count());
                self.restore_original_mapping(
                    size_to_memset,
                    self.accessible
                        .checked_sub(size_to_memset)
                        .expect("size_to_memset is defined to be <= self.accessible"),
                    decommit,
                );
            }
        }
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn restore_original_mapping(
        &self,
        base: HostAlignedByteCount,
        len: HostAlignedByteCount,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        assert!(base.checked_add(len).unwrap() <= self.accessible);
        if len == 0 {
            return;
        }

        assert_eq!(
            vm::decommit_behavior(),
            DecommitBehavior::RestoreOriginalMapping
        );
        decommit(
            self.base.as_mut_ptr().add(base.byte_count()),
            len.byte_count(),
        );
    }

    fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
        let len = range
            .end
            .checked_sub(range.start)
            .expect("range.start <= range.end");
        assert!(range.end.byte_count() <= self.static_size);
        if len.is_zero() {
            return Ok(());
        }

        // TODO: use Mmap to change memory permissions instead of these free
        // functions.
        unsafe {
            let start = self.base.as_mut_ptr().add(range.start.byte_count());
            if readwrite {
                vm::expose_existing_mapping(start, len.byte_count())?;
            } else {
                vm::hide_existing_mapping(start, len.byte_count())?;
            }
        }

        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    fn reset_with_anon_memory(&mut self) -> Result<()> {
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
        }

        self.image = None;
        self.accessible = HostAlignedByteCount::ZERO;

        Ok(())
    }
}

impl Drop for MemoryImageSlot {
    fn drop(&mut self) {
        // The MemoryImageSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemoryImageSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemoryImageSlot will not be returned to the MemoryPool, so a new
        // MemoryImageSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemoryImageSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Instead though the result is unwrapped here to
        // trigger a panic if something goes wrong. Otherwise if this
        // reset-the-mapping fails then on reuse it might be possible, depending
        // on precisely where errors happened, that stale memory could get
        // leaked through.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemoryImageSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            self.reset_with_anon_memory().unwrap();
        }
    }
}

#[cfg(all(test, target_os = "linux", not(miri)))]
mod test {
    use super::*;
    use crate::runtime::vm::mmap::{AlignedLength, Mmap};
    use crate::runtime::vm::sys::vm::decommit_pages;
    use crate::runtime::vm::{host_page_size, HostAlignedByteCount};
    use std::sync::Arc;
    use wasmtime_environ::{IndexType, Limits, Memory};

    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
        // offset must be a multiple of the page size.
        let linear_memory_offset =
            HostAlignedByteCount::new(offset).expect("offset is page-aligned");
        // The image length is rounded up to the nearest page size
        let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();

        Ok(MemoryImage {
            source: MemoryImageSource::from_data(data)?.unwrap(),
            len: image_len,
            source_offset: 0,
            linear_memory_offset,
        })
    }

    fn dummy_memory() -> Memory {
        Memory {
            idx_type: IndexType::I32,
            limits: Limits { min: 0, max: None },
            shared: false,
            page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
        }
    }

    fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
        let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
        Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
    }

    /// Presents a part of an mmap as a mutable slice within a callback.
    ///
    /// The callback ensures that the reference no longer lives after the
    /// function is done.
    ///
    /// # Safety
    ///
    /// The caller must ensure that during this function call, the only way this
    /// region of memory is accessed (read from or written to) is via the
    /// reference. Making the callback `'static` goes some way towards ensuring
    /// that, but it's still possible to squirrel away a reference into global
    /// state. So don't do that.
    unsafe fn with_slice_mut(
        mmap: &Arc<Mmap<AlignedLength>>,
        range: Range<usize>,
        f: impl FnOnce(&mut [u8]) + 'static,
    ) {
        let ptr = mmap.as_ptr().cast_mut();
        let slice = unsafe {
            core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
        };
        f(slice);
    }

    #[test]
    fn instantiate_no_image() {
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(memfd.is_dirty());

        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        unsafe {
            with_slice_mut(&mmap, 0..65536, |slice| {
                assert_eq!(0, slice[0]);
                assert_eq!(0, slice[65535]);
                slice[1024] = 42;
                assert_eq!(42, slice[1024]);
            });
        }

        // grow the heap
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = unsafe { mmap.slice(0..1 << 20) };
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(0, slice[1024]);
    }

    #[test]
    fn instantiate_image() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..65536, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
            });
        }

        // Clear and re-instantiate same image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Clear and re-instantiate no image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);

        // Clear and re-instantiate image again
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(128 << 10, Some(&image2), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);

        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();

        // Test basics with the image
        for image_off in [0, page_size, page_size * 2] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
                let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
                memfd
                    .instantiate(64 << 10, Some(&image), &ty, &tunables)
                    .unwrap();
                assert!(memfd.has_image());

                unsafe {
                    with_slice_mut(&mmap, 0..64 << 10, move |slice| {
                        if image_off > 0 {
                            assert_eq!(slice[image_off - 1], 0);
                        }
                        assert_eq!(slice[image_off + 5], 0);
                        assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                        slice[image_off] = 5;
                        assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                    })
                };

                memfd
                    .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                        decommit_pages(ptr, len).unwrap()
                    })
                    .unwrap();
            }
        }

        // Test without an image
        for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
            let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
            memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();

            unsafe {
                with_slice_mut(&mmap, 0..64 << 10, |slice| {
                    for chunk in slice.chunks_mut(1024) {
                        assert_eq!(chunk[0], 0);
                        chunk[0] = 5;
                    }
                });
            }
            memfd
                .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 0,
            memory_reservation_for_growth: 200,
            ..Tunables::default_miri()
        };

        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Re-instantiate and make sure it preserves memory. Grow a bit and set data
        // beyond the initial size.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }
}