wasmtime/runtime/vm/cow.rs

//! Copy-on-write initialization support: creation of backing images for
//! modules, and logic to support mapping these backing images into memory.

use super::sys::DecommitBehavior;
use crate::prelude::*;
use crate::runtime::vm::sys::vm::{self, MemoryImageSource};
use crate::runtime::vm::{HostAlignedByteCount, MmapOffset, MmapVec, host_page_size};
use alloc::sync::Arc;
use core::ops::Range;
use core::ptr;
use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, PrimaryMap, Tunables};

/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}

impl ModuleMemoryImages {
    /// Get the `MemoryImage` for a given memory.
    pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
        self.memories[defined_index].as_ref()
    }
}

/// One backing image for one memory.
#[derive(Debug, PartialEq)]
pub struct MemoryImage {
    /// The platform-specific source of this image.
    ///
    /// This might be a mapped `*.cwasm` file or, on Linux, a `Memfd` serving
    /// as an anonymous in-memory file. In either case this is used as the
    /// backing source for the CoW image.
    source: MemoryImageSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: HostAlignedByteCount,

    /// Image starts this many bytes into `source`.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    ///
    /// ## Notes
    ///
    /// This currently isn't a `HostAlignedByteCount` because that type wraps a
    /// `usize` and this, being a file offset, is a `u64`.
    source_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: HostAlignedByteCount,
}
impl MemoryImage {
    fn new(
        page_size: u32,
        linear_memory_offset: HostAlignedByteCount,
        data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<MemoryImage>> {
        let assert_page_aligned = |val: usize| {
            assert_eq!(val % (page_size as usize), 0);
        };
        // Sanity-check that various parameters are page-aligned.
        let len = HostAlignedByteCount::new(data.len()).expect("memory image data is page-aligned");

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense that all
        // platforms we support can memory-map copy-on-write data from files,
        // but for now this is still a Linux-specific region of Wasmtime.
        // Some work will be needed to get this file compiling for macOS and
        // Windows.
        if let Some(mmap) = mmap {
            let start = mmap.as_ptr() as usize;
            let end = start + mmap.len();
            let data_start = data.as_ptr() as usize;
            let data_end = data_start + data.len();
            assert!(start <= data_start && data_end <= end);
            assert_page_aligned(start);
            assert_page_aligned(data_start);
            assert_page_aligned(data_end);

            #[cfg(feature = "std")]
            if let Some(file) = mmap.original_file() {
                if let Some(source) = MemoryImageSource::from_file(file) {
                    return Ok(Some(MemoryImage {
                        source,
                        source_offset: u64::try_from(data_start - start).unwrap(),
                        linear_memory_offset,
                        len,
                    }));
                }
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.
        if let Some(source) = MemoryImageSource::from_data(data)? {
            return Ok(Some(MemoryImage {
                source,
                source_offset: 0,
                linear_memory_offset,
                len,
            }));
        }

        Ok(None)
    }

    unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
        mmap_base.map_image_at(
            &self.source,
            self.source_offset,
            self.linear_memory_offset,
            self.len,
        )
    }

    unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
        self.source.remap_as_zeros_at(
            base.add(self.linear_memory_offset.byte_count()),
            self.len.byte_count(),
        )?;
        Ok(())
    }
}

impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
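    ///
    /// A minimal usage sketch (illustrative only; the surrounding values and
    /// the exact call site in the instance allocator are assumed here rather
    /// than shown):
    ///
    /// ```ignore
    /// // `module`, `wasm_data`, and `mmap` come from a compiled artifact.
    /// if let Some(images) = ModuleMemoryImages::new(&module, wasm_data, Some(&mmap))? {
    ///     // Look up the image for the first defined memory, if one was built.
    ///     let image = images.get_memory_image(DefinedMemoryIndex::from_u32(0));
    /// }
    /// ```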
    pub fn new(
        module: &Module,
        wasm_data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<ModuleMemoryImages>> {
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::runtime::vm::host_page_size();
        let page_size = u32::try_from(page_size).unwrap();
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no known initialization for this memory then we don't
            // need an image for it, so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            // Get the image for this memory as a subslice of `wasm_data`, and
            // then use that to try to create the `MemoryImage`. If this
            // creation fails then we fail creating `ModuleMemoryImages` since
            // this memory couldn't be represented.
            let data = &wasm_data[init.data.start as usize..init.data.end as usize];
            if module.memories[memory_index]
                .minimum_byte_size()
                .map_or(false, |mem_initial_len| {
                    init.offset + u64::try_from(data.len()).unwrap() > mem_initial_len
                })
            {
                // The image is rounded up to multiples of the host OS page
                // size. But if Wasm is using a custom page size, the Wasm page
                // size might be smaller than the host OS page size, and that
                // rounding might have made the image larger than the Wasm
                // memory's initial length. This is *probably* okay, since the
                // rounding would have just introduced new runs of zeroes in the
                // image, but out of an abundance of caution we don't generate
                // CoW images in this scenario.
                return Ok(None);
            }

            let offset_usize = match usize::try_from(init.offset) {
                Ok(offset) => offset,
                Err(_) => return Ok(None),
            };
            let offset = HostAlignedByteCount::new(offset_usize)
                .expect("memory init offset is a multiple of the host page size");
            let image = match MemoryImage::new(page_size, offset, data, mmap)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}

/// Slot management of a copy-on-write image which can be reused for the pooling
/// allocator.
///
/// This data structure manages a slot of linear memory, primarily in the
/// pooling allocator, which optionally has a contiguous memory image in the
/// middle of it. Pictorially this data structure manages a virtual memory
/// region that looks like:
///
/// ```text
///   +--------------------+-------------------+--------------+--------------+
///   |   anonymous        |      optional     |   anonymous  |    PROT_NONE |
///   |     zero           |       memory      |     zero     |     memory   |
///   |    memory          |       image       |    memory    |              |
///   +--------------------+-------------------+--------------+--------------+
///   |                     <------+---------->
///   |<-----+------------>         \
///   |      \                   image.len
///   |       \
///   |  image.linear_memory_offset
///   |
///   \
///  self.base is this virtual address
///
///    <------------------+------------------------------------------------>
///                        \
///                      static_size
///
///    <------------------+---------------------------------->
///                        \
///                      accessible
/// ```
///
/// When a `MemoryImageSlot` is created it's told what the `static_size` and
/// `accessible` limits are. Initially there is assumed to be no image in linear
/// memory.
///
/// When `MemoryImageSlot::instantiate` is called then the method will perform
/// a "synchronization" to take the image from its prior state to the new state
/// for the image specified. The first instantiation for example will mmap the
/// heap image into place. Upon reuse of a slot nothing happens except possibly
/// shrinking `self.accessible`. When a new image is used then the old image is
/// mapped to anonymous zero memory and then the new image is mapped in place.
///
/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
/// is dirty then it is assumed that any memory beneath `self.accessible` could
/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
/// `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
/// its original state to mark `dirty = false`. This is done by resetting all
/// anonymous memory back to zero and the image itself back to its initial
/// contents.
///
/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
/// syscall will release the physical pages back to the OS but retain the
/// original mappings, effectively resetting everything back to its initial
/// state. Non-Linux platforms will replace all memory below `self.accessible`
/// with a fresh zero'd mmap, meaning that reuse is effectively not supported.
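///
/// A rough lifecycle sketch, based on the unit tests below (illustrative only;
/// the `mmap`, `image`, `ty`, and `tunables` values are assumed to come from
/// the pooling allocator and the compiled module):
///
/// ```ignore
/// let mut slot =
///     MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
/// slot.instantiate(64 << 10, Some(&image), &ty, &tunables)?;
/// // ... the instance runs and dirties memory ...
/// slot.clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
///     decommit_pages(ptr, len).unwrap()
/// })?;
/// // The slot is now pristine again and ready for the next instantiation.
/// ```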
#[derive(Debug)]
pub struct MemoryImageSlot {
    /// The mmap and offset within it that contains the linear memory for this
    /// slot.
    base: MmapOffset,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some` the image describes where it's located within this linear memory.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size when
    /// dynamic memories are in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    ///
    /// Also note that this is always page-aligned.
    accessible: HostAlignedByteCount,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,

    /// Whether this MemoryImageSlot is responsible for mapping anonymous
    /// memory (to hold the reservation while overwriting mappings
    /// specific to this slot) in place when it is dropped. Default
    /// on, unless the caller knows what they are doing.
    clear_on_drop: bool,
}

impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    pub(crate) fn create(
        base: MmapOffset,
        accessible: HostAlignedByteCount,
        static_size: usize,
    ) -> Self {
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }

    /// Inform the MemoryImageSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }

    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
        assert!(size_bytes <= self.static_size);
        assert!(size_bytes_aligned.byte_count() <= self.static_size);

        // If the heap limit already addresses accessible bytes then no syscalls
        // are necessary since the data is already mapped into the process and
        // waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes_aligned <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes_aligned, true)?;
        self.accessible = size_bytes_aligned;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `ty` and `tunables` parameters
    /// describe how compiled code will be accessing this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to set up the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(
            initial_size_bytes <= self.static_size,
            "initial_size_bytes <= self.static_size failed: \
             initial_size_bytes={initial_size_bytes}, self.static_size={}",
            self.static_size
        );
        let initial_size_bytes_page_aligned =
            HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        if self.image.as_ref() != maybe_image {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes_page_aligned {
            self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard
        // pages), then we need to reset memory protections. Put another way,
        // the only time it is safe to not reset protections is when we are
        // using dynamic memory without any guard pages.
        let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
        if initial_size_bytes_page_aligned < self.accessible
            && (tunables.memory_guard_size > 0
                || ty.can_elide_bounds_check(tunables, host_page_size_log2))
        {
            self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible.byte_count());
        assert!(initial_size_bytes_page_aligned <= self.accessible);
        if self.image.as_ref() != maybe_image {
            if let Some(image) = maybe_image.as_ref() {
                assert!(
                    image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .unwrap()
                        .byte_count()
                        <= initial_size_bytes
                );
                if !image.len.is_zero() {
                    unsafe {
                        image.map_at(&self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base.as_mut_ptr())?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero while the rest of it will be reset or released with `madvise`.
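    ///
    /// A sketch of a typical call, mirroring the unit tests below (the
    /// `decommit_pages` callback is just one possible decommit strategy):
    ///
    /// ```ignore
    /// slot.clear_and_remain_ready(keep_resident, |ptr, len| unsafe {
    ///     decommit_pages(ptr, len).unwrap()
    /// })?;
    /// ```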
    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn clear_and_remain_ready(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        assert!(self.dirty);

        unsafe {
            self.reset_all_memory_contents(keep_resident, decommit)?;
        }

        self.dirty = false;
        Ok(())
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_all_memory_contents(
        &mut self,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        match vm::decommit_behavior() {
            DecommitBehavior::Zero => {
                // If we're not on Linux then there's no generic platform way to
                // reset memory back to its original state, so instead reset memory
                // back to entirely zeros with an anonymous backing.
                //
                // Additionally the previous image, if any, is dropped here
                // since it's no longer applicable to this mapping.
                self.reset_with_anon_memory()
            }
            DecommitBehavior::RestoreOriginalMapping => {
                self.reset_with_original_mapping(keep_resident, decommit);
                Ok(())
            }
        }
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_with_original_mapping(
        &mut self,
        keep_resident: HostAlignedByteCount,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        match &self.image {
            Some(image) => {
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` threshold
                    // then memory looks something like this:
                    //
                    //               up to `keep_resident` bytes
                    //                          |
                    //          +--------------------------+  remaining_memset
                    //          |                          | /
                    //  <-------------->                <------->
                    //
                    //                              image_end
                    // 0        linear_memory_offset   |             accessible
                    // |                |              |                  |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+--------------+---------+--------+
                    //
                    //  <------+-------> <-----+----->  <---+---> <--+--->
                    //         |               |            |        |
                    //         |               |            |        |
                    //   memset (1)            /            |   madvise (4)
                    //                   madvise (2)       /
                    //                                    /
                    //                              memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that are
                    // `memset` manually to zero. Note that `memset (3)` may be
                    // zero bytes large. Furthermore `madvise (4)` may also be
                    // zero bytes large.

                    let image_end = image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .expect("image is in bounds");
                    let mem_after_image = self
                        .accessible
                        .checked_sub(image_end)
                        .expect("image_end falls before self.accessible");
                    let excess = keep_resident
                        .checked_sub(image.linear_memory_offset)
                        .expect(
                            "if statement checks that keep_resident > image.linear_memory_offset",
                        );
                    let remaining_memset = excess.min(mem_after_image);

                    // This is memset (1)
                    ptr::write_bytes(
                        self.base.as_mut_ptr(),
                        0u8,
                        image.linear_memory_offset.byte_count(),
                    );

                    // This is madvise (2)
                    self.restore_original_mapping(
                        image.linear_memory_offset,
                        image.len,
                        &mut decommit,
                    );

                    // This is memset (3)
                    ptr::write_bytes(
                        self.base.as_mut_ptr().add(image_end.byte_count()),
                        0u8,
                        remaining_memset.byte_count(),
                    );

                    // This is madvise (4)
                    self.restore_original_mapping(
                        image_end
                            .checked_add(remaining_memset)
                            .expect("image_end + remaining_memset is in bounds"),
                        mem_after_image
                            .checked_sub(remaining_memset)
                            .expect("remaining_memset defined to be <= mem_after_image"),
                        &mut decommit,
                    );
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0             keep_resident                   accessible
                    // |                |                                 |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory      |  image   |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    //  <------+-------> <-------------+----------------->
                    //         |                       |
                    //         |                       |
                    //   memset (1)                 madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping resident.
                    // Note that the memset may be zero bytes here.

                    // This is memset (1)
                    ptr::write_bytes(self.base.as_mut_ptr(), 0u8, keep_resident.byte_count());

                    // This is madvise (2)
                    self.restore_original_mapping(
                        keep_resident,
                        self.accessible
                            .checked_sub(keep_resident)
                            .expect("keep_resident is a subset of accessible memory"),
                        decommit,
                    );
                };
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.accessible);
                ptr::write_bytes(self.base.as_mut_ptr(), 0u8, size_to_memset.byte_count());
                self.restore_original_mapping(
                    size_to_memset,
                    self.accessible
                        .checked_sub(size_to_memset)
                        .expect("size_to_memset is defined to be <= self.accessible"),
                    decommit,
                );
            }
        }
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn restore_original_mapping(
        &self,
        base: HostAlignedByteCount,
        len: HostAlignedByteCount,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        assert!(base.checked_add(len).unwrap() <= self.accessible);
        if len == 0 {
            return;
        }

        assert_eq!(
            vm::decommit_behavior(),
            DecommitBehavior::RestoreOriginalMapping
        );
        decommit(
            self.base.as_mut_ptr().add(base.byte_count()),
            len.byte_count(),
        );
    }

    fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
        let len = range
            .end
            .checked_sub(range.start)
            .expect("range.start <= range.end");
        assert!(range.end.byte_count() <= self.static_size);
        if len.is_zero() {
            return Ok(());
        }

        // TODO: use Mmap to change memory permissions instead of these free
        // functions.
        unsafe {
            let start = self.base.as_mut_ptr().add(range.start.byte_count());
            if readwrite {
                vm::expose_existing_mapping(start, len.byte_count())?;
            } else {
                vm::hide_existing_mapping(start, len.byte_count())?;
            }
        }

        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    fn reset_with_anon_memory(&mut self) -> Result<()> {
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
        }

        self.image = None;
        self.accessible = HostAlignedByteCount::ZERO;

        Ok(())
    }
}

impl Drop for MemoryImageSlot {
    fn drop(&mut self) {
        // The MemoryImageSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemoryImageSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemoryImageSlot will not be returned to the MemoryPool, so a new
        // MemoryImageSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemoryImageSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Instead the result is unwrapped here to
        // trigger a panic if something goes wrong. Otherwise if this
        // reset-the-mapping fails then on reuse it might be possible, depending
        // on precisely where errors happened, that stale memory could get
        // leaked through.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemoryImageSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            self.reset_with_anon_memory().unwrap();
        }
    }
}

#[cfg(all(test, target_os = "linux", not(miri)))]
mod test {
    use super::*;
    use crate::runtime::vm::mmap::{AlignedLength, Mmap};
    use crate::runtime::vm::sys::vm::decommit_pages;
    use crate::runtime::vm::{HostAlignedByteCount, host_page_size};
    use std::sync::Arc;
    use wasmtime_environ::{IndexType, Limits, Memory};

    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
        // offset must be a multiple of the page size.
        let linear_memory_offset =
            HostAlignedByteCount::new(offset).expect("offset is page-aligned");
        // The image length is rounded up to the nearest page size
        let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();

        Ok(MemoryImage {
            source: MemoryImageSource::from_data(data)?.unwrap(),
            len: image_len,
            source_offset: 0,
            linear_memory_offset,
        })
    }

    fn dummy_memory() -> Memory {
        Memory {
            idx_type: IndexType::I32,
            limits: Limits { min: 0, max: None },
            shared: false,
            page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
        }
    }

    fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
        let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
        Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
    }

    /// Presents a part of an mmap as a mutable slice within a callback.
    ///
    /// The callback ensures that the reference no longer lives after the
    /// function is done.
    ///
    /// # Safety
    ///
    /// The caller must ensure that during this function call, this region of
    /// memory is not accessed (read from or written to) except via the
    /// reference. Making the callback `'static` goes some way towards ensuring
    /// that, but it's still possible to squirrel away a reference into global
    /// state. So don't do that.
    unsafe fn with_slice_mut(
        mmap: &Arc<Mmap<AlignedLength>>,
        range: Range<usize>,
        f: impl FnOnce(&mut [u8]) + 'static,
    ) {
        let ptr = mmap.as_ptr().cast_mut();
        let slice = unsafe {
            core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
        };
        f(slice);
    }

    #[test]
    fn instantiate_no_image() {
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(memfd.is_dirty());

        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        unsafe {
            with_slice_mut(&mmap, 0..65536, |slice| {
                assert_eq!(0, slice[0]);
                assert_eq!(0, slice[65535]);
                slice[1024] = 42;
                assert_eq!(42, slice[1024]);
            });
        }

        // grow the heap
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = unsafe { mmap.slice(0..1 << 20) };
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(0, slice[1024]);
    }

    #[test]
    fn instantiate_image() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..65536, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
            });
        }

        // Clear and re-instantiate same image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Clear and re-instantiate no image
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);

        // Clear and re-instantiate image again
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(128 << 10, Some(&image2), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);

        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();

        // Test basics with the image
        for image_off in [0, page_size, page_size * 2] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
                let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
                memfd
                    .instantiate(64 << 10, Some(&image), &ty, &tunables)
                    .unwrap();
                assert!(memfd.has_image());

                unsafe {
                    with_slice_mut(&mmap, 0..64 << 10, move |slice| {
                        if image_off > 0 {
                            assert_eq!(slice[image_off - 1], 0);
                        }
                        assert_eq!(slice[image_off + 5], 0);
                        assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                        slice[image_off] = 5;
                        assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                    })
                };

                memfd
                    .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                        decommit_pages(ptr, len).unwrap()
                    })
                    .unwrap();
            }
        }

        // Test without an image
        for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
            let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
            memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();

            unsafe {
                with_slice_mut(&mmap, 0..64 << 10, |slice| {
                    for chunk in slice.chunks_mut(1024) {
                        assert_eq!(chunk[0], 0);
                        chunk[0] = 5;
                    }
                });
            }
            memfd
                .clear_and_remain_ready(amt_to_memset, |ptr, len| unsafe {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 0,
            memory_reservation_for_growth: 200,
            ..Tunables::default_miri()
        };

        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        memfd.no_clear_on_drop();
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Re-instantiate and make sure it preserves memory. Grow a bit and set
        // data beyond the initial size.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }
}