Skip to main content

wasmtime/runtime/vm/
cow.rs

1//! Copy-on-write initialization support: creation of backing images for
2//! modules, and logic to support mapping these backing images into memory.
3
4use super::sys::DecommitBehavior;
5use crate::Engine;
6use crate::prelude::*;
7use crate::runtime::vm::sys::vm::{self, MemoryImageSource, PageMap, reset_with_pagemap};
8use crate::runtime::vm::{
9    HostAlignedByteCount, MmapOffset, ModuleMemoryImageSource, host_page_size,
10};
11use alloc::sync::Arc;
12use core::fmt;
13use core::ops::Range;
14use wasmtime_environ::prelude::TryPrimaryMap;
15use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, Tunables};
16
/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    /// One entry per defined memory; `None` means no image was built for that
    /// memory (e.g. it has no static initialization data).
    memories: TryPrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}
24
25impl ModuleMemoryImages {
26    /// Get the MemoryImage for a given memory.
27    pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
28        self.memories[defined_index].as_ref()
29    }
30}
31
/// One backing image for one memory.
pub struct MemoryImage {
    /// The platform-specific source of this image.
    ///
    /// This might be a mapped `*.cwasm` file or on Unix it could also be a
    /// `Memfd` as an anonymous file in memory on Linux. In either case this is
    /// used as the backing-source for the CoW image.
    source: MemoryImageSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: HostAlignedByteCount,

    /// Image starts this many bytes into `source`.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    ///
    /// ## Notes
    ///
    /// This currently isn't a `HostAlignedByteCount` because that's a usize and
    /// this, being a file offset, is a u64.
    source_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: HostAlignedByteCount,

    /// The original source of data that this image is derived from.
    ///
    /// Used to manually re-materialize image bytes when resetting a slot via
    /// `reset_with_original_mapping` (see `manually_reset_region`).
    module_source: Arc<dyn ModuleMemoryImageSource>,

    /// The offset, within `module_source.wasm_data()`, that this image starts
    /// at.
    module_source_offset: usize,
}
74
impl MemoryImage {
    /// Attempts to build a page-aligned CoW image for the `data_range` subset
    /// of `module_source`'s wasm data, destined for `linear_memory_offset`
    /// within a linear memory.
    ///
    /// Returns `Ok(None)` when no platform-specific backing source could be
    /// constructed for this data.
    fn new(
        engine: &Engine,
        page_size: u32,
        linear_memory_offset: HostAlignedByteCount,
        module_source: &Arc<impl ModuleMemoryImageSource>,
        data_range: Range<usize>,
    ) -> Result<Option<MemoryImage>> {
        let assert_page_aligned = |val: usize| {
            assert_eq!(val % (page_size as usize), 0);
        };
        // Sanity-check that various parameters are page-aligned.
        let len =
            HostAlignedByteCount::new(data_range.len()).expect("memory image data is page-aligned");

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense of all
        // platforms we support support memory mapping copy-on-write data from
        // files, but for now this is still a Linux-specific region of Wasmtime.
        // Some work will be needed to get this file compiling for macOS and
        // Windows.
        let data = &module_source.wasm_data()[data_range.clone()];
        if !engine.config().force_memory_init_memfd {
            if let Some(mmap) = module_source.mmap() {
                let start = mmap.as_ptr() as usize;
                let end = start + mmap.len();
                let data_start = data.as_ptr() as usize;
                let data_end = data_start + data.len();
                assert!(start <= data_start && data_end <= end);
                assert_page_aligned(start);
                assert_page_aligned(data_start);
                assert_page_aligned(data_end);

                #[cfg(feature = "std")]
                if let Some(file) = mmap.original_file() {
                    if let Some(source) = MemoryImageSource::from_file(file) {
                        return Ok(Some(MemoryImage {
                            source,
                            // Byte offset of the data section within the
                            // backing file.
                            source_offset: u64::try_from(data_start - start).unwrap(),
                            linear_memory_offset,
                            len,
                            module_source: module_source.clone(),
                            module_source_offset: data_range.start,
                        }));
                    }
                }
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.
        if let Some(source) = MemoryImageSource::from_data(data)? {
            return Ok(Some(MemoryImage {
                source,
                // Anonymous-backed sources start at the beginning of `source`.
                source_offset: 0,
                linear_memory_offset,
                len,
                module_source: module_source.clone(),
                module_source_offset: data_range.start,
            }));
        }

        Ok(None)
    }

    /// Maps this image into the linear memory at `mmap_base`, placed at
    /// `self.linear_memory_offset` within it.
    ///
    /// # Safety
    ///
    /// Unsafe for the same reasons as `MmapOffset::map_image_at`; the caller
    /// must ensure the target range is safe to replace with this mapping
    /// (NOTE(review): presumably no outstanding references into that range —
    /// confirm against `map_image_at`'s contract).
    unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
        unsafe {
            mmap_base.map_image_at(
                &self.source,
                self.source_offset,
                self.linear_memory_offset,
                self.len,
            )
        }
    }

    /// Replaces this image's range, relative to linear-memory `base`, with
    /// anonymous zeroed memory.
    ///
    /// # Safety
    ///
    /// `base` must be the base pointer of the linear memory this image was
    /// mapped into, and the replaced range must be safe to remap.
    unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
        unsafe {
            self.source.remap_as_zeros_at(
                base.add(self.linear_memory_offset.byte_count()),
                self.len.byte_count(),
            )?;
        }
        Ok(())
    }
}
169
impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of a `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
    ///
    /// Returns `Ok(None)` when CoW images can't be used for this module, e.g.
    /// when memory initialization isn't static or a memory is imported.
    pub fn new(
        engine: &Engine,
        module: &Module,
        source: &Arc<impl ModuleMemoryImageSource>,
    ) -> Result<Option<ModuleMemoryImages>> {
        // CoW images are only applicable to statically-known initialization.
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = TryPrimaryMap::with_capacity(map.len())?;
        let page_size = crate::runtime::vm::host_page_size();
        let page_size = u32::try_from(page_size).unwrap();
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no initialization for this memory known then we don't
            // need an image for the memory so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None)?;
                    continue;
                }
            };

            let data_range = init.data.start as usize..init.data.end as usize;
            if module.memories[memory_index]
                .minimum_byte_size()
                .map_or(false, |mem_initial_len| {
                    init.offset + u64::try_from(data_range.len()).unwrap() > mem_initial_len
                })
            {
                // The image is rounded up to multiples of the host OS page
                // size. But if Wasm is using a custom page size, the Wasm page
                // size might be smaller than the host OS page size, and that
                // rounding might have made the image larger than the Wasm
                // memory's initial length. This is *probably* okay, since the
                // rounding would have just introduced new runs of zeroes in the
                // image, but out of an abundance of caution we don't generate
                // CoW images in this scenario.
                return Ok(None);
            }

            // If the 64-bit offset can't be represented in a host `usize`
            // (e.g. a 32-bit host) then CoW can't be used for this module.
            let offset_usize = match usize::try_from(init.offset) {
                Ok(offset) => offset,
                Err(_) => return Ok(None),
            };
            let offset = HostAlignedByteCount::new(offset_usize)
                .expect("memory init offset is a multiple of the host page size");

            // If this creation fails then we fail creating
            // `ModuleMemoryImages` since this memory couldn't be represented.
            let image = match MemoryImage::new(engine, page_size, offset, source, data_range)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(try_new::<Arc<_>>(image)?))?;
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}
244
/// Slot management of a copy-on-write image which can be reused for the pooling
/// allocator.
///
/// This data structure manages a slot of linear memory, primarily in the
/// pooling allocator, which optionally has a contiguous memory image in the
/// middle of it. Pictorially this data structure manages a virtual memory
/// region that looks like:
///
/// ```text
///   +--------------------+-------------------+--------------+--------------+
///   |   anonymous        |      optional     |   anonymous  |    PROT_NONE |
///   |     zero           |       memory      |     zero     |     memory   |
///   |    memory          |       image       |    memory    |              |
///   +--------------------+-------------------+--------------+--------------+
///   |                     <------+---------->
///   |<-----+------------>         \
///   |      \                   image.len
///   |       \
///   |  image.linear_memory_offset
///   |
///   \
///  self.base is this virtual address
///
///    <------------------+------------------------------------------------>
///                        \
///                      static_size
///
///    <------------------+---------------------------------->
///                        \
///                      accessible
/// ```
///
/// When a `MemoryImageSlot` is created it's told what the `static_size` and
/// `accessible` limits are. Initially there is assumed to be no image in linear
/// memory.
///
/// When `MemoryImageSlot::instantiate` is called then the method will perform
/// a "synchronization" to take the image from its prior state to the new state
/// for the image specified. The first instantiation for example will mmap the
/// heap image into place. Upon reuse of a slot nothing happens except possibly
/// shrinking `self.accessible`. When a new image is used then the old image is
/// mapped to anonymous zero memory and then the new image is mapped in place.
///
/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
/// is dirty then it is assumed that any memory beneath `self.accessible` could
/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
/// the `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
/// its original state to mark `dirty = false`. This is done by resetting all
/// anonymous memory back to zero and the image itself back to its initial
/// contents.
///
/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
/// syscall will release the physical pages back to the OS but retain the
/// original mappings, effectively resetting everything back to its initial
/// state. Non-linux platforms will replace all memory below `self.accessible`
/// with a fresh zero'd mmap, meaning that reuse is effectively not supported.
pub struct MemoryImageSlot {
    /// The mmap and offset within it that contains the linear memory for this
    /// slot.
    base: MmapOffset,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some` the image describes where it's located within the image.
    ///
    /// Retained so that re-instantiation with the same image (compared by
    /// `Arc` identity in `instantiate`) can skip remapping entirely.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size in
    /// the case of dynamic memories in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    ///
    /// Also note that this is always page-aligned.
    accessible: HostAlignedByteCount,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,
}
338
339impl fmt::Debug for MemoryImageSlot {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341        f.debug_struct("MemoryImageSlot")
342            .field("base", &self.base)
343            .field("static_size", &self.static_size)
344            .field("accessible", &self.accessible)
345            .field("dirty", &self.dirty)
346            .finish_non_exhaustive()
347    }
348}
349
impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    pub(crate) fn create(
        base: MmapOffset,
        accessible: HostAlignedByteCount,
        static_size: usize,
    ) -> Self {
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
        }
    }

    /// Grows the accessible (read/write) portion of this slot to cover at
    /// least `size_bytes`, rounded up to the host page size.
    ///
    /// Never shrinks `self.accessible`; a no-op when already large enough.
    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
        assert!(size_bytes <= self.static_size);
        assert!(size_bytes_aligned.byte_count() <= self.static_size);

        // If the heap limit already addresses accessible bytes then no syscalls
        // are necessary since the data is already mapped into the process and
        // waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes_aligned <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes_aligned, true)?;
        self.accessible = size_bytes_aligned;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contains. The `style` is the way compiled
    /// code will be accessing this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to setup the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(
            initial_size_bytes <= self.static_size,
            "initial_size_bytes <= self.static_size failed: \
             initial_size_bytes={initial_size_bytes}, self.static_size={}",
            self.static_size
        );
        let initial_size_bytes_page_aligned =
            HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        //
        // Images are compared by `Arc` identity: a reused slot with the
        // literal same image allocation can skip all remapping below.
        let images_equal = match (self.image.as_ref(), maybe_image) {
            (Some(a), Some(b)) if Arc::ptr_eq(a, b) => true,
            (None, None) => true,
            _ => false,
        };
        if !images_equal {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes_page_aligned {
            self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard
        // pages), then we need to reset memory protections. Put another way,
        // the only time it is safe to not reset protections is when we are
        // using dynamic memory without any guard pages.
        let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
        if initial_size_bytes_page_aligned < self.accessible
            && (tunables.memory_guard_size > 0
                || ty.can_elide_bounds_check(tunables, host_page_size_log2))
        {
            self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible.byte_count());
        assert!(initial_size_bytes_page_aligned <= self.accessible);
        if !images_equal {
            if let Some(image) = maybe_image.as_ref() {
                // The image must fit entirely within the initial heap size.
                assert!(
                    image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .unwrap()
                        .byte_count()
                        <= initial_size_bytes
                );
                // Zero-length images need no mapping at all.
                if !image.len.is_zero() {
                    unsafe {
                        image.map_at(&self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    /// Replaces the currently-mapped image's range (if any) with anonymous
    /// zeroed memory and drops the image reference.
    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base.as_mut_ptr())?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero where the rest of it will be reset or released with `madvise`.
    ///
    /// Returns the number of bytes still resident in memory after this function
    /// has returned.
    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn clear_and_remain_ready(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<usize> {
        assert!(self.dirty);

        let bytes_resident =
            unsafe { self.reset_all_memory_contents(pagemap, keep_resident, decommit)? };

        self.dirty = false;
        Ok(bytes_resident)
    }

    /// Resets memory contents according to the platform's decommit behavior:
    /// either everything back to zeros (dropping any image) or back to the
    /// original mapping.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably the slot's memory must not be concurrently
    /// accessed while being reset — confirm with callers.
    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_all_memory_contents(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<usize> {
        match vm::decommit_behavior() {
            DecommitBehavior::Zero => {
                // If we're not on Linux then there's no generic platform way to
                // reset memory back to its original state, so instead reset memory
                // back to entirely zeros with an anonymous backing.
                //
                // Additionally the previous image, if any, is dropped here
                // since it's no longer applicable to this mapping.
                self.reset_with_anon_memory()?;
                Ok(0)
            }
            DecommitBehavior::RestoreOriginalMapping => {
                let bytes_resident =
                    unsafe { self.reset_with_original_mapping(pagemap, keep_resident, decommit) };
                Ok(bytes_resident)
            }
        }
    }

    /// Restores this slot's accessible memory to its original contents by
    /// manually rewriting up to `keep_resident` bytes and handing the rest to
    /// `decommit`, returning the number of bytes left resident.
    ///
    /// # Safety
    ///
    /// Same contract as `reset_all_memory_contents`; additionally this must
    /// only run when `decommit_behavior()` restores the original mapping
    /// (asserted below).
    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_with_original_mapping(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> usize {
        assert_eq!(
            vm::decommit_behavior(),
            DecommitBehavior::RestoreOriginalMapping
        );

        unsafe {
            return match &self.image {
                // If there's a backing image then manually resetting a region
                // is a bit trickier than without an image, so delegate to the
                // helper function below.
                Some(image) => reset_with_pagemap(
                    pagemap,
                    self.base.as_mut_ptr(),
                    self.accessible,
                    keep_resident,
                    |region| manually_reset_region(self.base.as_mut_ptr().addr(), image, region),
                    decommit,
                ),

                // If there's no memory image for this slot then pages are always
                // manually reset back to zero or given to `decommit`.
                None => reset_with_pagemap(
                    pagemap,
                    self.base.as_mut_ptr(),
                    self.accessible,
                    keep_resident,
                    |region| region.fill(0),
                    decommit,
                ),
            };
        }

        /// Manually resets `region` back to its original contents as specified
        /// in `image`.
        ///
        /// This assumes that the original mmap starts at `base_addr` and
        /// `region` is a subslice within the original mmap.
        ///
        /// # Panics
        ///
        /// Panics if `base_addr` is not the right index due to the various
        /// indexing calculations below.
        fn manually_reset_region(base_addr: usize, image: &MemoryImage, mut region: &mut [u8]) {
            let image_start = image.linear_memory_offset.byte_count();
            let image_end = image_start + image.len.byte_count();
            let mut region_start = region.as_ptr().addr() - base_addr;
            let region_end = region_start + region.len();
            let image_bytes = image.module_source.wasm_data();
            let image_bytes = &image_bytes[image.module_source_offset..][..image.len.byte_count()];

            // 1. Zero out the part before the image (if any).
            if let Some(len_before_image) = image_start.checked_sub(region_start) {
                let len = len_before_image.min(region.len());
                let (a, b) = region.split_at_mut(len);
                a.fill(0);
                region = b;
                region_start += len;

                if region.is_empty() {
                    return;
                }
            }

            debug_assert_eq!(region_end - region_start, region.len());
            debug_assert!(region_start >= image_start);

            // 2. Copy the original bytes from the image for the part that
            //    overlaps with the image.
            if let Some(len_in_image) = image_end.checked_sub(region_start) {
                let len = len_in_image.min(region.len());
                let (a, b) = region.split_at_mut(len);
                a.copy_from_slice(&image_bytes[region_start - image_start..][..len]);
                region = b;
                region_start += len;

                if region.is_empty() {
                    return;
                }
            }

            debug_assert_eq!(region_end - region_start, region.len());
            debug_assert!(region_start >= image_end);

            // 3. Zero out the part after the image.
            region.fill(0);
        }
    }

    /// Changes protection of the page-aligned `range` within this slot:
    /// read/write when `readwrite` is true, inaccessible otherwise.
    fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
        let len = range
            .end
            .checked_sub(range.start)
            .expect("range.start <= range.end");
        assert!(range.end.byte_count() <= self.static_size);
        // Empty ranges require no syscall.
        if len.is_zero() {
            return Ok(());
        }

        // TODO: use Mmap to change memory permissions instead of these free
        // functions.
        unsafe {
            let start = self.base.as_mut_ptr().add(range.start.byte_count());
            if readwrite {
                vm::expose_existing_mapping(start, len.byte_count())?;
            } else {
                vm::hide_existing_mapping(start, len.byte_count())?;
            }
        }

        Ok(())
    }

    /// Returns whether an image is currently mapped into this slot.
    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    /// Returns whether this slot may contain pages written since the last
    /// `clear_and_remain_ready`.
    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    pub(crate) fn reset_with_anon_memory(&mut self) -> Result<()> {
        // A zero-sized slot has no mapping to erase.
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
        }

        self.image = None;
        self.accessible = HostAlignedByteCount::ZERO;

        Ok(())
    }
}
710
711#[cfg(all(test, target_os = "linux", not(miri)))]
712mod test {
713    use super::*;
714    use crate::runtime::vm::mmap::{AlignedLength, Mmap};
715    use crate::runtime::vm::sys::vm::decommit_pages;
716    use crate::runtime::vm::{HostAlignedByteCount, MmapVec, host_page_size};
717    use std::sync::Arc;
718    use wasmtime_environ::{IndexType, Limits, Memory};
719
720    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
721        // offset must be a multiple of the page size.
722        let linear_memory_offset =
723            HostAlignedByteCount::new(offset).expect("offset is page-aligned");
724        // The image length is rounded up to the nearest page size
725        let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();
726
727        let mut source = TestDataSource {
728            data: vec![0; image_len.byte_count()],
729        };
730        source.data[..data.len()].copy_from_slice(data);
731
732        return Ok(MemoryImage {
733            source: MemoryImageSource::from_data(data)?.unwrap(),
734            len: image_len,
735            source_offset: 0,
736            linear_memory_offset,
737            module_source: Arc::new(source),
738            module_source_offset: 0,
739        });
740
741        struct TestDataSource {
742            data: Vec<u8>,
743        }
744
745        impl ModuleMemoryImageSource for TestDataSource {
746            fn wasm_data(&self) -> &[u8] {
747                &self.data
748            }
749            fn mmap(&self) -> Option<&MmapVec> {
750                None
751            }
752        }
753    }
754
755    fn dummy_memory() -> Memory {
756        Memory {
757            idx_type: IndexType::I32,
758            limits: Limits { min: 0, max: None },
759            shared: false,
760            page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
761        }
762    }
763
764    fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
765        let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
766        Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
767    }
768
769    /// Presents a part of an mmap as a mutable slice within a callback.
770    ///
771    /// The callback ensures that the reference no longer lives after the
772    /// function is done.
773    ///
774    /// # Safety
775    ///
776    /// The caller must ensure that during this function call, the only way this
777    /// region of memory is not accessed by (read from or written to) is via the
778    /// reference. Making the callback `'static` goes some way towards ensuring
779    /// that, but it's still possible to squirrel away a reference into global
780    /// state. So don't do that.
781    unsafe fn with_slice_mut(
782        mmap: &Arc<Mmap<AlignedLength>>,
783        range: Range<usize>,
784        f: impl FnOnce(&mut [u8]) + 'static,
785    ) {
786        let ptr = mmap.as_ptr().cast_mut();
787        let slice = unsafe {
788            core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
789        };
790        f(slice);
791    }
792
    /// Instantiation without a CoW image: memory must be zeroed, growable,
    /// and re-zeroed after `clear_and_remain_ready`.
    #[test]
    fn instantiate_no_image() {
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(memfd.is_dirty());

        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        unsafe {
            with_slice_mut(&mmap, 0..65536, |slice| {
                assert_eq!(0, slice[0]);
                assert_eq!(0, slice[65535]);
                slice[1024] = 42;
                assert_eq!(42, slice[1024]);
            });
        }

        // grow the heap: previously-written bytes stay, new bytes are zero
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = unsafe { mmap.slice(0..1 << 20) };
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(0, slice[1024]);
    }
838
    /// Instantiation with a CoW image: image bytes must appear at the right
    /// offset, writes must not persist across resets, and switching between
    /// image / no-image / a different image must always give correct contents.
    #[test]
    fn instantiate_image() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        // Create an image with some data, one page into linear memory.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        // Image bytes are visible; dirty one of them via a CoW write.
        unsafe {
            with_slice_mut(&mmap, 0..65536, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
            });
        }

        // Clear and re-instantiate same image: the dirtied byte must be
        // restored to the image's original value.
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Clear and re-instantiate no image: everything back to zero.
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);

        // Clear and re-instantiate image again
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(128 << 10, Some(&image2), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);

        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
    }
928
    /// Exercises the `keep_resident` ("memset instead of madvise") path of
    /// `clear_and_remain_ready` across several image offsets and
    /// keep-resident sizes, with and without a CoW image.
    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);

        // Test basics with the image placed at page offsets 0, 1, and 2.
        for image_off in [0, page_size, page_size * 2] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            // Keep-resident amounts range from nothing up to more than the
            // whole slot.
            for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
                let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
                memfd
                    .instantiate(64 << 10, Some(&image), &ty, &tunables)
                    .unwrap();
                assert!(memfd.has_image());

                // Bytes around the image are zero, image bytes are intact;
                // dirty the first image byte via a CoW write.
                unsafe {
                    with_slice_mut(&mmap, 0..64 << 10, move |slice| {
                        if image_off > 0 {
                            assert_eq!(slice[image_off - 1], 0);
                        }
                        assert_eq!(slice[image_off + 5], 0);
                        assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                        slice[image_off] = 5;
                        assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                    })
                };

                memfd
                    .clear_and_remain_ready(None, amt_to_memset, |ptr, len| unsafe {
                        decommit_pages(ptr, len).unwrap()
                    })
                    .unwrap();
            }
        }

        // Test without an image: all memory must read back zero each
        // iteration even after being dirtied in the previous one.
        for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
            let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
            memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();

            unsafe {
                with_slice_mut(&mmap, 0..64 << 10, |slice| {
                    for chunk in slice.chunks_mut(1024) {
                        assert_eq!(chunk[0], 0);
                        chunk[0] = 5;
                    }
                });
            }
            memfd
                .clear_and_remain_ready(None, amt_to_memset, |ptr, len| unsafe {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();
        }
    }
992
    /// Dynamic-memory configuration (`memory_reservation: 0` plus a small
    /// growth reservation): memory beyond the initial size must remain
    /// accessible across resets, and growth/reset cycles must preserve
    /// zeroing guarantees.
    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 0,
            memory_reservation_for_growth: 200,
            ..Tunables::default_miri()
        };

        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        // After reset the image bytes are restored (and still readable).
        let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Re-instantiate make sure it preserves memory. Grow a bit and set data
        // beyond the initial size.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        memfd.set_heap_limit(initial * 2).unwrap();

        // Newly-grown memory starts zeroed and is writable.
        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }
1087
    /// Exercises pagemap-assisted reset: a 9-page slot with a 3-page image at
    /// pages 3..6 is repeatedly dirtied (one page at a time and in various
    /// runs that straddle the image boundaries) and then reset via
    /// `clear_and_remain_ready` with a `PageMap`, asserting the slot always
    /// returns to its pristine contents.
    #[test]
    fn reset_with_pagemap() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        // 9 pages total: 3 zero pages, 3 image pages, 3 zero pages.
        let mmap_len = page_size * 9;
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, mmap_len);
        let pagemap = PageMap::new();
        let pagemap = pagemap.as_ref();

        // Image data: page i of the image is filled with the byte i+1
        // (so pages of 1s, 2s, and 3s).
        let mut data = vec![0; 3 * page_size];
        for (i, chunk) in data.chunks_mut(page_size).enumerate() {
            for slot in chunk {
                *slot = u8::try_from(i + 1).unwrap();
            }
        }
        let image = Arc::new(create_memfd_with_data(3 * page_size, &data).unwrap());

        memfd
            .instantiate(mmap_len, Some(&image), &ty, &tunables)
            .unwrap();

        let keep_resident = HostAlignedByteCount::new(mmap_len).unwrap();
        let assert_pristine_after_reset = |memfd: &mut MemoryImageSlot| unsafe {
            // Wipe the image, keeping some bytes resident.
            memfd
                .clear_and_remain_ready(pagemap, keep_resident, |ptr, len| {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();

            // Double check that the contents of memory are as expected after
            // reset: zero pages around the image, and image page i (slot
            // pages 3..6) filled with i - 2 (i.e. bytes 1, 2, 3).
            with_slice_mut(&mmap, 0..mmap_len, move |slice| {
                for (i, chunk) in slice.chunks(page_size).enumerate() {
                    let expected = match i {
                        0..3 => 0,
                        3..6 => u8::try_from(i).unwrap() - 2,
                        6..9 => 0,
                        _ => unreachable!(),
                    };
                    for slot in chunk {
                        assert_eq!(*slot, expected);
                    }
                }
            });

            // Re-instantiate, but then wipe the image entirely by keeping
            // nothing resident.
            memfd
                .instantiate(mmap_len, Some(&image), &ty, &tunables)
                .unwrap();
            memfd
                .clear_and_remain_ready(pagemap, HostAlignedByteCount::ZERO, |ptr, len| {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();

            // Next re-instantiate a final time to get used for the next test.
            memfd
                .instantiate(mmap_len, Some(&image), &ty, &tunables)
                .unwrap();
        };

        // Dirties exactly one page of the slot with 0xff bytes.
        let write_page = |_memfd: &mut MemoryImageSlot, page: usize| unsafe {
            with_slice_mut(
                &mmap,
                page * page_size..(page + 1) * page_size,
                move |slice| slice.fill(0xff),
            );
        };

        // Test various combinations of dirty pages and regions. For example
        // test a dirty region of memory entirely in the zero-initialized zone
        // before/after the image and also test when the dirty region straddles
        // just the start of the image, just the end of the image, both ends,
        // and is entirely contained in just the image.
        assert_pristine_after_reset(&mut memfd);

        for i in 0..9 {
            write_page(&mut memfd, i);
            assert_pristine_after_reset(&mut memfd);
        }
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 1);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 1);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 2);
        write_page(&mut memfd, 3);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 1);
        write_page(&mut memfd, 2);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 6);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 2);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 6);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 6);
        write_page(&mut memfd, 7);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 8);
        assert_pristine_after_reset(&mut memfd);
    }
1212}