wasmtime/runtime/vm/instance/allocator/pooling/unix_stack_pool.rs

#![cfg_attr(asan, allow(dead_code))]

use super::index_allocator::{SimpleIndexAllocator, SlotId};
use crate::prelude::*;
use crate::runtime::vm::sys::vm::commit_pages;
use crate::runtime::vm::{
    HostAlignedByteCount, Mmap, PoolingInstanceAllocatorConfig, mmap::AlignedLength,
};

/// Represents a pool of execution stacks (used for the async fiber implementation).
///
/// Each index into the pool represents a single execution stack. The maximum number of
/// stacks is given by the `total_stacks` limit in the pooling allocator configuration.
///
/// As stacks grow downwards, each stack starts (lowest address) with a guard page
/// that can be used to detect stack overflow.
///
/// The top of the stack (starting stack pointer) is returned when a stack is allocated
/// from the pool.
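///
/// Illustrative slot layout (a sketch, assuming a 4 KiB host page and a 64 KiB
/// configured stack size; actual sizes depend on the host page size and the
/// pooling allocator configuration):
///
/// ```text
/// +----------------------+ <- start of slot i = base + i * stack_size
/// |  guard page (4 KiB)  |    inaccessible; faults on stack overflow
/// +----------------------+ <- bottom of the usable stack
/// |                      |
/// |  usable stack        |    grows downward from the top
/// |  (64 KiB)            |
/// +----------------------+ <- top of stack returned by `allocate`
/// ```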
#[derive(Debug)]
pub struct StackPool {
    mapping: Mmap<AlignedLength>,
    stack_size: HostAlignedByteCount,
    max_stacks: usize,
    page_size: HostAlignedByteCount,
    index_allocator: SimpleIndexAllocator,
    async_stack_zeroing: bool,
    async_stack_keep_resident: HostAlignedByteCount,
}

impl StackPool {
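    /// Create a new stack pool from the given pooling allocator `config`.
    ///
    /// Illustrative construction (a sketch; the types involved are
    /// crate-internal, so this is not a compiled doctest):
    ///
    /// ```ignore
    /// let config = PoolingInstanceAllocatorConfig {
    ///     stack_size: 64 * 1024, // rounded up to the host page size, plus a guard page
    ///     ..PoolingInstanceAllocatorConfig::default()
    /// };
    /// let pool = StackPool::new(&config)?;
    /// assert!(pool.is_empty());
    /// ```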
    pub fn new(config: &PoolingInstanceAllocatorConfig) -> Result<Self> {
        use rustix::mm::{MprotectFlags, mprotect};

        let page_size = HostAlignedByteCount::host_page_size();

        // Add a page to the stack size for the guard page when using fiber stacks
        let stack_size = if config.stack_size == 0 {
            HostAlignedByteCount::ZERO
        } else {
            HostAlignedByteCount::new_rounded_up(config.stack_size)
                .and_then(|size| size.checked_add(HostAlignedByteCount::host_page_size()))
                .context("stack size exceeds addressable memory")?
        };
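        // For example (a sketch, assuming a 4 KiB host page size): a configured
        // stack size of 64 KiB yields a 68 KiB slot, and a configured size of
        // 61 KiB first rounds up to 64 KiB and likewise yields a 68 KiB slot.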

        let max_stacks = usize::try_from(config.limits.total_stacks).unwrap();

        let allocation_size = stack_size
            .checked_mul(max_stacks)
            .context("total size of execution stacks exceeds addressable memory")?;

        let mapping = Mmap::accessible_reserved(allocation_size, allocation_size)
            .context("failed to create stack pool mapping")?;

        // Set up the stack guard pages.
        if !allocation_size.is_zero() {
            unsafe {
                for i in 0..max_stacks {
                    // Safety: i < max_stacks and we've already checked that
                    // stack_size * max_stacks is valid.
                    let offset = stack_size.unchecked_mul(i);
                    // Make the stack guard page inaccessible.
                    let bottom_of_stack = mapping.as_ptr().add(offset.byte_count()).cast_mut();
                    mprotect(
                        bottom_of_stack.cast(),
                        page_size.byte_count(),
                        MprotectFlags::empty(),
                    )
                    .context("failed to protect stack guard page")?;
                }
            }
        }

        Ok(Self {
            mapping,
            stack_size,
            max_stacks,
            page_size,
            async_stack_zeroing: config.async_stack_zeroing,
            async_stack_keep_resident: HostAlignedByteCount::new_rounded_up(
                config.async_stack_keep_resident,
            )?,
            index_allocator: SimpleIndexAllocator::new(config.limits.total_stacks),
        })
    }

    /// Are there zero slots in use right now?
    pub fn is_empty(&self) -> bool {
        self.index_allocator.is_empty()
    }

    /// Allocate a new fiber stack.
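    ///
    /// Illustrative lifecycle (a sketch; `decommit` stands for a
    /// caller-provided decommit routine and is not defined in this file):
    ///
    /// ```ignore
    /// let mut stack = pool.allocate()?;
    /// // ... run a fiber on the stack ...
    /// unsafe {
    ///     pool.zero_stack(&mut stack, |ptr, len| decommit(ptr, len));
    ///     pool.deallocate(stack);
    /// }
    /// ```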
    pub fn allocate(&self) -> Result<wasmtime_fiber::FiberStack> {
        if self.stack_size.is_zero() {
            bail!("pooling allocator not configured to enable fiber stack allocation");
        }

        let index = self
            .index_allocator
            .alloc()
            .ok_or_else(|| super::PoolConcurrencyLimitError::new(self.max_stacks, "fibers"))?
            .index();

        assert!(index < self.max_stacks);

        unsafe {
            // Remove the guard page from the size
            let size_without_guard = self.stack_size.checked_sub(self.page_size).expect(
                "self.stack_size is host-page-aligned and is > 0,\
                 so it must be >= self.page_size",
            );

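            // The slot for `index` begins at `base + index * stack_size`; its
            // first page is the guard page and the remaining `size_without_guard`
            // bytes form the usable stack. For example (a sketch, assuming
            // 4 KiB pages and 68 KiB slots), index 2 begins at `base + 136 KiB`.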
            let bottom_of_stack = self
                .mapping
                .as_ptr()
                .add(self.stack_size.unchecked_mul(index).byte_count())
                .cast_mut();

            commit_pages(bottom_of_stack, size_without_guard.byte_count())?;

            let stack = wasmtime_fiber::FiberStack::from_raw_parts(
                bottom_of_stack,
                self.page_size.byte_count(),
                size_without_guard.byte_count(),
            )?;
            Ok(stack)
        }
    }

    /// Zero the given stack, if we are configured to do so.
    ///
    /// This will call the given `decommit` function for each region of memory
    /// that should be decommitted. It is the caller's responsibility to ensure
    /// that those decommits happen before this stack is reused.
    ///
    /// # Panics
    ///
    /// `zero_stack` panics if the passed in `stack` was not created by
    /// [`Self::allocate`].
    ///
    /// # Safety
    ///
    /// The stack must no longer be in use, and ready for returning to the pool
    /// after it is zeroed and decommitted.
    pub unsafe fn zero_stack(
        &self,
        stack: &mut wasmtime_fiber::FiberStack,
        mut decommit: impl FnMut(*mut u8, usize),
    ) {
        assert!(stack.is_from_raw_parts());
        assert!(
            !self.stack_size.is_zero(),
            "pooling allocator not configured to enable fiber stack allocation \
             (Self::allocate should have returned an error)"
        );

        if !self.async_stack_zeroing {
            return;
        }

        let top = stack
            .top()
            .expect("fiber stack not allocated from the pool") as usize;

        let base = self.mapping.as_ptr() as usize;
        let len = self.mapping.len();
        assert!(
            top > base && top <= (base + len),
            "fiber stack top pointer not in range"
        );

        // Remove the guard page from the size.
        let stack_size = self.stack_size.checked_sub(self.page_size).expect(
            "self.stack_size is host-page-aligned and is > 0,\
             so it must be >= self.page_size",
        );
        let bottom_of_stack = top - stack_size.byte_count();
        let start_of_stack = bottom_of_stack - self.page_size.byte_count();
        assert!(start_of_stack >= base && start_of_stack < (base + len));
        assert!((start_of_stack - base) % self.stack_size.byte_count() == 0);

        // Manually zero the top of the stack to keep the pages resident in
        // memory and avoid future page faults. Use the system to deallocate
        // pages past this. This hopefully strikes a reasonable balance between:
        //
        // * memset for the whole range is probably expensive
        // * madvise for the whole range incurs expensive future page faults
        // * most threads probably don't use most of the stack anyway
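        //
        // For example (a sketch): with a 1 MiB usable stack and a 64 KiB
        // keep-resident setting, the top 64 KiB is zeroed in place with memset
        // and the lower 960 KiB is handed to `decommit`.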
        let size_to_memset = stack_size.min(self.async_stack_keep_resident);
        let rest = stack_size
            .checked_sub(size_to_memset)
            .expect("stack_size >= size_to_memset");

        // SAFETY: this function's own contract requires that the stack is not
        // in use so it's safe to pave over part of it with zero.
        unsafe {
            std::ptr::write_bytes(
                (bottom_of_stack + rest.byte_count()) as *mut u8,
                0,
                size_to_memset.byte_count(),
            );
        }

        // Use the system to reset remaining stack pages to zero.
        decommit(bottom_of_stack as _, rest.byte_count());
    }

    /// Deallocate a previously-allocated fiber stack.
    ///
    /// # Safety
    ///
    /// The fiber stack must have been allocated by this pool, must be in an
    /// allocated state, and must never be used again.
    ///
    /// The caller must have already called `zero_stack` on the fiber stack and
    /// flushed any enqueued decommits for this stack's memory.
    pub unsafe fn deallocate(&self, stack: wasmtime_fiber::FiberStack) {
        assert!(stack.is_from_raw_parts());

        let top = stack
            .top()
            .expect("fiber stack not allocated from the pool") as usize;

        let base = self.mapping.as_ptr() as usize;
        let len = self.mapping.len();
        assert!(
            top > base && top <= (base + len),
            "fiber stack top pointer not in range"
        );

        // Remove the guard page from the size
        let stack_size = self.stack_size.byte_count() - self.page_size.byte_count();
        let bottom_of_stack = top - stack_size;
        let start_of_stack = bottom_of_stack - self.page_size.byte_count();
        assert!(start_of_stack >= base && start_of_stack < (base + len));
        assert!((start_of_stack - base) % self.stack_size.byte_count() == 0);

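        // Recover the slot index from the slot's start address. For example (a
        // sketch, assuming 4 KiB pages and 68 KiB slots): a stack whose top is
        // `base + 136 KiB` has `start_of_stack == base + 68 KiB`, which maps
        // back to index 1.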
        let index = (start_of_stack - base) / self.stack_size.byte_count();
        assert!(index < self.max_stacks);
        let index = u32::try_from(index).unwrap();

        self.index_allocator.free(SlotId(index));
    }
}

#[cfg(all(test, unix, feature = "async", not(miri), not(asan)))]
mod tests {
    use super::*;
    use crate::runtime::vm::InstanceLimits;

    #[test]
    fn test_stack_pool() -> Result<()> {
        let config = PoolingInstanceAllocatorConfig {
            limits: InstanceLimits {
                total_stacks: 10,
                ..Default::default()
            },
            stack_size: 1,
            async_stack_zeroing: true,
            ..PoolingInstanceAllocatorConfig::default()
        };
        let pool = StackPool::new(&config)?;

        let native_page_size = crate::runtime::vm::host_page_size();
        assert_eq!(pool.stack_size, 2 * native_page_size);
        assert_eq!(pool.max_stacks, 10);
        assert_eq!(pool.page_size, native_page_size);

        assert_eq!(pool.index_allocator.testing_freelist(), []);

        let base = pool.mapping.as_ptr() as usize;

        let mut stacks = Vec::new();
        for i in 0..10 {
            let stack = pool.allocate().expect("allocation should succeed");
            assert_eq!(
                ((stack.top().unwrap() as usize - base) / pool.stack_size.byte_count()) - 1,
                i
            );
            stacks.push(stack);
        }

        assert_eq!(pool.index_allocator.testing_freelist(), []);

        assert!(pool.allocate().is_err(), "allocation should fail");

        for stack in stacks {
            unsafe {
                pool.deallocate(stack);
            }
        }

        assert_eq!(
            pool.index_allocator.testing_freelist(),
            [
                SlotId(0),
                SlotId(1),
                SlotId(2),
                SlotId(3),
                SlotId(4),
                SlotId(5),
                SlotId(6),
                SlotId(7),
                SlotId(8),
                SlotId(9)
            ],
        );

        Ok(())
    }
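
    // A minimal extra check (a sketch, assuming an empty reservation is
    // accepted when `stack_size` is configured as 0): the pool can be built,
    // but `allocate` refuses to hand out fiber stacks.
    #[test]
    fn test_zero_stack_size_disables_allocation() -> Result<()> {
        let config = PoolingInstanceAllocatorConfig {
            stack_size: 0,
            ..PoolingInstanceAllocatorConfig::default()
        };
        let pool = StackPool::new(&config)?;
        assert!(pool.is_empty());
        assert!(pool.allocate().is_err());
        Ok(())
    }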
}