wasmtime_internal_cache/
lib.rs

1//! > **⚠️ Warning ⚠️**: this crate is an internal-only crate for the Wasmtime
2//! > project and is not intended for general use. APIs are not strictly
3//! > reviewed for safety and usage outside of Wasmtime may have bugs. If
4//! > you're interested in using this feel free to file an issue on the
5//! > Wasmtime repository to start a discussion about doing so, but otherwise
6//! > be aware that your usage of this crate is not supported.
7
8use anyhow::Result;
9use base64::Engine;
10use log::{debug, trace, warn};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use std::hash::Hash;
14use std::hash::Hasher;
15use std::io::Write;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
19use std::time::Duration;
20use std::{fs, io};
21
22#[macro_use] // for tests
23mod config;
24mod worker;
25
26pub use config::{CacheConfig, create_new_config};
27use worker::Worker;
28
29/// Global configuration for how the cache is managed
///
/// Besides the [`CacheConfig`] itself, a `Cache` holds the [`Worker`] started
/// for that configuration and the hit/miss counters. The counters sit behind
/// an `Arc`, so values produced by the derived `Clone` report the same totals.
30#[derive(Debug, Clone)]
31pub struct Cache {
    // Settings handed to `Cache::new`, stored after `validate()` succeeded.
32    config: CacheConfig,
    // Worker that is notified about every cache hit and cache update.
33    worker: Worker,
    // Hit/miss counters, shared between clones of this handle.
34    state: Arc<CacheState>,
35}
36
/// Generates a public getter that forwards to the accessor of the same name
/// on `self.config`.
///
/// `generate_config_setting_getter!(name: Type)` expands to
/// `pub fn name(&self) -> Type { self.config.name() }`, so it has to be
/// invoked inside an `impl` block whose `self` has a `config` field exposing
/// `name()`. The generated rustdoc reads "Returns `name`." followed by a
/// `# Panics` section.
macro_rules! generate_config_setting_getter {
    ($setting:ident: $setting_type:ty) => {
        #[doc = concat!("Returns `", stringify!($setting), "`.")]
        ///
        /// # Panics
        ///
        /// Panics if the cache is disabled.
        pub fn $setting(&self) -> $setting_type {
            self.config.$setting()
        }
    };
}
47
48impl Cache {
49    /// Builds a [`Cache`] from the configuration and spawns the cache worker.
50    ///
51    /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
52    /// You can call [`CacheConfig::new`] for the default configuration.
53    ///
54    /// # Errors
55    /// Returns an error if the configuration is invalid.
56    pub fn new(mut config: CacheConfig) -> Result<Self> {
57        config.validate()?;
58        Ok(Self {
59            worker: Worker::start_new(&config),
60            config,
61            state: Default::default(),
62        })
63    }
64
65    /// Loads cache configuration specified at `path`.
66    ///
67    /// This method will read the file specified by `path` on the filesystem and
68    /// attempt to load cache configuration from it. This method can also fail
69    /// due to I/O errors, misconfiguration, syntax errors, etc. For expected
70    /// syntax in the configuration file see the [documentation online][docs].
71    ///
72    /// Passing in `None` loads cache configuration from the system default path.
73    /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
74    /// and is typically created with the `wasmtime config new` command.
75    ///
76    /// # Errors
77    ///
78    /// This method can fail due to any error that happens when loading the file
79    /// pointed to by `path` and attempting to load the cache configuration.
80    ///
81    /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
82    pub fn from_file(path: Option<&Path>) -> Result<Self> {
83        let config = CacheConfig::from_file(path)?;
84        Self::new(config)
85    }
86
87    generate_config_setting_getter!(worker_event_queue_size: u64);
88    generate_config_setting_getter!(baseline_compression_level: i32);
89    generate_config_setting_getter!(optimized_compression_level: i32);
90    generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
91    generate_config_setting_getter!(cleanup_interval: Duration);
92    generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
93    generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
94    generate_config_setting_getter!(file_count_soft_limit: u64);
95    generate_config_setting_getter!(files_total_size_soft_limit: u64);
96    generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
97    generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
98
99    /// Returns path to the cache directory.
100    ///
101    /// Panics if the cache directory is not set.
102    pub fn directory(&self) -> &PathBuf {
103        &self.config.directory()
104    }
105
106    #[cfg(test)]
107    fn worker(&self) -> &Worker {
108        &self.worker
109    }
110
111    /// Returns the number of cache hits seen so far
112    pub fn cache_hits(&self) -> usize {
113        self.state.hits.load(SeqCst)
114    }
115
116    /// Returns the number of cache misses seen so far
117    pub fn cache_misses(&self) -> usize {
118        self.state.misses.load(SeqCst)
119    }
120
121    pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
122        self.state.hits.fetch_add(1, SeqCst);
123        self.worker.on_cache_get_async(path)
124    }
125
126    pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
127        self.state.misses.fetch_add(1, SeqCst);
128        self.worker.on_cache_update_async(path)
129    }
130}
131
/// Counters reported by `Cache::cache_hits` and `Cache::cache_misses`.
///
/// The derived `Default` starts both counters at zero.
132#[derive(Default, Debug)]
133struct CacheState {
    // Incremented by `Cache::on_cache_get_async`.
134    hits: AtomicUsize,
    // Incremented by `Cache::on_cache_update_async`.
135    misses: AtomicUsize,
136}
137
138/// Module level cache entry.
///
/// Holds `None` when no [`Cache`] was supplied to [`ModuleCacheEntry::new`];
/// lookups on such an entry call `compute` directly.
139pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
140
/// State of a [`ModuleCacheEntry`] that is backed by a [`Cache`].
141struct ModuleCacheEntryInner<'cache> {
    // Directory holding this compiler's entries:
    // `<cache directory>/modules/<compiler directory>`.
142    root_path: PathBuf,
    // Cache whose directory, compression level and hit/miss hooks are used.
143    cache: &'cache Cache,
144}
145
/// Adapter that lets a `Hash` value feed its bytes into a SHA-256 digest.
///
/// The digest is read from the wrapped `Sha256`; `Hasher::finish` panics.
146struct Sha256Hasher(Sha256);
147
148impl<'cache> ModuleCacheEntry<'cache> {
149    /// Create the cache entry.
150    pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
151        Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
152    }
153
154    #[cfg(test)]
155    fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
156        Self(Some(inner))
157    }
158
159    /// Gets cached data if state matches, otherwise calls `compute`.
160    ///
161    /// Data is automatically serialized/deserialized with `bincode`.
162    pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
163    where
164        T: Hash,
165        U: Serialize + for<'a> Deserialize<'a>,
166    {
167        self.get_data_raw(
168            &state,
169            compute,
170            |_state, data| postcard::to_allocvec(data).ok(),
171            |_state, data| postcard::from_bytes(&data).ok(),
172        )
173    }
174
175    /// Gets cached data if state matches, otherwise calls `compute`.
176    ///
177    /// If the cache is disabled or no cached data is found then `compute` is
178    /// called to calculate the data. If the data was found in cache it is
179    /// passed to `deserialize`, which if successful will be the returned value.
180    /// When computed the `serialize` function is used to generate the bytes
181    /// from the returned value.
182    pub fn get_data_raw<T, U, E>(
183        &self,
184        state: &T,
185        // NOTE: These are function pointers instead of closures so that they
186        // don't accidentally close over something not accounted in the cache.
187        compute: fn(&T) -> Result<U, E>,
188        serialize: fn(&T, &U) -> Option<Vec<u8>>,
189        deserialize: fn(&T, Vec<u8>) -> Option<U>,
190    ) -> Result<U, E>
191    where
192        T: Hash,
193    {
194        let inner = match &self.0 {
195            Some(inner) => inner,
196            None => return compute(state),
197        };
198
199        let mut hasher = Sha256Hasher(Sha256::new());
200        state.hash(&mut hasher);
201        let hash: [u8; 32] = hasher.0.finalize().into();
202        // standard encoding uses '/' which can't be used for filename
203        let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
204
205        if let Some(cached_val) = inner.get_data(&hash) {
206            if let Some(val) = deserialize(state, cached_val) {
207                let mod_cache_path = inner.root_path.join(&hash);
208                inner.cache.on_cache_get_async(&mod_cache_path); // call on success
209                return Ok(val);
210            }
211        }
212        let val_to_cache = compute(state)?;
213        if let Some(bytes) = serialize(state, &val_to_cache) {
214            if inner.update_data(&hash, &bytes).is_some() {
215                let mod_cache_path = inner.root_path.join(&hash);
216                inner.cache.on_cache_update_async(&mod_cache_path); // call on success
217            }
218        }
219        Ok(val_to_cache)
220    }
221}
222
223impl<'cache> ModuleCacheEntryInner<'cache> {
224    fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
225        // If debug assertions are enabled then assume that we're some sort of
226        // local build. We don't want local builds to stomp over caches between
227        // builds, so just use a separate cache directory based on the mtime of
228        // our executable, which should roughly correlate with "you changed the
229        // source code so you get a different directory".
230        //
231        // Otherwise if this is a release build we use the `GIT_REV` env var
232        // which is either the git rev if installed from git or the crate
233        // version if installed from crates.io.
234        let compiler_dir = if cfg!(debug_assertions) {
235            fn self_mtime() -> Option<String> {
236                let path = std::env::current_exe().ok()?;
237                let metadata = path.metadata().ok()?;
238                let mtime = metadata.modified().ok()?;
239                Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
240                    Ok(dur) => format!("{}", dur.as_millis()),
241                    Err(err) => format!("m{}", err.duration().as_millis()),
242                })
243            }
244            let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
245            format!(
246                "{comp_name}-{comp_ver}-{comp_mtime}",
247                comp_name = compiler_name,
248                comp_ver = env!("GIT_REV"),
249                comp_mtime = self_mtime,
250            )
251        } else {
252            format!(
253                "{comp_name}-{comp_ver}",
254                comp_name = compiler_name,
255                comp_ver = env!("GIT_REV"),
256            )
257        };
258        let root_path = cache.directory().join("modules").join(compiler_dir);
259
260        Self { root_path, cache }
261    }
262
263    fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
264        let mod_cache_path = self.root_path.join(hash);
265        trace!("get_data() for path: {}", mod_cache_path.display());
266        let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
267        let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
268            .map_err(|err| warn!("Failed to decompress cached code: {}", err))
269            .ok()?;
270        Some(cache_bytes)
271    }
272
273    fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
274        let mod_cache_path = self.root_path.join(hash);
275        trace!("update_data() for path: {}", mod_cache_path.display());
276        let compressed_data = zstd::encode_all(
277            &serialized_data[..],
278            self.cache.baseline_compression_level(),
279        )
280        .map_err(|err| warn!("Failed to compress cached code: {}", err))
281        .ok()?;
282
283        // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
284        // Otherwise, try creating the cache directory and retry writing to the file.
285        if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
286            return Some(());
287        }
288
289        debug!(
290            "Attempting to create the cache directory, because \
291             failed to write cached code to disk, path: {}",
292            mod_cache_path.display(),
293        );
294
295        let cache_dir = mod_cache_path.parent().unwrap();
296        fs::create_dir_all(cache_dir)
297            .map_err(|err| {
298                warn!(
299                    "Failed to create cache directory, path: {}, message: {}",
300                    cache_dir.display(),
301                    err
302                )
303            })
304            .ok()?;
305
306        match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
307            Ok(_) => Some(()),
308            Err(err) => {
309                warn!(
310                    "Failed to write file with rename, target path: {}, err: {}",
311                    mod_cache_path.display(),
312                    err
313                );
314                None
315            }
316        }
317    }
318}
319
320impl Hasher for Sha256Hasher {
321    fn finish(&self) -> u64 {
322        panic!("Sha256Hasher doesn't support finish!");
323    }
324
325    fn write(&mut self, bytes: &[u8]) {
326        self.0.update(bytes);
327    }
328}
329
/// Writes `contents` to `path` by filling a sibling staging file and then
/// renaming it onto `path`.
///
/// The staging file is `path.with_extension("wip-atomic-write-{reason}")` and
/// is opened with `create_new`, so the call fails with
/// [`io::ErrorKind::AlreadyExists`] while a staging file of that name exists.
///
/// Assumption: path inside cache directory.
/// Then, we don't have to use sound OS-specific exclusive file access.
/// Note: there's no need to remove temporary file here - cleanup task will do it later.
///
/// # Errors
///
/// Returns the first I/O error hit while creating the staging file, writing
/// `contents` to it, or renaming it to `path`.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // `staging_file` is dropped, and thereby closed, at the end of this scope.
    }
    fs::rename(&staging_path, path) // atomic file rename
}
343
344#[cfg(test)]
345mod tests;