wasmtime_cache/
lib.rs

1use anyhow::Result;
2use base64::Engine;
3use log::{debug, trace, warn};
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::hash::Hash;
7use std::hash::Hasher;
8use std::io::Write;
9use std::path::{Path, PathBuf};
10use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
11use std::sync::Arc;
12use std::time::Duration;
13use std::{fs, io};
14
15#[macro_use] // for tests
16mod config;
17mod worker;
18
19pub use config::{create_new_config, CacheConfig};
20use worker::Worker;
21
/// Global configuration for how the cache is managed
///
/// Bundles the cache settings, the worker that receives cache events, and
/// the hit/miss counters.
#[derive(Debug, Clone)]
pub struct Cache {
    /// Settings served by the getter methods on `Cache`.
    config: CacheConfig,
    /// Receives the notifications forwarded by `on_cache_get_async` and
    /// `on_cache_update_async`.
    worker: Worker,
    /// Hit and miss counters, held behind an `Arc`.
    state: Arc<CacheState>,
}
29
/// Generates a public getter named `$setting` that forwards to the method of
/// the same name on `self.config`.
macro_rules! generate_config_setting_getter {
    ($setting:ident: $setting_type:ty) => {
        // Macro metavariables are not substituted inside `///` comments, so the
        // setting name is spliced into the rendered docs with `stringify!`.
        #[doc = concat!("Returns `", stringify!($setting), "`.")]
        ///
        /// Panics if the cache is disabled.
        pub fn $setting(&self) -> $setting_type {
            self.config.$setting()
        }
    };
}
40
41impl Cache {
42    /// Builds a [`Cache`] from the configuration and spawns the cache worker.
43    ///
44    /// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
45    /// You can call [`CacheConfig::new`] for the default configuration.
46    ///
47    /// # Errors
48    /// Returns an error if the configuration is invalid.
49    pub fn new(mut config: CacheConfig) -> Result<Self> {
50        config.validate()?;
51        Ok(Self {
52            worker: Worker::start_new(&config),
53            config,
54            state: Default::default(),
55        })
56    }
57
58    /// Loads cache configuration specified at `path`.
59    ///
60    /// This method will read the file specified by `path` on the filesystem and
61    /// attempt to load cache configuration from it. This method can also fail
62    /// due to I/O errors, misconfiguration, syntax errors, etc. For expected
63    /// syntax in the configuration file see the [documentation online][docs].
64    ///
65    /// Passing in `None` loads cache configuration from the system default path.
66    /// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
67    /// and is typically created with the `wasmtime config new` command.
68    ///
69    /// # Errors
70    ///
71    /// This method can fail due to any error that happens when loading the file
72    /// pointed to by `path` and attempting to load the cache configuration.
73    ///
74    /// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
75    pub fn from_file(path: Option<&Path>) -> Result<Self> {
76        let config = CacheConfig::from_file(path)?;
77        Self::new(config)
78    }
79
80    generate_config_setting_getter!(worker_event_queue_size: u64);
81    generate_config_setting_getter!(baseline_compression_level: i32);
82    generate_config_setting_getter!(optimized_compression_level: i32);
83    generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
84    generate_config_setting_getter!(cleanup_interval: Duration);
85    generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
86    generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
87    generate_config_setting_getter!(file_count_soft_limit: u64);
88    generate_config_setting_getter!(files_total_size_soft_limit: u64);
89    generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
90    generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
91
92    /// Returns path to the cache directory.
93    ///
94    /// Panics if the cache directory is not set.
95    pub fn directory(&self) -> &PathBuf {
96        &self.config.directory()
97    }
98
99    #[cfg(test)]
100    fn worker(&self) -> &Worker {
101        &self.worker
102    }
103
104    /// Returns the number of cache hits seen so far
105    pub fn cache_hits(&self) -> usize {
106        self.state.hits.load(SeqCst)
107    }
108
109    /// Returns the number of cache misses seen so far
110    pub fn cache_misses(&self) -> usize {
111        self.state.misses.load(SeqCst)
112    }
113
114    pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
115        self.state.hits.fetch_add(1, SeqCst);
116        self.worker.on_cache_get_async(path)
117    }
118
119    pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
120        self.state.misses.fetch_add(1, SeqCst);
121        self.worker.on_cache_update_async(path)
122    }
123}
124
/// Counters describing cache activity, held by [`Cache`] behind an `Arc`.
#[derive(Default, Debug)]
struct CacheState {
    /// Incremented by `Cache::on_cache_get_async`; read by `Cache::cache_hits`.
    hits: AtomicUsize,
    /// Incremented by `Cache::on_cache_update_async`; read by `Cache::cache_misses`.
    misses: AtomicUsize,
}
130
/// Module level cache entry.
///
/// Holds `None` when it was created without a [`Cache`]; lookups then call
/// `compute` directly and never consult the cache.
pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
133
/// State of a [`ModuleCacheEntry`] that is backed by a [`Cache`].
struct ModuleCacheEntryInner<'cache> {
    /// Directory holding the entries; each file is named after the hash it is
    /// looked up by.
    root_path: PathBuf,
    /// Cache that supplies the compression level and is notified about reads
    /// and writes of entries.
    cache: &'cache Cache,
}
138
/// Adapter that lets `Hash` implementations feed their bytes into a SHA-256
/// digest. The digest is taken from the wrapped `Sha256`; `Hasher::finish`
/// is not supported and panics.
struct Sha256Hasher(Sha256);
140
141impl<'cache> ModuleCacheEntry<'cache> {
142    /// Create the cache entry.
143    pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
144        Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
145    }
146
147    #[cfg(test)]
148    fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
149        Self(Some(inner))
150    }
151
152    /// Gets cached data if state matches, otherwise calls `compute`.
153    ///
154    /// Data is automatically serialized/deserialized with `bincode`.
155    pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
156    where
157        T: Hash,
158        U: Serialize + for<'a> Deserialize<'a>,
159    {
160        self.get_data_raw(
161            &state,
162            compute,
163            |_state, data| postcard::to_allocvec(data).ok(),
164            |_state, data| postcard::from_bytes(&data).ok(),
165        )
166    }
167
168    /// Gets cached data if state matches, otherwise calls `compute`.
169    ///
170    /// If the cache is disabled or no cached data is found then `compute` is
171    /// called to calculate the data. If the data was found in cache it is
172    /// passed to `deserialize`, which if successful will be the returned value.
173    /// When computed the `serialize` function is used to generate the bytes
174    /// from the returned value.
175    pub fn get_data_raw<T, U, E>(
176        &self,
177        state: &T,
178        // NOTE: These are function pointers instead of closures so that they
179        // don't accidentally close over something not accounted in the cache.
180        compute: fn(&T) -> Result<U, E>,
181        serialize: fn(&T, &U) -> Option<Vec<u8>>,
182        deserialize: fn(&T, Vec<u8>) -> Option<U>,
183    ) -> Result<U, E>
184    where
185        T: Hash,
186    {
187        let inner = match &self.0 {
188            Some(inner) => inner,
189            None => return compute(state),
190        };
191
192        let mut hasher = Sha256Hasher(Sha256::new());
193        state.hash(&mut hasher);
194        let hash: [u8; 32] = hasher.0.finalize().into();
195        // standard encoding uses '/' which can't be used for filename
196        let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
197
198        if let Some(cached_val) = inner.get_data(&hash) {
199            if let Some(val) = deserialize(state, cached_val) {
200                let mod_cache_path = inner.root_path.join(&hash);
201                inner.cache.on_cache_get_async(&mod_cache_path); // call on success
202                return Ok(val);
203            }
204        }
205        let val_to_cache = compute(state)?;
206        if let Some(bytes) = serialize(state, &val_to_cache) {
207            if inner.update_data(&hash, &bytes).is_some() {
208                let mod_cache_path = inner.root_path.join(&hash);
209                inner.cache.on_cache_update_async(&mod_cache_path); // call on success
210            }
211        }
212        Ok(val_to_cache)
213    }
214}
215
216impl<'cache> ModuleCacheEntryInner<'cache> {
217    fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
218        // If debug assertions are enabled then assume that we're some sort of
219        // local build. We don't want local builds to stomp over caches between
220        // builds, so just use a separate cache directory based on the mtime of
221        // our executable, which should roughly correlate with "you changed the
222        // source code so you get a different directory".
223        //
224        // Otherwise if this is a release build we use the `GIT_REV` env var
225        // which is either the git rev if installed from git or the crate
226        // version if installed from crates.io.
227        let compiler_dir = if cfg!(debug_assertions) {
228            fn self_mtime() -> Option<String> {
229                let path = std::env::current_exe().ok()?;
230                let metadata = path.metadata().ok()?;
231                let mtime = metadata.modified().ok()?;
232                Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
233                    Ok(dur) => format!("{}", dur.as_millis()),
234                    Err(err) => format!("m{}", err.duration().as_millis()),
235                })
236            }
237            let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
238            format!(
239                "{comp_name}-{comp_ver}-{comp_mtime}",
240                comp_name = compiler_name,
241                comp_ver = env!("GIT_REV"),
242                comp_mtime = self_mtime,
243            )
244        } else {
245            format!(
246                "{comp_name}-{comp_ver}",
247                comp_name = compiler_name,
248                comp_ver = env!("GIT_REV"),
249            )
250        };
251        let root_path = cache.directory().join("modules").join(compiler_dir);
252
253        Self { root_path, cache }
254    }
255
256    fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
257        let mod_cache_path = self.root_path.join(hash);
258        trace!("get_data() for path: {}", mod_cache_path.display());
259        let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
260        let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
261            .map_err(|err| warn!("Failed to decompress cached code: {}", err))
262            .ok()?;
263        Some(cache_bytes)
264    }
265
266    fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
267        let mod_cache_path = self.root_path.join(hash);
268        trace!("update_data() for path: {}", mod_cache_path.display());
269        let compressed_data = zstd::encode_all(
270            &serialized_data[..],
271            self.cache.baseline_compression_level(),
272        )
273        .map_err(|err| warn!("Failed to compress cached code: {}", err))
274        .ok()?;
275
276        // Optimize syscalls: first, try writing to disk. It should succeed in most cases.
277        // Otherwise, try creating the cache directory and retry writing to the file.
278        if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
279            return Some(());
280        }
281
282        debug!(
283            "Attempting to create the cache directory, because \
284             failed to write cached code to disk, path: {}",
285            mod_cache_path.display(),
286        );
287
288        let cache_dir = mod_cache_path.parent().unwrap();
289        fs::create_dir_all(cache_dir)
290            .map_err(|err| {
291                warn!(
292                    "Failed to create cache directory, path: {}, message: {}",
293                    cache_dir.display(),
294                    err
295                )
296            })
297            .ok()?;
298
299        match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
300            Ok(_) => Some(()),
301            Err(err) => {
302                warn!(
303                    "Failed to write file with rename, target path: {}, err: {}",
304                    mod_cache_path.display(),
305                    err
306                );
307                None
308            }
309        }
310    }
311}
312
313impl Hasher for Sha256Hasher {
314    fn finish(&self) -> u64 {
315        panic!("Sha256Hasher doesn't support finish!");
316    }
317
318    fn write(&mut self, bytes: &[u8]) {
319        self.0.update(bytes);
320    }
321}
322
// Writes `contents` to a staging file next to `path` and then renames it onto
// `path`. The staging file is `path` with its extension replaced by
// `wip-atomic-write-<reason>`.
//
// Assumption: `path` lives inside the cache directory, so sound OS-specific
// exclusive file access is not needed.
// Note: a staging file left behind by a failure is not removed here - the
// cleanup task will do it later.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // `create_new` makes the creation atomic (assumption: no one will open
        // the staging file without this flag).
        let mut staging_file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // The file goes out of scope here, so it is closed before the rename.
    }
    // atomic file rename
    fs::rename(&staging_path, path)
}
336
337#[cfg(test)]
338mod tests;