clif_util/
souper_harvest.rs

1use crate::utils::{iterate_files, read_to_string};
2use anyhow::{Context as _, Result};
3use clap::Parser;
4use cranelift_codegen::control::ControlPlane;
5use cranelift_codegen::ir::Function;
6use cranelift_codegen::Context;
7use cranelift_reader::parse_sets_and_triple;
8use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
9use std::collections::HashSet;
10use std::hash::{BuildHasher, BuildHasherDefault};
11use std::io::Write;
12use std::path::PathBuf;
13use std::{fs, io};
14
15/// Harvest candidates for superoptimization from a Wasm or Clif file.
16///
17/// Candidates are emitted in Souper's text format:
18/// <https://github.com/google/souper>
19#[derive(Parser)]
20pub struct Options {
21    /// Specify an input file to be used. Use '-' for stdin.
22    input: Vec<PathBuf>,
23
24    /// Specify the directory where harvested left-hand side files should be
25    /// written to.
26    #[arg(short, long)]
27    output_dir: PathBuf,
28
29    /// Configure Cranelift settings
30    #[arg(long = "set")]
31    settings: Vec<String>,
32
33    /// Specify the Cranelift target
34    #[arg(long = "target")]
35    target: String,
36
37    /// Add a comment from which CLIF variable and function each left-hand side
38    /// was harvested from. This prevents deduplicating harvested left-hand
39    /// sides.
40    #[arg(long)]
41    add_harvest_source: bool,
42}
43
44pub fn run(options: &Options) -> Result<()> {
45    let parsed = parse_sets_and_triple(&options.settings, &options.target)?;
46    let fisa = parsed.as_fisa();
47    if fisa.isa.is_none() {
48        anyhow::bail!("`souper-harvest` requires a target isa");
49    }
50
51    match fs::create_dir_all(&options.output_dir) {
52        Ok(_) => {}
53        Err(e)
54            if e.kind() == io::ErrorKind::AlreadyExists
55                && fs::metadata(&options.output_dir)
56                    .with_context(|| {
57                        format!(
58                            "failed to read file metadata: {}",
59                            options.output_dir.display(),
60                        )
61                    })?
62                    .is_dir() => {}
63        Err(e) => {
64            return Err(e).context(format!(
65                "failed to create output directory: {}",
66                options.output_dir.display()
67            ))
68        }
69    }
70
71    let (send, recv) = std::sync::mpsc::channel::<String>();
72
73    let writing_thread = std::thread::spawn({
74        let output_dir = options.output_dir.clone();
75        let keep_harvest_source = options.add_harvest_source;
76        move || -> Result<()> {
77            let mut already_harvested = HashSet::new();
78            for lhs in recv {
79                let lhs = if keep_harvest_source {
80                    &lhs
81                } else {
82                    // Remove the first `;; Harvested from v12 in u:34` line.
83                    let i = lhs.find('\n').unwrap();
84                    &lhs[i + 1..]
85                };
86                let hash = hash(lhs.as_bytes());
87                if already_harvested.insert(hash) {
88                    let output_path = output_dir.join(hash.to_string());
89                    let mut output =
90                        io::BufWriter::new(fs::File::create(&output_path).with_context(|| {
91                            format!("failed to create file: {}", output_path.display())
92                        })?);
93                    output.write_all(lhs.as_bytes()).with_context(|| {
94                        format!("failed to write to output file: {}", output_path.display())
95                    })?;
96                }
97            }
98            Ok(())
99        }
100    });
101
102    iterate_files(&options.input)
103        .par_bridge()
104        .flat_map(|path| {
105            parse_input(path)
106                .unwrap_or_else(|e| {
107                    println!("{e:?}");
108                    Vec::new()
109                })
110                .into_par_iter()
111        })
112        .map_init(
113            move || (send.clone(), Context::new()),
114            move |(send, ctx), func| {
115                ctx.clear();
116                ctx.func = func;
117
118                ctx.optimize(fisa.isa.unwrap(), &mut ControlPlane::default())
119                    .context("failed to run optimizations")?;
120
121                ctx.souper_harvest(send)
122                    .context("failed to run souper harvester")?;
123
124                Ok(())
125            },
126        )
127        .collect::<Result<()>>()?;
128
129    match writing_thread.join() {
130        Ok(result) => result?,
131        Err(e) => std::panic::resume_unwind(e),
132    }
133
134    Ok(())
135}
136
137fn parse_input(path: PathBuf) -> Result<Vec<Function>> {
138    let contents = read_to_string(&path)?;
139    let funcs = cranelift_reader::parse_functions(&contents)
140        .with_context(|| format!("parse error in {}", path.display()))?;
141    Ok(funcs)
142}
143
144/// A convenience function for a quick usize hash
145#[inline]
146pub fn hash<T: std::hash::Hash + ?Sized>(v: &T) -> usize {
147    BuildHasherDefault::<rustc_hash::FxHasher>::default().hash_one(v) as usize
148}