cranelift_codegen/ir/memflags.rs
//! Memory operation flags.

use super::TrapCode;
use core::fmt;
use core::num::NonZeroU8;
use core::str::FromStr;

#[cfg(feature = "enable-serde")]
use serde_derive::{Deserialize, Serialize};

/// Endianness of a memory access.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Endianness {
    /// Little-endian
    Little,
    /// Big-endian
    Big,
}

/// Which disjoint region of aliasing memory is accessed in this memory
/// operation.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[repr(u8)]
#[allow(missing_docs)]
#[rustfmt::skip]
pub enum AliasRegion {
    // None = 0b00;
    Heap = 0b01,
    Table = 0b10,
    Vmctx = 0b11,
}

impl AliasRegion {
    const fn from_bits(bits: u8) -> Option<Self> {
        match bits {
            0b00 => None,
            0b01 => Some(Self::Heap),
            0b10 => Some(Self::Table),
            0b11 => Some(Self::Vmctx),
            _ => panic!("invalid alias region bits"),
        }
    }

    const fn to_bits(region: Option<Self>) -> u8 {
        match region {
            None => 0b00,
            Some(r) => r as u8,
        }
    }
}

/// Flags for memory operations like load/store.
///
/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, removing a flag
/// does not change the semantics of a program, but adding one may.
///
/// In addition, the flags determine the endianness of the memory access. By default,
/// any memory access uses the native endianness determined by the target ISA. This can
/// be overridden for individual accesses by explicitly specifying little- or big-endian
/// semantics via the flags.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct MemFlags {
    // Initialized to all zeros so that all flags have their default value.
    // This is interpreted through various methods below. Currently the bits of
    // this are defined as:
    //
    // * 0 - aligned flag
    // * 1 - readonly flag
    // * 2 - little endian flag
    // * 3 - big endian flag
    // * 4 - checked flag
    // * 5/6 - alias region
    // * 7/8/9/10/11/12/13/14 - trap code
    // * 15 - can_move flag
    //
    // Current properties upheld are:
    //
    // * only one of little/big endian is set
    // * only one alias region can be set - once set it cannot be changed
    bits: u16,
}

/// Guaranteed to use "natural alignment" for the given type. This
/// may enable better instruction selection.
const BIT_ALIGNED: u16 = 1 << 0;

/// A load that reads data in memory that does not change for the
/// duration of the function's execution. This may enable
/// additional optimizations to be performed.
const BIT_READONLY: u16 = 1 << 1;

/// Load multi-byte values from memory in a little-endian format.
const BIT_LITTLE_ENDIAN: u16 = 1 << 2;

/// Load multi-byte values from memory in a big-endian format.
const BIT_BIG_ENDIAN: u16 = 1 << 3;

/// Check this load or store for safety when using the
/// proof-carrying-code framework. The address must have a
/// `PointsTo` fact attached with a sufficiently large valid range
/// for the accessed size.
const BIT_CHECKED: u16 = 1 << 4;

/// Used for alias analysis, indicates which disjoint part of the abstract state
/// is being accessed.
const MASK_ALIAS_REGION: u16 = 0b11 << ALIAS_REGION_OFFSET;
const ALIAS_REGION_OFFSET: u16 = 5;

/// Trap code, if any, for this memory operation.
const MASK_TRAP_CODE: u16 = 0b1111_1111 << TRAP_CODE_OFFSET;
const TRAP_CODE_OFFSET: u16 = 7;

/// Whether this memory operation may be freely moved by the optimizer so long
/// as its data dependencies are satisfied. That is, by setting this flag, the
/// producer is guaranteeing that this memory operation's safety is not guarded
/// by outside-the-data-flow-graph properties, like implicit bounds-checking
/// control dependencies.
const BIT_CAN_MOVE: u16 = 1 << 15;

impl MemFlags {
    /// Create a new empty set of flags.
    pub const fn new() -> Self {
        Self { bits: 0 }.with_trap_code(Some(TrapCode::HEAP_OUT_OF_BOUNDS))
    }

    /// Create a set of flags representing an access from a "trusted" address, meaning it's
    /// known to be aligned and non-trapping.
    pub const fn trusted() -> Self {
        Self::new().with_notrap().with_aligned()
    }

    /// Read a flag bit.
    const fn read_bit(self, bit: u16) -> bool {
        self.bits & bit != 0
    }

    /// Return a new `MemFlags` with this flag bit set.
    const fn with_bit(mut self, bit: u16) -> Self {
        self.bits |= bit;
        self
    }

    /// Reads the alias region that this memory operation works with.
    pub const fn alias_region(self) -> Option<AliasRegion> {
        AliasRegion::from_bits(((self.bits & MASK_ALIAS_REGION) >> ALIAS_REGION_OFFSET) as u8)
    }

    /// Sets the alias region that this works on to the specified `region`.
    pub const fn with_alias_region(mut self, region: Option<AliasRegion>) -> Self {
        let bits = AliasRegion::to_bits(region);
        self.bits &= !MASK_ALIAS_REGION;
        self.bits |= (bits as u16) << ALIAS_REGION_OFFSET;
        self
    }

    /// Sets the alias region that this works on to the specified `region`.
    pub fn set_alias_region(&mut self, region: Option<AliasRegion>) {
        *self = self.with_alias_region(region);
    }

    /// Set a flag bit by name.
    ///
    /// Returns true if the flag was found and set, false for an unknown flag
    /// name.
    ///
    /// # Errors
    ///
    /// Returns an error message if the `name` is known but couldn't be applied
    /// due to it being a semantic error.
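    ///
    /// # Example
    ///
    /// A brief sketch of the three possible outcomes (this assumes `MemFlags`
    /// is re-exported as `cranelift_codegen::ir::MemFlags`, as usual):
    ///
    /// ```
    /// use cranelift_codegen::ir::MemFlags;
    ///
    /// let mut flags = MemFlags::new();
    /// // Known flag name: applied, returns Ok(true).
    /// assert_eq!(flags.set_by_name("readonly"), Ok(true));
    /// // Unknown name: ignored, returns Ok(false).
    /// assert_eq!(flags.set_by_name("not_a_flag"), Ok(false));
    /// // Semantic conflict: returns Err with a message.
    /// flags.set_by_name("little").unwrap();
    /// assert!(flags.set_by_name("big").is_err());
    /// ```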
    pub fn set_by_name(&mut self, name: &str) -> Result<bool, &'static str> {
        *self = match name {
            "notrap" => self.with_trap_code(None),
            "aligned" => self.with_aligned(),
            "readonly" => self.with_readonly(),
            "little" => {
                if self.read_bit(BIT_BIG_ENDIAN) {
                    return Err("cannot set both big and little endian bits");
                }
                self.with_endianness(Endianness::Little)
            }
            "big" => {
                if self.read_bit(BIT_LITTLE_ENDIAN) {
                    return Err("cannot set both big and little endian bits");
                }
                self.with_endianness(Endianness::Big)
            }
            "heap" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Heap))
            }
            "table" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Table))
            }
            "vmctx" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Vmctx))
            }
            "checked" => self.with_checked(),
            "can_move" => self.with_can_move(),

            other => match TrapCode::from_str(other) {
                Ok(code) => self.with_trap_code(Some(code)),
                Err(()) => return Ok(false),
            },
        };
        Ok(true)
    }

    /// Return endianness of the memory access. This will return the endianness
    /// explicitly specified by the flags if any, and will default to the native
    /// endianness otherwise. The native endianness has to be provided by the
    /// caller since it is not explicitly encoded in CLIF IR -- this allows a
    /// front end to create IR without having to know the target endianness.
    pub const fn endianness(self, native_endianness: Endianness) -> Endianness {
        if self.read_bit(BIT_LITTLE_ENDIAN) {
            Endianness::Little
        } else if self.read_bit(BIT_BIG_ENDIAN) {
            Endianness::Big
        } else {
            native_endianness
        }
    }

    /// Return endianness of the memory access, if explicitly specified.
    ///
    /// If the endianness is not explicitly specified, this will return `None`,
    /// which means "native endianness".
    pub const fn explicit_endianness(self) -> Option<Endianness> {
        if self.read_bit(BIT_LITTLE_ENDIAN) {
            Some(Endianness::Little)
        } else if self.read_bit(BIT_BIG_ENDIAN) {
            Some(Endianness::Big)
        } else {
            None
        }
    }

    /// Set endianness of the memory access.
    pub fn set_endianness(&mut self, endianness: Endianness) {
        *self = self.with_endianness(endianness);
    }

    /// Set endianness of the memory access, returning new flags.
    pub const fn with_endianness(self, endianness: Endianness) -> Self {
        let res = match endianness {
            Endianness::Little => self.with_bit(BIT_LITTLE_ENDIAN),
            Endianness::Big => self.with_bit(BIT_BIG_ENDIAN),
        };
        assert!(!(res.read_bit(BIT_LITTLE_ENDIAN) && res.read_bit(BIT_BIG_ENDIAN)));
        res
    }

    /// Test if this memory operation cannot trap.
    ///
    /// By default `MemFlags` will assume that any load/store can trap and is
    /// associated with a `TrapCode::HEAP_OUT_OF_BOUNDS` code. If the trap code
    /// is configured to `None`, though, then this method will return `true`
    /// and indicate that the memory operation will not trap.
    ///
    /// If this returns `true` then the memory is *accessible*, which means
    /// that accesses will not trap. This makes it possible to delete an unused
    /// load or a dead store instruction.
    ///
    /// This flag does *not* mean that the associated instruction can be
    /// code-motioned to arbitrary places in the function so long as its data
    /// dependencies are met. This only means that, given its current location
    /// in the function, it will never trap. See the `can_move` method for more
    /// details.
    pub const fn notrap(self) -> bool {
        self.trap_code().is_none()
    }

    /// Sets the trap code for this `MemFlags` to `None`.
    pub fn set_notrap(&mut self) {
        *self = self.with_notrap();
    }

    /// Sets the trap code for this `MemFlags` to `None`, returning the new
    /// flags.
    pub const fn with_notrap(self) -> Self {
        self.with_trap_code(None)
    }

    /// Is this memory operation safe to move so long as its data dependencies
    /// remain satisfied?
    ///
    /// If this is `true`, then it is okay to code motion this instruction to
    /// arbitrary locations in the function, including across blocks and
    /// conditional branches, so long as data dependencies (and trap ordering,
    /// if any) are upheld.
    ///
    /// If this is `false`, then this memory operation's safety potentially
    /// relies upon invariants that are not reflected in its data dependencies,
    /// and therefore it is not safe to code motion this operation. For example,
    /// this operation could be in a block that is dominated by a control-flow
    /// bounds check, which is not reflected in its operands, and it would be
    /// unsafe to code motion it above the bounds check, even if its data
    /// dependencies would still be satisfied.
    pub const fn can_move(self) -> bool {
        self.read_bit(BIT_CAN_MOVE)
    }

    /// Set the `can_move` flag.
    pub fn set_can_move(&mut self) {
        *self = self.with_can_move();
    }

    /// Set the `can_move` flag, returning new flags.
    pub const fn with_can_move(self) -> Self {
        self.with_bit(BIT_CAN_MOVE)
    }

    /// Test if the `aligned` flag is set.
    ///
    /// By default, Cranelift memory instructions work with any unaligned effective address. If the
    /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
    /// effective address is misaligned.
    pub const fn aligned(self) -> bool {
        self.read_bit(BIT_ALIGNED)
    }

    /// Set the `aligned` flag.
    pub fn set_aligned(&mut self) {
        *self = self.with_aligned();
    }

    /// Set the `aligned` flag, returning new flags.
    pub const fn with_aligned(self) -> Self {
        self.with_bit(BIT_ALIGNED)
    }

    /// Test if the `readonly` flag is set.
    ///
    /// Loads with this flag have no memory dependencies.
    /// This results in undefined behavior if the dereferenced memory is mutated at any time
    /// between when the function is called and when it is exited.
    pub const fn readonly(self) -> bool {
        self.read_bit(BIT_READONLY)
    }

    /// Set the `readonly` flag.
    pub fn set_readonly(&mut self) {
        *self = self.with_readonly();
    }

    /// Set the `readonly` flag, returning new flags.
    pub const fn with_readonly(self) -> Self {
        self.with_bit(BIT_READONLY)
    }

    /// Test if the `checked` bit is set.
    ///
    /// Loads and stores with this flag are verified to access
    /// pointers only with a validated `PointsTo` fact attached, and
    /// with that fact validated, when using the proof-carrying-code
    /// framework. If initial facts on program inputs are correct
    /// (i.e., correctly denote the shape and types of data structures
    /// in memory), and if PCC validates the compiled output, then all
    /// `checked`-marked memory accesses are guaranteed (up to the
    /// checker's correctness) to access valid memory. This can be
    /// used to ensure memory safety and sandboxing.
    pub const fn checked(self) -> bool {
        self.read_bit(BIT_CHECKED)
    }

    /// Set the `checked` bit.
    pub fn set_checked(&mut self) {
        *self = self.with_checked();
    }

    /// Set the `checked` bit, returning new flags.
    pub const fn with_checked(self) -> Self {
        self.with_bit(BIT_CHECKED)
    }

    /// Get the trap code to report if this memory access traps.
    ///
    /// A `None` trap code indicates that this memory access does not trap.
    pub const fn trap_code(self) -> Option<TrapCode> {
        let byte = ((self.bits & MASK_TRAP_CODE) >> TRAP_CODE_OFFSET) as u8;
        match NonZeroU8::new(byte) {
            Some(code) => Some(TrapCode::from_raw(code)),
            None => None,
        }
    }

    /// Configures these flags with the specified trap code `code`.
    ///
    /// A trap code indicates that this memory operation cannot be optimized
    /// away and it must "stay where it is" in the program. Traps are
    /// considered side effects, for example, and their meaning is carried both
    /// by the trap code that is communicated and by which instruction trapped.
    pub const fn with_trap_code(mut self, code: Option<TrapCode>) -> Self {
        let bits = match code {
            Some(code) => code.as_raw().get() as u16,
            None => 0,
        };
        self.bits &= !MASK_TRAP_CODE;
        self.bits |= bits << TRAP_CODE_OFFSET;
        self
    }
}

impl fmt::Display for MemFlags {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.trap_code() {
            None => write!(f, " notrap")?,
            // This is the default trap code, so don't print anything extra
            // for this.
            Some(TrapCode::HEAP_OUT_OF_BOUNDS) => {}
            Some(t) => write!(f, " {t}")?,
        }
        if self.aligned() {
            write!(f, " aligned")?;
        }
        if self.readonly() {
            write!(f, " readonly")?;
        }
        if self.can_move() {
            write!(f, " can_move")?;
        }
        if self.read_bit(BIT_BIG_ENDIAN) {
            write!(f, " big")?;
        }
        if self.read_bit(BIT_LITTLE_ENDIAN) {
            write!(f, " little")?;
        }
        if self.checked() {
            write!(f, " checked")?;
        }
        match self.alias_region() {
            None => {}
            Some(AliasRegion::Heap) => write!(f, " heap")?,
            Some(AliasRegion::Table) => write!(f, " table")?,
            Some(AliasRegion::Vmctx) => write!(f, " vmctx")?,
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn roundtrip_traps() {
        for trap in TrapCode::non_user_traps().iter().copied() {
            let flags = MemFlags::new().with_trap_code(Some(trap));
            assert_eq!(flags.trap_code(), Some(trap));
        }
        let flags = MemFlags::new().with_trap_code(None);
        assert_eq!(flags.trap_code(), None);
    }
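
    // Additional sketch: exercise the defaults documented on `new` and
    // `trusted` above, using only APIs defined in this module.
    #[test]
    fn default_flags() {
        let flags = MemFlags::new();
        assert_eq!(flags.trap_code(), Some(TrapCode::HEAP_OUT_OF_BOUNDS));
        assert!(!flags.notrap());
        assert!(!flags.aligned());
        assert_eq!(flags.alias_region(), None);

        let trusted = MemFlags::trusted();
        assert!(trusted.notrap());
        assert!(trusted.aligned());
    }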

    #[test]
    fn cannot_set_big_and_little() {
        let mut big = MemFlags::new().with_endianness(Endianness::Big);
        assert!(big.set_by_name("little").is_err());

        let mut little = MemFlags::new().with_endianness(Endianness::Little);
        assert!(little.set_by_name("big").is_err());
    }
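
    // Additional sketch: endianness defaults to the caller-provided native
    // endianness unless explicitly overridden, per the docs on `endianness`.
    #[test]
    fn endianness_default_and_explicit() {
        let flags = MemFlags::new();
        assert_eq!(flags.explicit_endianness(), None);
        assert_eq!(flags.endianness(Endianness::Big), Endianness::Big);
        assert_eq!(flags.endianness(Endianness::Little), Endianness::Little);

        let little = flags.with_endianness(Endianness::Little);
        assert_eq!(little.explicit_endianness(), Some(Endianness::Little));
        assert_eq!(little.endianness(Endianness::Big), Endianness::Little);
    }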

    #[test]
    fn only_one_region() {
        let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Heap));
        assert!(big.set_by_name("table").is_err());
        assert!(big.set_by_name("vmctx").is_err());

        let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Table));
        assert!(big.set_by_name("heap").is_err());
        assert!(big.set_by_name("vmctx").is_err());

        let mut big = MemFlags::new().with_alias_region(Some(AliasRegion::Vmctx));
        assert!(big.set_by_name("heap").is_err());
        assert!(big.set_by_name("table").is_err());
    }
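
    // Additional sketches: alias regions round-trip through the flag bits, and
    // `set_by_name` distinguishes known names, unknown names, and trap codes.
    #[test]
    fn alias_region_roundtrip() {
        let regions = [
            None,
            Some(AliasRegion::Heap),
            Some(AliasRegion::Table),
            Some(AliasRegion::Vmctx),
        ];
        for region in regions {
            let flags = MemFlags::new().with_alias_region(region);
            assert_eq!(flags.alias_region(), region);
        }
    }

    #[test]
    fn set_by_name_known_and_unknown() {
        let mut flags = MemFlags::new();
        assert_eq!(flags.set_by_name("readonly"), Ok(true));
        assert!(flags.readonly());
        assert_eq!(flags.set_by_name("notrap"), Ok(true));
        assert!(flags.notrap());
        assert_eq!(flags.set_by_name("not_a_flag"), Ok(false));
    }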
}