Skip to main content

wasmtime_environ/graphs/
scc.rs

1//! Strongly-connected components.
2//!
3//! This module implements [Tarjan's algorithm] for finding strongly-connected
4//! components.
5//!
6//! This algorithm takes `O(V+E)` time and uses `O(V+E)` space.
7//!
8//! Tarjan's algorithm is usually presented as a recursive algorithm, but we do
9//! not trust the input and cannot recurse over it for fear of blowing the
10//! stack. Therefore, this implementation is iterative.
11//!
12//! [Tarjan's algorithm]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
13
14use super::*;
15use crate::{
16    EntityRef, EntitySet, PrimaryMap, SecondaryMap, non_max::NonMaxU32,
17    packed_option::PackedOption, prelude::*,
18};
19use alloc::collections::BTreeSet;
20use core::{
21    cmp,
22    fmt::{self, Debug},
23    hash::Hash,
24    iter,
25    ops::Range,
26};
27
28/// A strongly-connected component.
29#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
30pub struct Scc(u32);
31crate::entity_impl!(Scc);
32
33/// The set of strongly-connected components for a graph of `Node`s.
34pub struct StronglyConnectedComponents<Node>
35where
36    Node: EntityRef,
37{
38    /// A map from a component to the range of `self.component_nodes` that make
39    /// up that component's nodes.
40    components: PrimaryMap<Scc, Range<u32>>,
41
42    /// The data storage for the values of `self.components`.
43    component_nodes: Vec<Node>,
44}
45
46impl<Node> Debug for StronglyConnectedComponents<Node>
47where
48    Node: EntityRef + Debug,
49{
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        struct Map<'a, Node: EntityRef + Debug>(&'a StronglyConnectedComponents<Node>);
52
53        impl<'a, Node> Debug for Map<'a, Node>
54        where
55            Node: EntityRef + Debug,
56        {
57            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> fmt::Result {
58                f.debug_map().entries(self.0.iter()).finish()
59            }
60        }
61
62        f.debug_struct("StronglyConnectedComponents")
63            .field("components", &Map(self))
64            .finish()
65    }
66}
67
68impl<Node> StronglyConnectedComponents<Node>
69where
70    Node: EntityRef + Debug,
71{
72    /// Find the strongly-connected for the given graph.
73    pub fn new<G>(graph: G) -> Self
74    where
75        G: Graph<Node>,
76    {
77        let nodes = graph.nodes();
78
79        // The resulting components and their nodes.
80        let mut component_nodes = vec![];
81        let mut components = PrimaryMap::<Scc, Range<u32>>::new();
82
83        // The DFS index counter.
84        let mut index = NonMaxU32::default();
85
86        // The DFS index and the earliest on-stack node reachable from each
87        // node.
88        let (min, max) = nodes.size_hint();
89        let capacity = max.unwrap_or_else(|| 2 * min);
90        let mut indices = SecondaryMap::<Node, Option<NonMaxU32>>::with_capacity(capacity);
91        let mut lowlinks = SecondaryMap::<Node, Option<NonMaxU32>>::with_capacity(capacity);
92
93        // The stack of nodes we are currently finding an SCC for. Not the same
94        // as the DFS stack: we only pop from this stack once we find the root
95        // of an SCC.
96        let mut stack = vec![];
97        let mut on_stack = EntitySet::<Node>::new();
98
99        let mut dfs = Dfs::new(nodes);
100        while let Some(event) = dfs.next(
101            &graph,
102            // We have seen the node before if we have assigned it a DFS index.
103            |node| indices[node].is_some(),
104        ) {
105            match event {
106                DfsEvent::Pre(node) => {
107                    debug_assert!(indices[node].is_none());
108                    debug_assert!(lowlinks[node].is_none());
109
110                    // Assign an index to this node.
111                    indices[node] = Some(index);
112
113                    // Its current lowlink is itself. This will get updated to
114                    // be accurate as we visit the node's successors.
115                    lowlinks[node] = Some(index);
116
117                    // Increment the DFS counter.
118                    index = NonMaxU32::new(index.get() + 1).unwrap();
119
120                    // Push the node onto the SCC stack.
121                    stack.push(node);
122                    let is_newly_on_stack = on_stack.insert(node);
123                    debug_assert!(is_newly_on_stack);
124                }
125
126                DfsEvent::AfterEdge(node, succ) => {
127                    debug_assert!(indices[node].is_some());
128                    debug_assert!(lowlinks[node].is_some());
129                    debug_assert!(lowlinks[node] <= indices[node]);
130                    debug_assert!(indices[succ].is_some());
131                    debug_assert!(lowlinks[succ].is_some());
132                    debug_assert!(lowlinks[succ] <= indices[succ]);
133
134                    // If the successor is still on the SCC stack, then it is
135                    // part of the same SCC as this node, so propagate its
136                    // lowlink to this node's lowlink.
137                    if on_stack.contains(succ) {
138                        lowlinks[node] =
139                            Some(cmp::min(lowlinks[node].unwrap(), lowlinks[succ].unwrap()));
140                    }
141                }
142
143                DfsEvent::Post(node) => {
144                    debug_assert!(indices[node].is_some());
145                    debug_assert!(lowlinks[node].is_some());
146
147                    // If this node's index is the same as its lowlink, then it
148                    // is the root of a component. Pop this component's elements
149                    // from the SCC stack and push them into our result data
150                    // structures.
151                    if indices[node] == lowlinks[node] {
152                        let mut done = false;
153                        components.push(extend_with_range(
154                            &mut component_nodes,
155                            iter::from_fn(|| {
156                                if done {
157                                    return None;
158                                }
159                                let v = stack.pop().unwrap();
160                                let was_on_stack = on_stack.remove(v);
161                                debug_assert!(was_on_stack);
162                                if v == node {
163                                    done = true;
164                                }
165                                Some(v)
166                            }),
167                        ));
168                    }
169                }
170            }
171        }
172
173        Self {
174            components,
175            component_nodes,
176        }
177    }
178
179    /// Get the number of components.
180    pub fn len(&self) -> usize {
181        self.components.len()
182    }
183
184    fn node_range(&self, range: Range<u32>) -> &[Node] {
185        let start = usize::try_from(range.start).unwrap();
186        let end = usize::try_from(range.end).unwrap();
187        &self.component_nodes[start..end]
188    }
189
190    /// Iterate over each strongly-connnected component and the `Node`s that are
191    /// members of it.
192    ///
193    /// Iteration happens in reverse-topological order (successors are visited
194    /// before predecessors in the resulting SCC DAG).
195    pub fn iter(&self) -> impl ExactSizeIterator<Item = (Scc, &[Node])> + '_ {
196        self.components
197            .iter()
198            .map(|(component, range)| (component, self.node_range(range.clone())))
199    }
200
201    /// Iterate over each strongly-connected component.
202    ///
203    /// Iteration happens in reverse-topological order (successors are visited
204    /// before predecessors in the resulting SCC DAG).
205    pub fn keys(&self) -> impl ExactSizeIterator<Item = Scc> {
206        self.components.keys()
207    }
208
209    /// Iterate over the `Node`s that make up each strongly-connected component.
210    ///
211    /// Iteration happens in reverse-topological order (successors are visited
212    /// before predecessors in the resulting SCC DAG).
213    #[cfg(test)]
214    pub fn values(&self) -> impl ExactSizeIterator<Item = &[Node]> + '_ {
215        self.components
216            .values()
217            .map(|range| self.node_range(range.clone()))
218    }
219
220    /// Get the `Node`s that make up the given strongly-connected component.
221    pub fn nodes(&self, component: Scc) -> &[Node] {
222        let range = self.components[component].clone();
223        self.node_range(range)
224    }
225
226    /// Get this set of strongly-connected components' "evaporation".
227    ///
228    /// Given an input graph `G`, we can construct a new graph where the new
229    /// graph's nodes are the strongly-connected components of `G` and there is
230    /// an edge `scc(a) --> scc(b)` for every edge `a --> b` in the input graph
231    /// `G` where `scc(a) != scc(b)` and `scc` is the function from a node to
232    /// its strongly-connected component. This new graph is known as the
233    /// "condensation" of `G`.
234    ///
235    /// In the following diagram, the solid lines are the input graph and the
236    /// dotted lines show its condensation:
237    ///
238    /// ```text
239    /// ...............
240    /// :             :
241    /// :    ,-----.  :
242    /// :    |     |  :
243    /// :    V     |  :
244    /// :  +---+   |  :
245    /// :  | a |---'  :
246    /// :  +---+      :
247    /// :    |        :
248    /// :....|........:
249    ///      |  :
250    ///      |  :
251    ///      |  :
252    ///      |  V
253    /// .....|..............       ....................
254    /// :    |             :       :                  :
255    /// :    V             :......>:                  :
256    /// :  +---+    +---+  :       :  +---+    +---+  :
257    /// :  | b |<-->| c |------------>| d |<-->| e |  :
258    /// :  +---+    +---+  :       :  +---+    +---+  :
259    /// :    |             :       :             |    :
260    /// :....|.............:       :.............|....:
261    ///      |  :                          :     |
262    ///      |  :                          :     |
263    ///      |  :                          :     |
264    ///      |  V                          :     |
265    /// .....|........................     :     |
266    /// :    |                       :     :     |
267    /// :    | ,----------------.    :     :     |
268    /// :    | |                |    :<....:     |
269    /// :    V |                V    :           |
270    /// :   +---+    +---+    +---+  :           |
271    /// :   | f |<---| g |<---| h |<-------------'
272    /// :   +---+    +---+    +---+  :
273    /// :                            :
274    /// :............................:
275    /// ```
276    ///
277    /// Note that a graph's condensation is always acyclic, because the
278    /// strongly-conneted components encapsulate and hide any cycles from the
279    /// input graph.
280    ///
281    /// I am not aware of an existing name for the reverse (or transpose)
282    /// condensation, where each of the condensation's edges `a --> b` are
283    /// flipped into `b --> a`, so, at cfallin's brilliant suggestion, I have
284    /// decided to call it an "evaporation".
285    ///
286    /// In the context of a call graph, the condensation allows us to derive a
287    /// partial dependency ordering for bottom-up inlining:
288    ///
289    /// * An edge `scc({a,b,...}) --> scc({c,d,...})` means that the functions
290    ///   `{c,d,...}` should be processed for inlining before functions
291    ///   `{a,b,...}`, since some functions in `{a,b,...}` call some functions
292    ///   in `{c,d,...}` and we might want to inline these calls but only after
293    ///   `{c,d,...}` have already had their calls inlined.
294    ///
295    /// * Functions within an SCC are unordered (and we probably don't want to
296    ///   inline between them at all, or only want to do so in a very limited
297    ///   manner, since their members are mutually recursive).
298    ///
299    /// A call graph's evaporation, by flipping edges from pointing to an SCC's
300    /// dependencies to pointing to its dependers, allows us to answer the
301    /// question "given that I've finished processing calls in `scc({e,f,...})`
302    /// for inlining, which other SCCs are now potentially ready for
303    /// processing?".
304    pub fn evaporation<G>(&self, graph: G) -> Evaporation
305    where
306        G: Graph<Node>,
307    {
308        log::trace!("Creating the evaporation of {self:#?}");
309
310        let node_to_component: SecondaryMap<Node, PackedOption<Scc>> = self
311            .iter()
312            .flat_map(|(c, nodes)| {
313                nodes
314                    .iter()
315                    .copied()
316                    .map(move |node| (node, PackedOption::from(Some(c))))
317            })
318            .collect();
319
320        // Create a set of all reverse dependencies. This set contains an entry
321        // `(to, from)` for all `from --> to` edges in the SCC's condensation.
322        //
323        // Note that we use a `BTreeSet` so that the results are ordered, all
324        // edges `a --> *` are contiguous for each component `a`, and we can
325        // therefore pack them densely in the resulting `Evaporation`.
326        let mut reverse_edge_set = BTreeSet::<(Scc, Scc)>::new();
327        for (from_component, nodes) in self.iter() {
328            for node in nodes {
329                for succ in graph.successors(*node) {
330                    let to_component = node_to_component[succ].unwrap();
331                    if to_component == from_component {
332                        continue;
333                    }
334                    reverse_edge_set.insert((to_component, from_component));
335                }
336            }
337        }
338
339        // Convert the `reverse_edge_set` into our densely-packed
340        // representation.
341        let mut reverse_edges =
342            SecondaryMap::<Scc, Range<u32>>::with_capacity(self.components.len());
343        let mut reverse_edge_elems = Vec::with_capacity(reverse_edge_set.len());
344        for (to_node, from_node) in reverse_edge_set {
345            let range = extend_with_range(&mut reverse_edge_elems, Some(from_node));
346
347            if reverse_edges[to_node] == Range::default() {
348                reverse_edges[to_node] = range;
349            } else {
350                debug_assert_eq!(reverse_edges[to_node].end, range.start);
351                reverse_edges[to_node].end = range.end;
352            }
353        }
354
355        let result = Evaporation {
356            reverse_edges,
357            reverse_edge_elems,
358        };
359        log::trace!("  -> {result:#?}");
360        result
361    }
362}
363
364/// The "evaporation" of a graph and its strongly-connected components.
365///
366/// Created by `StronglyConnectedComponents::evaporation`; see that method's
367/// documentation for more details.
368pub struct Evaporation {
369    reverse_edges: SecondaryMap<Scc, Range<u32>>,
370    reverse_edge_elems: Vec<Scc>,
371}
372
373impl Debug for Evaporation {
374    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
375        struct Map<'a>(&'a Evaporation);
376
377        impl<'a> Debug for Map<'a> {
378            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
379                f.debug_map()
380                    .entries(
381                        self.0
382                            .reverse_edges
383                            .keys()
384                            .map(|k| (k, self.0.reverse_edges(k))),
385                    )
386                    .finish()
387            }
388        }
389
390        f.debug_struct("Evaporation")
391            .field("reverse_edges", &Map(self))
392            .finish()
393    }
394}
395
396impl Evaporation {
397    /// Get the reverse dependencies of the given strongly-connected component.
398    ///
399    /// The result is all of the SCCs whose members have edges in the original
400    /// graph to one or more of this SCC's members.
401    pub fn reverse_edges(&self, component: Scc) -> &[Scc] {
402        let Range { start, end } = self.reverse_edges[component].clone();
403        let start = usize::try_from(start).unwrap();
404        let end = usize::try_from(end).unwrap();
405        &self.reverse_edge_elems[start..end]
406    }
407}
408
409#[cfg(test)]
410mod tests {
411    use super::*;
412
413    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
414    pub struct Node(u32);
415    crate::entity_impl!(Node);
416
417    #[derive(Debug)]
418    struct Graph {
419        edges: SecondaryMap<Node, Vec<Node>>,
420    }
421
422    impl Default for Graph {
423        fn default() -> Self {
424            let _ = env_logger::try_init();
425            Self {
426                edges: Default::default(),
427            }
428        }
429    }
430
431    impl Graph {
432        fn define(&mut self, node: u32, edges: impl IntoIterator<Item = u32>) -> &mut Self {
433            assert!(self.edges[Node::from_u32(node)].is_empty());
434            self.edges[Node::from_u32(node)].extend(edges.into_iter().map(|v| Node::from_u32(v)));
435            self
436        }
437    }
438
439    impl super::Graph<Node> for Graph {
440        type NodesIter<'a>
441            = cranelift_entity::Keys<Node>
442        where
443            Self: 'a;
444
445        fn nodes(&self) -> Self::NodesIter<'_> {
446            self.edges.keys()
447        }
448
449        type SuccessorsIter<'a>
450            = core::iter::Copied<core::slice::Iter<'a, Node>>
451        where
452            Self: 'a;
453
454        fn successors(&self, node: Node) -> Self::SuccessorsIter<'_> {
455            self.edges[node].iter().copied()
456        }
457    }
458
459    fn components(graph: &Graph) -> Vec<Vec<u32>> {
460        let components = StronglyConnectedComponents::new(graph);
461        components
462            .values()
463            .map(|vs| vs.iter().map(|v| v.as_u32()).collect::<Vec<_>>())
464            .collect()
465    }
466
467    #[test]
468    fn test_empty() {
469        let graph = Graph::default();
470        assert!(components(&graph).is_empty());
471    }
472
473    #[test]
474    fn test_single_node() {
475        // +---+
476        // | 0 |
477        // +---+
478        let mut graph = Graph::default();
479        graph.define(0, []);
480
481        assert_eq!(components(&graph), vec![vec![0]]);
482    }
483
484    #[test]
485    fn test_single_node_cycle() {
486        //   ,---.
487        //   |   |
488        //   V   |
489        // +---+ |
490        // | 0 |-'
491        // +---+
492        let mut graph = Graph::default();
493        graph.define(0, [0]);
494
495        assert_eq!(components(&graph), vec![vec![0]]);
496    }
497
498    #[test]
499    fn test_disconnected_nodes() {
500        // +---+     +---+
501        // | 0 |     | 1 |
502        // +---+     +---+
503        let mut graph = Graph::default();
504        graph.define(0, []);
505        graph.define(1, []);
506        assert_eq!(components(&graph), vec![vec![1], vec![0]]);
507    }
508
509    #[test]
510    fn test_chained_nodes() {
511        // +---+   +---+   +---+   +---+
512        // | 0 |<--| 1 |<--| 2 |<--| 3 |
513        // +---+   +---+   +---+   +---+
514        let mut graph = Graph::default();
515        graph.define(0, []);
516        graph.define(1, [0]);
517        graph.define(2, [1]);
518        graph.define(3, [2]);
519        assert_eq!(components(&graph), vec![vec![0], vec![1], vec![2], vec![3]]);
520    }
521
522    #[test]
523    fn test_simple_multi_node_cycle() {
524        //   ,-----------------------.
525        //   |                       |
526        //   |                       V
527        // +---+   +---+   +---+   +---+
528        // | 0 |<--| 1 |<--| 2 |<--| 3 |
529        // +---+   +---+   +---+   +---+
530        let mut graph = Graph::default();
531        graph.define(0, [3]);
532        graph.define(1, [0]);
533        graph.define(2, [1]);
534        graph.define(3, [2]);
535        assert_eq!(components(&graph), vec![vec![0, 1, 2, 3]]);
536    }
537
538    #[test]
539    fn test_complicated_multi_node_cycle() {
540        //   ,---------------.
541        //   |               |
542        //   |               V
543        // +---+   +---+   +---+   +---+   +---+
544        // | 0 |<--| 1 |<--| 2 |<--| 3 |<--| 4 |
545        // +---+   +---+   +---+   +---+   +---+
546        //                   |               ^
547        //                   |               |
548        //                   `---------------'
549        let mut graph = Graph::default();
550        graph.define(0, [3]);
551        graph.define(1, [0]);
552        graph.define(2, [1, 4]);
553        graph.define(3, [2]);
554        graph.define(4, [3]);
555        assert_eq!(components(&graph), vec![vec![0, 1, 2, 3, 4]]);
556    }
557
558    #[test]
559    fn test_disconnected_cycles() {
560        // +---+           +---+
561        // | 0 |           | 1 |
562        // +---+           +---+
563        //   ^               ^
564        //   |               |
565        //   V               V
566        // +---+           +---+
567        // | 2 |           | 3 |
568        // +---+           +---+
569        let mut graph = Graph::default();
570        graph.define(0, [2]);
571        graph.define(1, [3]);
572        graph.define(2, [0]);
573        graph.define(3, [1]);
574        assert_eq!(components(&graph), vec![vec![1, 3], vec![0, 2]]);
575    }
576
577    #[test]
578    fn test_chain_of_cycles() {
579        //   ,-----.
580        //   |     |
581        //   V     |
582        // +---+   |
583        // | 0 |---'
584        // +---+
585        //   |
586        //   V
587        // +---+    +---+
588        // | 1 |<-->| 2 |
589        // +---+    +---+
590        //  |
591        //  | ,----------------.
592        //  | |                |
593        //  V |                V
594        // +---+    +---+    +---+
595        // | 3 |<---| 4 |<---| 5 |
596        // +---+    +---+    +---+
597        let mut graph = Graph::default();
598        graph.define(0, [0, 1]);
599        graph.define(1, [2, 3]);
600        graph.define(2, [1]);
601        graph.define(3, [5]);
602        graph.define(4, [3]);
603        graph.define(5, [4]);
604        assert_eq!(components(&graph), vec![vec![3, 4, 5], vec![1, 2], vec![0]]);
605    }
606
607    #[test]
608    fn test_multiple_edges_to_same_component() {
609        // +---+           +---+
610        // | 0 |           | 1 |
611        // +---+           +---+
612        //   ^               ^
613        //   |               |
614        //   V               V
615        // +---+           +---+
616        // | 2 |           | 3 |
617        // +---+           +---+
618        //   |               |
619        //   `------. ,------'
620        //          | |
621        //          V V
622        //         +---+
623        //         | 4 |
624        //         +---+
625        //           ^
626        //           |
627        //           V
628        //         +---+
629        //         | 5 |
630        //         +---+
631        let mut graph = Graph::default();
632        graph.define(0, [2]);
633        graph.define(1, [3]);
634        graph.define(2, [0, 4]);
635        graph.define(3, [1, 4]);
636        graph.define(4, [5]);
637        graph.define(5, [4]);
638        assert_eq!(components(&graph), vec![vec![4, 5], vec![1, 3], vec![0, 2]]);
639    }
640
641    #[test]
642    fn test_duplicate_edges() {
643        // +---+           +---+
644        // | 0 |           | 1 |
645        // +---+           +---+
646        //   ^               ^
647        //   |               |
648        //   V               V
649        // +---+           +---+
650        // | 2 |---------->| 3 |
651        // +---+           +---+
652        //   |               ^
653        //   `---------------'
654        let mut graph = Graph::default();
655        graph.define(0, [2]);
656        graph.define(1, [3]);
657        graph.define(2, [0, 3, 3]);
658        graph.define(3, [1]);
659        assert_eq!(components(&graph), vec![vec![1, 3], vec![0, 2]]);
660    }
661}