Add in the basics for Import ordering (#5524)
* Add in the basics for Import ordering This will walk AST for import notes, and build up a mapping of relationships tying in the target of the import (the "dependency") to our current program (the "dependent"). This allows us to build up a mapping of what dependency and reverse dependency relationships look like, in order to sort dependencies by topologically sorting them. Since I wanted to enable some parallelism here long-term, this returns the (kinda awkward) `Vec<Vec<String>>` type. This is a list of list of dependency targets which can safely be run concurrently, because they have no dependency relationship to eachother. Each list of dependencies should be gated, and start the next list of dependencies after the first is complely done. This isn't ideal and will change long-term, but for now this will work to enable very basic opportunistic parallelism. * clippy v1.0 * allow this this lint doesn't apply here, order doesn't matter. Elsewhere in this program where order does matter we do the intermediate vec. * A snapshot a day keeps the bugs away! 📷🐛 (OS: namespace-profile-ubuntu-8-cores) --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Jess Frazelle <jessfraz@users.noreply.github.com>
This commit is contained in:
		
							
								
								
									
										225
									
								
								src/wasm-lib/kcl/src/walk/import_graph.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										225
									
								
								src/wasm-lib/kcl/src/walk/import_graph.rs
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,225 @@
 | 
			
		||||
use anyhow::Result;
 | 
			
		||||
 | 
			
		||||
use crate::{
 | 
			
		||||
    parsing::ast::types::{ImportPath, NodeRef, Program},
 | 
			
		||||
    walk::{Node, Visitable},
 | 
			
		||||
};
 | 
			
		||||
use std::{
 | 
			
		||||
    collections::HashMap,
 | 
			
		||||
    sync::{Arc, Mutex},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Specific dependency between two modules. The 0th element of this tuple
 | 
			
		||||
/// is the "importing" module, the 1st is the "imported" module. The 0th
 | 
			
		||||
/// module *depends on* the 1st module.
 | 
			
		||||
type Dependency = (String, String);
 | 
			
		||||
 | 
			
		||||
type Graph = Vec<Dependency>;
 | 
			
		||||
 | 
			
		||||
/// Process a number of programs, returning the graph of dependencies.
 | 
			
		||||
///
 | 
			
		||||
/// This will (currently) return a list of lists of IDs that can be safely
 | 
			
		||||
/// run concurrently. Each "stage" is blocking in this model, which will
 | 
			
		||||
/// change in the future. Don't use this function widely, yet.
 | 
			
		||||
#[allow(clippy::iter_over_hash_type)]
 | 
			
		||||
pub fn import_graph(progs: HashMap<String, NodeRef<'_, Program>>) -> Result<Vec<Vec<String>>> {
 | 
			
		||||
    let mut graph = Graph::new();
 | 
			
		||||
 | 
			
		||||
    for (name, program) in progs.iter() {
 | 
			
		||||
        graph.extend(
 | 
			
		||||
            import_dependencies(program)?
 | 
			
		||||
                .into_iter()
 | 
			
		||||
                .map(|dependency| (name.clone(), dependency))
 | 
			
		||||
                .collect::<Vec<_>>(),
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let all_modules: Vec<&str> = progs.keys().map(|v| v.as_str()).collect();
 | 
			
		||||
    topsort(&all_modules, graph)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[allow(clippy::iter_over_hash_type)]
 | 
			
		||||
fn topsort(all_modules: &[&str], graph: Graph) -> Result<Vec<Vec<String>>> {
 | 
			
		||||
    let mut dep_map = HashMap::<String, Vec<String>>::new();
 | 
			
		||||
 | 
			
		||||
    for (dependent, dependency) in graph.iter() {
 | 
			
		||||
        let mut dependencies = dep_map.remove(dependent).unwrap_or_default();
 | 
			
		||||
        dependencies.push(dependency.to_owned());
 | 
			
		||||
        dep_map.insert(dependent.to_owned(), dependencies);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // dep_map now contains reverse dependencies. For each module, it's a
 | 
			
		||||
    // list of what things are "waiting on it". A non-empty value for a key
 | 
			
		||||
    // means it's currently blocked.
 | 
			
		||||
 | 
			
		||||
    let mut waiting_modules = all_modules.to_owned();
 | 
			
		||||
    let mut order = vec![];
 | 
			
		||||
 | 
			
		||||
    loop {
 | 
			
		||||
        // Each pass through we need to find any modules which have nothing
 | 
			
		||||
        // "pointing at it" -- so-called reverse dependencies. This is an entry
 | 
			
		||||
        // that is either not in the dep_map OR an empty list.
 | 
			
		||||
 | 
			
		||||
        let mut stage_modules: Vec<String> = vec![];
 | 
			
		||||
 | 
			
		||||
        for module in &waiting_modules {
 | 
			
		||||
            let module = module.to_string();
 | 
			
		||||
            if dep_map.get(&module).map(|v| v.len()).unwrap_or(0) == 0 {
 | 
			
		||||
                // if it's None or empty, this is a node that we can process,
 | 
			
		||||
                // and remove from the graph.
 | 
			
		||||
                stage_modules.push(module.to_string());
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for stage_module in &stage_modules {
 | 
			
		||||
            // remove the ready-to-run module from the waiting list
 | 
			
		||||
            waiting_modules.retain(|v| *v != stage_module.as_str());
 | 
			
		||||
 | 
			
		||||
            // remove any dependencies for the next run
 | 
			
		||||
            for (_, waiting_for) in dep_map.iter_mut() {
 | 
			
		||||
                waiting_for.retain(|v| v != stage_module);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if stage_modules.is_empty() {
 | 
			
		||||
            anyhow::bail!("imports are acyclic");
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // not strictly needed here, but perhaps helpful to avoid thinking
 | 
			
		||||
        // there's any implied ordering as well as helping to make tests
 | 
			
		||||
        // easier.
 | 
			
		||||
        stage_modules.sort();
 | 
			
		||||
 | 
			
		||||
        order.push(stage_modules);
 | 
			
		||||
 | 
			
		||||
        if waiting_modules.is_empty() {
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(order)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub(crate) fn import_dependencies(prog: NodeRef<'_, Program>) -> Result<Vec<String>> {
 | 
			
		||||
    let ret = Arc::new(Mutex::new(vec![]));
 | 
			
		||||
 | 
			
		||||
    fn walk(ret: Arc<Mutex<Vec<String>>>, node: Node<'_>) {
 | 
			
		||||
        if let Node::ImportStatement(is) = node {
 | 
			
		||||
            let dependency = match &is.path {
 | 
			
		||||
                ImportPath::Kcl { filename } => filename.to_string(),
 | 
			
		||||
                ImportPath::Foreign { path } => path.to_string(),
 | 
			
		||||
                ImportPath::Std { path } => path.join("::"),
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
            ret.lock().unwrap().push(dependency);
 | 
			
		||||
        }
 | 
			
		||||
        for child in node.children().iter() {
 | 
			
		||||
            walk(ret.clone(), *child)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    walk(ret.clone(), prog.into());
 | 
			
		||||
 | 
			
		||||
    let ret = ret.lock().unwrap().clone();
 | 
			
		||||
    Ok(ret)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use super::*;
 | 
			
		||||
 | 
			
		||||
    macro_rules! kcl {
 | 
			
		||||
        ( $kcl:expr ) => {{
 | 
			
		||||
            $crate::parsing::top_level_parse($kcl).unwrap()
 | 
			
		||||
        }};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn order_imports() {
 | 
			
		||||
        let mut modules = HashMap::new();
 | 
			
		||||
 | 
			
		||||
        let a = kcl!("");
 | 
			
		||||
        modules.insert("a.kcl".to_owned(), &a);
 | 
			
		||||
 | 
			
		||||
        let b = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
import \"a.kcl\"
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("b.kcl".to_owned(), &b);
 | 
			
		||||
 | 
			
		||||
        let order = import_graph(modules).unwrap();
 | 
			
		||||
        assert_eq!(vec![vec!["a.kcl".to_owned()], vec!["b.kcl".to_owned()]], order);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn order_imports_none() {
 | 
			
		||||
        let mut modules = HashMap::new();
 | 
			
		||||
 | 
			
		||||
        let a = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
y = 2
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("a.kcl".to_owned(), &a);
 | 
			
		||||
 | 
			
		||||
        let b = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
x = 1
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("b.kcl".to_owned(), &b);
 | 
			
		||||
 | 
			
		||||
        let order = import_graph(modules).unwrap();
 | 
			
		||||
        assert_eq!(vec![vec!["a.kcl".to_owned(), "b.kcl".to_owned()]], order);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn order_imports_2() {
 | 
			
		||||
        let mut modules = HashMap::new();
 | 
			
		||||
 | 
			
		||||
        let a = kcl!("");
 | 
			
		||||
        modules.insert("a.kcl".to_owned(), &a);
 | 
			
		||||
 | 
			
		||||
        let b = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
import \"a.kcl\"
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("b.kcl".to_owned(), &b);
 | 
			
		||||
 | 
			
		||||
        let c = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
import \"a.kcl\"
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("c.kcl".to_owned(), &c);
 | 
			
		||||
 | 
			
		||||
        let order = import_graph(modules).unwrap();
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            vec![vec!["a.kcl".to_owned()], vec!["b.kcl".to_owned(), "c.kcl".to_owned()]],
 | 
			
		||||
            order
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn order_imports_cycle() {
 | 
			
		||||
        let mut modules = HashMap::new();
 | 
			
		||||
 | 
			
		||||
        let a = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
import \"b.kcl\"
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("a.kcl".to_owned(), &a);
 | 
			
		||||
 | 
			
		||||
        let b = kcl!(
 | 
			
		||||
            "
 | 
			
		||||
import \"a.kcl\"
 | 
			
		||||
"
 | 
			
		||||
        );
 | 
			
		||||
        modules.insert("b.kcl".to_owned(), &b);
 | 
			
		||||
 | 
			
		||||
        import_graph(modules).unwrap_err();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -1,7 +1,9 @@
 | 
			
		||||
mod ast_node;
 | 
			
		||||
mod ast_visitor;
 | 
			
		||||
mod ast_walk;
 | 
			
		||||
mod import_graph;
 | 
			
		||||
 | 
			
		||||
pub use ast_node::Node;
 | 
			
		||||
pub use ast_visitor::{Visitable, Visitor};
 | 
			
		||||
pub use ast_walk::walk;
 | 
			
		||||
pub use import_graph::import_graph;
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user