mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-11-04 00:51:56 +00:00 
			
		
		
		
	Remove the code related to minimal_syntaxes.bin
To get fast startup, syntect will instead start to lazy-load syntaxes. See https://github.com/trishume/syntect/pull/393 and discussions in linked PRs.
This commit is contained in:
		@@ -16,7 +16,6 @@
 | 
			
		||||
 | 
			
		||||
- Load cached assets as fast as integrated assets, see #1753 (@Enselic)
 | 
			
		||||
- Greatly reduce startup time in loop-through mode, e.g. when redirecting output. Instead of *50 ms* - *100 ms*, startup takes *5 ms* - *10 ms*. See #1747 (@Enselic)
 | 
			
		||||
- Reduce startup time by approximately 80% for 91 out of 168 syntaxes when using `--language`. See #1787 (@Enselic)
 | 
			
		||||
 | 
			
		||||
## Other
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								assets/minimal_syntaxes.bin
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								assets/minimal_syntaxes.bin
									
									
									
									
										vendored
									
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -15,7 +15,6 @@ use crate::syntax_mapping::ignored_suffixes::IgnoredSuffixes;
 | 
			
		||||
use crate::syntax_mapping::MappingTarget;
 | 
			
		||||
use crate::{bat_warning, SyntaxMapping};
 | 
			
		||||
 | 
			
		||||
use minimal_assets::*;
 | 
			
		||||
use serialized_syntax_set::*;
 | 
			
		||||
 | 
			
		||||
#[cfg(feature = "build-assets")]
 | 
			
		||||
@@ -24,7 +23,6 @@ pub use crate::assets::build_assets::*;
 | 
			
		||||
pub(crate) mod assets_metadata;
 | 
			
		||||
#[cfg(feature = "build-assets")]
 | 
			
		||||
mod build_assets;
 | 
			
		||||
mod minimal_assets;
 | 
			
		||||
mod serialized_syntax_set;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
@@ -32,8 +30,6 @@ pub struct HighlightingAssets {
 | 
			
		||||
    syntax_set_cell: LazyCell<SyntaxSet>,
 | 
			
		||||
    serialized_syntax_set: SerializedSyntaxSet,
 | 
			
		||||
 | 
			
		||||
    minimal_assets: MinimalAssets,
 | 
			
		||||
 | 
			
		||||
    theme_set: ThemeSet,
 | 
			
		||||
    fallback_theme: Option<&'static str>,
 | 
			
		||||
}
 | 
			
		||||
@@ -50,27 +46,11 @@ pub(crate) const COMPRESS_SYNTAXES: bool = true;
 | 
			
		||||
/// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time
 | 
			
		||||
pub(crate) const COMPRESS_THEMES: bool = true;
 | 
			
		||||
 | 
			
		||||
/// Compress for size of ~400 kB instead of ~2100 kB at the cost of ~30% longer deserialization time
 | 
			
		||||
pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
 | 
			
		||||
 | 
			
		||||
/// Whether or not to compress the serialized form of [MinimalSyntaxes]. Shall
 | 
			
		||||
/// always be `false`, because the data in
 | 
			
		||||
/// [MinimalSyntaxes.serialized_syntax_sets] has already been compressed
 | 
			
		||||
/// (assuming [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] is `true`). The "outer" data
 | 
			
		||||
/// structures like `by_name` are tiny. If we compress, deserialization can't do
 | 
			
		||||
/// efficient byte-by-byte copy of `serialized_syntax_sets`.
 | 
			
		||||
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
 | 
			
		||||
 | 
			
		||||
impl HighlightingAssets {
 | 
			
		||||
    fn new(
 | 
			
		||||
        serialized_syntax_set: SerializedSyntaxSet,
 | 
			
		||||
        minimal_syntaxes: MinimalSyntaxes,
 | 
			
		||||
        theme_set: ThemeSet,
 | 
			
		||||
    ) -> Self {
 | 
			
		||||
    fn new(serialized_syntax_set: SerializedSyntaxSet, theme_set: ThemeSet) -> Self {
 | 
			
		||||
        HighlightingAssets {
 | 
			
		||||
            syntax_set_cell: LazyCell::new(),
 | 
			
		||||
            serialized_syntax_set,
 | 
			
		||||
            minimal_assets: MinimalAssets::new(minimal_syntaxes),
 | 
			
		||||
            theme_set,
 | 
			
		||||
            fallback_theme: None,
 | 
			
		||||
        }
 | 
			
		||||
@@ -83,11 +63,6 @@ impl HighlightingAssets {
 | 
			
		||||
    pub fn from_cache(cache_path: &Path) -> Result<Self> {
 | 
			
		||||
        Ok(HighlightingAssets::new(
 | 
			
		||||
            SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
 | 
			
		||||
            asset_from_cache(
 | 
			
		||||
                &cache_path.join("minimal_syntaxes.bin"),
 | 
			
		||||
                "minimal syntax sets",
 | 
			
		||||
                COMPRESS_MINIMAL_SYNTAXES,
 | 
			
		||||
            )?,
 | 
			
		||||
            asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
 | 
			
		||||
        ))
 | 
			
		||||
    }
 | 
			
		||||
@@ -95,7 +70,6 @@ impl HighlightingAssets {
 | 
			
		||||
    pub fn from_binary() -> Self {
 | 
			
		||||
        HighlightingAssets::new(
 | 
			
		||||
            SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
 | 
			
		||||
            get_integrated_minimal_syntaxes(),
 | 
			
		||||
            get_integrated_themeset(),
 | 
			
		||||
        )
 | 
			
		||||
    }
 | 
			
		||||
@@ -129,16 +103,6 @@ impl HighlightingAssets {
 | 
			
		||||
        self.get_theme_set().themes.keys().map(|s| s.as_ref())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Finds a [SyntaxSet] that contains a [SyntaxReference] by its name. First
 | 
			
		||||
    /// tries to find a minimal [SyntaxSet]. If none is found, returns the
 | 
			
		||||
    /// [SyntaxSet] that contains all syntaxes.
 | 
			
		||||
    fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
 | 
			
		||||
        match self.minimal_assets.get_syntax_set_by_name(name) {
 | 
			
		||||
            Some(syntax_set) => Ok(syntax_set),
 | 
			
		||||
            None => self.get_syntax_set(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Use [Self::get_syntax_for_path] instead
 | 
			
		||||
    #[deprecated]
 | 
			
		||||
    pub fn syntax_for_file_name(
 | 
			
		||||
@@ -234,7 +198,7 @@ impl HighlightingAssets {
 | 
			
		||||
        mapping: &SyntaxMapping,
 | 
			
		||||
    ) -> Result<SyntaxReferenceInSet> {
 | 
			
		||||
        if let Some(language) = language {
 | 
			
		||||
            let syntax_set = self.get_syntax_set_by_name(language)?;
 | 
			
		||||
            let syntax_set = self.get_syntax_set()?;
 | 
			
		||||
            return syntax_set
 | 
			
		||||
                .find_syntax_by_token(language)
 | 
			
		||||
                .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })
 | 
			
		||||
@@ -331,13 +295,6 @@ pub(crate) fn get_integrated_themeset() -> ThemeSet {
 | 
			
		||||
    from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes {
 | 
			
		||||
    from_binary(
 | 
			
		||||
        include_bytes!("../assets/minimal_syntaxes.bin"),
 | 
			
		||||
        COMPRESS_MINIMAL_SYNTAXES,
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
 | 
			
		||||
    asset_from_contents(v, "n/a", compressed)
 | 
			
		||||
        .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
 | 
			
		||||
 
 | 
			
		||||
@@ -1,36 +1,9 @@
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::path::Path;
 | 
			
		||||
use syntect::highlighting::ThemeSet;
 | 
			
		||||
use syntect::parsing::syntax_definition::{
 | 
			
		||||
    ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition,
 | 
			
		||||
};
 | 
			
		||||
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
 | 
			
		||||
use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
 | 
			
		||||
 | 
			
		||||
use crate::assets::*;
 | 
			
		||||
 | 
			
		||||
mod graphviz_utils;
 | 
			
		||||
 | 
			
		||||
type SyntaxName = String;
 | 
			
		||||
 | 
			
		||||
/// Used to look up which [SyntaxDefinition] corresponds to a given [OtherSyntax]
 | 
			
		||||
type OtherSyntaxLookup<'a> = HashMap<OtherSyntax, &'a SyntaxDefinition>;
 | 
			
		||||
 | 
			
		||||
/// Used to look up what dependencies a given [SyntaxDefinition] has
 | 
			
		||||
type SyntaxToDependencies = HashMap<SyntaxName, Vec<OtherSyntax>>;
 | 
			
		||||
 | 
			
		||||
/// Used to look up what other [SyntaxDefinition]s depend on a given [SyntaxDefinition]
 | 
			
		||||
type SyntaxToDependents<'a> = HashMap<SyntaxName, Vec<OtherSyntax>>;
 | 
			
		||||
 | 
			
		||||
/// Represents some other `*.sublime-syntax` file, i.e. another [SyntaxDefinition].
 | 
			
		||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Clone, Hash)]
 | 
			
		||||
pub(crate) enum OtherSyntax {
 | 
			
		||||
    /// By name. Example YAML: `include: C.sublime-syntax` (name is `"C"`)
 | 
			
		||||
    ByName(String),
 | 
			
		||||
 | 
			
		||||
    /// By scope. Example YAML: `embed: scope:source.c` (scope is `"source.c"`)
 | 
			
		||||
    ByScope(Scope),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn build(
 | 
			
		||||
    source_dir: &Path,
 | 
			
		||||
    include_integrated_assets: bool,
 | 
			
		||||
@@ -41,19 +14,11 @@ pub fn build(
 | 
			
		||||
 | 
			
		||||
    let syntax_set_builder = build_syntax_set_builder(source_dir, include_integrated_assets)?;
 | 
			
		||||
 | 
			
		||||
    let minimal_syntaxes = build_minimal_syntaxes(&syntax_set_builder, include_integrated_assets)?;
 | 
			
		||||
 | 
			
		||||
    let syntax_set = syntax_set_builder.build();
 | 
			
		||||
 | 
			
		||||
    print_unlinked_contexts(&syntax_set);
 | 
			
		||||
 | 
			
		||||
    write_assets(
 | 
			
		||||
        &theme_set,
 | 
			
		||||
        &syntax_set,
 | 
			
		||||
        &minimal_syntaxes,
 | 
			
		||||
        target_dir,
 | 
			
		||||
        current_version,
 | 
			
		||||
    )
 | 
			
		||||
    write_assets(&theme_set, &syntax_set, target_dir, current_version)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn build_theme_set(source_dir: &Path, include_integrated_assets: bool) -> ThemeSet {
 | 
			
		||||
@@ -122,7 +87,6 @@ fn print_unlinked_contexts(syntax_set: &SyntaxSet) {
 | 
			
		||||
fn write_assets(
 | 
			
		||||
    theme_set: &ThemeSet,
 | 
			
		||||
    syntax_set: &SyntaxSet,
 | 
			
		||||
    minimal_syntaxes: &MinimalSyntaxes,
 | 
			
		||||
    target_dir: &Path,
 | 
			
		||||
    current_version: &str,
 | 
			
		||||
) -> Result<()> {
 | 
			
		||||
@@ -139,12 +103,6 @@ fn write_assets(
 | 
			
		||||
        "syntax set",
 | 
			
		||||
        COMPRESS_SYNTAXES,
 | 
			
		||||
    )?;
 | 
			
		||||
    asset_to_cache(
 | 
			
		||||
        minimal_syntaxes,
 | 
			
		||||
        &target_dir.join("minimal_syntaxes.bin"),
 | 
			
		||||
        "minimal syntax sets",
 | 
			
		||||
        COMPRESS_MINIMAL_SYNTAXES,
 | 
			
		||||
    )?;
 | 
			
		||||
 | 
			
		||||
    print!(
 | 
			
		||||
        "Writing metadata to folder {} ... ",
 | 
			
		||||
@@ -156,299 +114,6 @@ fn write_assets(
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn print_syntax_set_names(syntax_set: &SyntaxSet) {
 | 
			
		||||
    let names = syntax_set
 | 
			
		||||
        .syntaxes()
 | 
			
		||||
        .iter()
 | 
			
		||||
        .map(|syntax| &syntax.name)
 | 
			
		||||
        .collect::<Vec<_>>();
 | 
			
		||||
    println!("{:?}", names);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn build_minimal_syntaxes(
 | 
			
		||||
    syntax_set_builder: &'_ SyntaxSetBuilder,
 | 
			
		||||
    include_integrated_assets: bool,
 | 
			
		||||
) -> Result<MinimalSyntaxes> {
 | 
			
		||||
    let mut minimal_syntaxes = MinimalSyntaxes {
 | 
			
		||||
        by_name: HashMap::new(),
 | 
			
		||||
        serialized_syntax_sets: vec![],
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if include_integrated_assets {
 | 
			
		||||
        // Dependency info is not present in integrated assets, so we can't
 | 
			
		||||
        // calculate minimal syntax sets. Return early without any data filled
 | 
			
		||||
        // in. This means that no minimal syntax sets will be available to use, and
 | 
			
		||||
        // the full, slow-to-deserialize, fallback syntax set will be used instead.
 | 
			
		||||
        return Ok(minimal_syntaxes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let minimal_syntax_sets_to_serialize = build_minimal_syntax_sets(syntax_set_builder)
 | 
			
		||||
        // For now, only store syntax sets with one syntax, otherwise
 | 
			
		||||
        // the binary grows by several megs
 | 
			
		||||
        .filter(|syntax_set| syntax_set.syntaxes().len() == 1);
 | 
			
		||||
 | 
			
		||||
    for minimal_syntax_set in minimal_syntax_sets_to_serialize {
 | 
			
		||||
        // Remember what index it is found at
 | 
			
		||||
        let current_index = minimal_syntaxes.serialized_syntax_sets.len();
 | 
			
		||||
 | 
			
		||||
        for syntax in minimal_syntax_set.syntaxes() {
 | 
			
		||||
            minimal_syntaxes
 | 
			
		||||
                .by_name
 | 
			
		||||
                .insert(syntax.name.to_ascii_lowercase().clone(), current_index);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let serialized_syntax_set = asset_to_contents(
 | 
			
		||||
            &minimal_syntax_set,
 | 
			
		||||
            &format!("failed to serialize minimal syntax set {}", current_index),
 | 
			
		||||
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
 | 
			
		||||
        )?;
 | 
			
		||||
 | 
			
		||||
        // Add last so that it ends up at `current_index`
 | 
			
		||||
        minimal_syntaxes
 | 
			
		||||
            .serialized_syntax_sets
 | 
			
		||||
            .push(serialized_syntax_set);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(minimal_syntaxes)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Analyzes dependencies between syntaxes in a [SyntaxSetBuilder].
 | 
			
		||||
/// From that, it builds minimal [SyntaxSet]s.
 | 
			
		||||
fn build_minimal_syntax_sets(
 | 
			
		||||
    syntax_set_builder: &'_ SyntaxSetBuilder,
 | 
			
		||||
) -> impl Iterator<Item = SyntaxSet> + '_ {
 | 
			
		||||
    let syntaxes = syntax_set_builder.syntaxes();
 | 
			
		||||
 | 
			
		||||
    // Build the data structures we need for dependency resolution
 | 
			
		||||
    let (other_syntax_lookup, syntax_to_dependencies, syntax_to_dependents) =
 | 
			
		||||
        generate_maps(syntaxes);
 | 
			
		||||
 | 
			
		||||
    maybe_write_syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
        &other_syntax_lookup,
 | 
			
		||||
        &syntax_to_dependencies,
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    // Create one minimal SyntaxSet from each (non-hidden) SyntaxDefinition
 | 
			
		||||
    syntaxes.iter().filter_map(move |syntax| {
 | 
			
		||||
        if syntax.hidden {
 | 
			
		||||
            return None;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let mut builder = SyntaxSetDependencyBuilder::new();
 | 
			
		||||
        builder.add_with_dependencies(
 | 
			
		||||
            syntax,
 | 
			
		||||
            &other_syntax_lookup,
 | 
			
		||||
            &syntax_to_dependencies,
 | 
			
		||||
            &syntax_to_dependents,
 | 
			
		||||
        );
 | 
			
		||||
        let syntax_set = builder.build();
 | 
			
		||||
 | 
			
		||||
        if std::env::var("BAT_PRINT_SYNTAX_DEPENDENCIES").is_ok() {
 | 
			
		||||
            // To trigger this code, run:
 | 
			
		||||
            // BAT_PRINT_SYNTAX_DEPENDENCIES=1 cargo run -- cache --build --source assets --blank --target /tmp
 | 
			
		||||
            print_syntax_set_names(&syntax_set);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Some(syntax_set)
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// In order to analyze dependencies, we need three key pieces of data.
 | 
			
		||||
///
 | 
			
		||||
///  * When we have an [OtherSyntax], we need to know what [SyntaxDefinition]
 | 
			
		||||
///    that corresponds to
 | 
			
		||||
///  * When we have a [SyntaxDefinition], we need to know what dependencies it
 | 
			
		||||
///    has
 | 
			
		||||
///  * When we have a [SyntaxDefinition], we need to know what other syntaxes
 | 
			
		||||
///    depend on it
 | 
			
		||||
///
 | 
			
		||||
/// This function generates that data for each syntax.
 | 
			
		||||
fn generate_maps(
 | 
			
		||||
    syntaxes: &[SyntaxDefinition],
 | 
			
		||||
) -> (OtherSyntaxLookup, SyntaxToDependencies, SyntaxToDependents) {
 | 
			
		||||
    let mut other_syntax_lookup = HashMap::new();
 | 
			
		||||
    let mut syntax_to_dependencies = HashMap::new();
 | 
			
		||||
    let mut syntax_to_dependents = HashMap::new();
 | 
			
		||||
 | 
			
		||||
    for syntax in syntaxes {
 | 
			
		||||
        other_syntax_lookup.insert(OtherSyntax::ByName(syntax.name.clone()), syntax);
 | 
			
		||||
        other_syntax_lookup.insert(OtherSyntax::ByScope(syntax.scope), syntax);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for syntax in syntaxes {
 | 
			
		||||
        let dependencies = dependencies_for_syntax(syntax);
 | 
			
		||||
 | 
			
		||||
        for dependency in &dependencies {
 | 
			
		||||
            if let Some(dependency) = other_syntax_lookup.get(dependency) {
 | 
			
		||||
                syntax_to_dependents
 | 
			
		||||
                    .entry(dependency.name.clone())
 | 
			
		||||
                    .or_insert_with(Vec::new)
 | 
			
		||||
                    .push(OtherSyntax::ByName(syntax.name.clone()));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        syntax_to_dependencies.insert(syntax.name.clone(), dependencies);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    (
 | 
			
		||||
        other_syntax_lookup,
 | 
			
		||||
        syntax_to_dependencies,
 | 
			
		||||
        syntax_to_dependents,
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Gets what external dependencies a given [SyntaxDefinition] has.
 | 
			
		||||
/// An external dependency is another `.sublime-syntax` file.
 | 
			
		||||
/// It does that by looking for variants of the following YAML patterns:
 | 
			
		||||
/// - `include: C.sublime-syntax`
 | 
			
		||||
/// - `embed: scope:source.c`
 | 
			
		||||
fn dependencies_for_syntax(syntax: &SyntaxDefinition) -> Vec<OtherSyntax> {
 | 
			
		||||
    let mut dependencies: Vec<OtherSyntax> = syntax
 | 
			
		||||
        .contexts
 | 
			
		||||
        .values()
 | 
			
		||||
        .flat_map(|context| &context.patterns)
 | 
			
		||||
        .flat_map(dependencies_from_pattern)
 | 
			
		||||
        .collect();
 | 
			
		||||
 | 
			
		||||
    // No need to track a dependency more than once
 | 
			
		||||
    dependencies.sort();
 | 
			
		||||
    dependencies.dedup();
 | 
			
		||||
 | 
			
		||||
    dependencies
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn dependencies_from_pattern(pattern: &Pattern) -> Vec<OtherSyntax> {
 | 
			
		||||
    match *pattern {
 | 
			
		||||
        Pattern::Match(MatchPattern {
 | 
			
		||||
            operation: MatchOperation::Push(ref context_references),
 | 
			
		||||
            ..
 | 
			
		||||
        }) => context_references
 | 
			
		||||
            .iter()
 | 
			
		||||
            .map(dependency_from_context_reference)
 | 
			
		||||
            .collect(),
 | 
			
		||||
        Pattern::Include(ref context_reference) => {
 | 
			
		||||
            vec![dependency_from_context_reference(context_reference)]
 | 
			
		||||
        }
 | 
			
		||||
        _ => vec![],
 | 
			
		||||
    }
 | 
			
		||||
    .into_iter()
 | 
			
		||||
    .flatten()
 | 
			
		||||
    .collect()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// To generate a Graphviz dot file of syntax dependencies, do this:
 | 
			
		||||
/// ```bash
 | 
			
		||||
/// sudo apt install graphviz
 | 
			
		||||
/// BAT_SYNTAX_DEPENDENCIES_TO_GRAPHVIZ_DOT_FILE=/tmp/bat-syntax-dependencies.dot cargo run -- cache  --build --source assets --blank --target /tmp
 | 
			
		||||
/// dot /tmp/bat-syntax-dependencies.dot -Tpng -o /tmp/bat-syntax-dependencies.png
 | 
			
		||||
/// open /tmp/bat-syntax-dependencies.png
 | 
			
		||||
/// ```
 | 
			
		||||
fn maybe_write_syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
    other_syntax_lookup: &OtherSyntaxLookup,
 | 
			
		||||
    syntax_to_dependencies: &SyntaxToDependencies,
 | 
			
		||||
) {
 | 
			
		||||
    if let Ok(dot_file_path) = std::env::var("BAT_SYNTAX_DEPENDENCIES_TO_GRAPHVIZ_DOT_FILE") {
 | 
			
		||||
        graphviz_utils::try_syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
            other_syntax_lookup,
 | 
			
		||||
            syntax_to_dependencies,
 | 
			
		||||
            &dot_file_path,
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Removes any context name from the syntax reference.
 | 
			
		||||
///
 | 
			
		||||
/// When we track dependencies between syntaxes, we are not interested in
 | 
			
		||||
/// dependencies on specific contexts inside other syntaxes. We only care about
 | 
			
		||||
/// the dependency on the syntax itself.
 | 
			
		||||
///
 | 
			
		||||
/// For example, if a syntax includes another syntax like this:
 | 
			
		||||
/// ```yaml
 | 
			
		||||
///   - include: scope:source.c++#unique-variables
 | 
			
		||||
/// ```
 | 
			
		||||
/// we only want to track the dependency on `source.c++`.
 | 
			
		||||
fn remove_explicit_context(scope: Scope) -> Scope {
 | 
			
		||||
    if let Some(without_context) = scope.build_string().split('#').next() {
 | 
			
		||||
        Scope::new(without_context).expect("removing context reference must never fail")
 | 
			
		||||
    } else {
 | 
			
		||||
        scope
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn dependency_from_context_reference(context_reference: &ContextReference) -> Option<OtherSyntax> {
 | 
			
		||||
    match &context_reference {
 | 
			
		||||
        ContextReference::File { ref name, .. } => Some(OtherSyntax::ByName(name.clone())),
 | 
			
		||||
        ContextReference::ByScope { ref scope, .. } => {
 | 
			
		||||
            Some(OtherSyntax::ByScope(remove_explicit_context(*scope)))
 | 
			
		||||
        }
 | 
			
		||||
        _ => None,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Helper to construct a [SyntaxSetBuilder] that contains only [SyntaxDefinition]s
 | 
			
		||||
/// that have dependencies among them.
 | 
			
		||||
struct SyntaxSetDependencyBuilder {
 | 
			
		||||
    syntax_set_builder: SyntaxSetBuilder,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SyntaxSetDependencyBuilder {
 | 
			
		||||
    fn new() -> Self {
 | 
			
		||||
        SyntaxSetDependencyBuilder {
 | 
			
		||||
            syntax_set_builder: SyntaxSetBuilder::new(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add a [SyntaxDefinition] to the underlying [SyntaxSetBuilder].
 | 
			
		||||
    /// Also resolve any dependencies it has and add those [SyntaxDefinition]s too.
 | 
			
		||||
    /// This is a recursive process.
 | 
			
		||||
    fn add_with_dependencies(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        syntax: &SyntaxDefinition,
 | 
			
		||||
        other_syntax_lookup: &OtherSyntaxLookup,
 | 
			
		||||
        syntax_to_dependencies: &SyntaxToDependencies,
 | 
			
		||||
        syntax_to_dependents: &SyntaxToDependents,
 | 
			
		||||
    ) {
 | 
			
		||||
        let name = &syntax.name;
 | 
			
		||||
        if self.is_syntax_already_added(name) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        self.syntax_set_builder.add(syntax.clone());
 | 
			
		||||
 | 
			
		||||
        let mut syntaxes_to_add = vec![];
 | 
			
		||||
        if let Some(dependencies) = syntax_to_dependencies.get(name) {
 | 
			
		||||
            syntaxes_to_add.extend(dependencies);
 | 
			
		||||
        }
 | 
			
		||||
        if let Some(dependents) = syntax_to_dependents.get(name) {
 | 
			
		||||
            // This will later be enabled intelligently
 | 
			
		||||
            if std::env::var("BAT_INCLUDE_SYNTAX_DEPENDENTS").is_ok() {
 | 
			
		||||
                syntaxes_to_add.extend(dependents);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        for syntax_to_add in syntaxes_to_add {
 | 
			
		||||
            if let Some(syntax_to_add) = other_syntax_lookup.get(syntax_to_add) {
 | 
			
		||||
                self.add_with_dependencies(
 | 
			
		||||
                    syntax_to_add,
 | 
			
		||||
                    other_syntax_lookup,
 | 
			
		||||
                    syntax_to_dependencies,
 | 
			
		||||
                    syntax_to_dependents,
 | 
			
		||||
                )
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn is_syntax_already_added(&self, name: &str) -> bool {
 | 
			
		||||
        self.syntax_set_builder
 | 
			
		||||
            .syntaxes()
 | 
			
		||||
            .iter()
 | 
			
		||||
            .any(|syntax| syntax.name == name)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn build(self) -> SyntaxSet {
 | 
			
		||||
        self.syntax_set_builder.build()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn asset_to_contents<T: serde::Serialize>(
 | 
			
		||||
    asset: &T,
 | 
			
		||||
    description: &str,
 | 
			
		||||
@@ -485,16 +150,3 @@ fn asset_to_cache<T: serde::Serialize>(
 | 
			
		||||
    println!("okay");
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use super::*;
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn remove_explicit_context_sanity() {
 | 
			
		||||
        // Example from Objective-C++.sublime-syntax
 | 
			
		||||
        let scope = Scope::new("source.c++#unique-variables").unwrap();
 | 
			
		||||
        let expected = Scope::new("source.c++").unwrap();
 | 
			
		||||
        assert_eq!(remove_explicit_context(scope), expected);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,41 +0,0 @@
 | 
			
		||||
use super::*;
 | 
			
		||||
 | 
			
		||||
pub(crate) fn try_syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
    other_syntax_lookup: &OtherSyntaxLookup,
 | 
			
		||||
    syntax_to_dependencies: &SyntaxToDependencies,
 | 
			
		||||
    dot_file_path: &str,
 | 
			
		||||
) {
 | 
			
		||||
    match syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
        other_syntax_lookup,
 | 
			
		||||
        syntax_to_dependencies,
 | 
			
		||||
        dot_file_path,
 | 
			
		||||
    ) {
 | 
			
		||||
        Ok(_) => println!("Wrote graphviz dot file to {}", dot_file_path),
 | 
			
		||||
        Err(e) => eprintln!(
 | 
			
		||||
            "Failed to write graphviz dot file to {}: {}",
 | 
			
		||||
            dot_file_path, e
 | 
			
		||||
        ),
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn syntax_dependencies_to_graphviz_dot_file(
 | 
			
		||||
    other_syntax_lookup: &OtherSyntaxLookup,
 | 
			
		||||
    syntax_to_dependencies: &SyntaxToDependencies,
 | 
			
		||||
    dot_file_path: &str,
 | 
			
		||||
) -> Result<()> {
 | 
			
		||||
    use std::io::Write;
 | 
			
		||||
 | 
			
		||||
    let mut dot_file = std::fs::File::create(dot_file_path)?;
 | 
			
		||||
 | 
			
		||||
    writeln!(dot_file, "digraph BatSyntaxDependencies {{")?;
 | 
			
		||||
    for (key, dependencies) in syntax_to_dependencies {
 | 
			
		||||
        for dependency in dependencies {
 | 
			
		||||
            if let Some(dep) = other_syntax_lookup.get(dependency) {
 | 
			
		||||
                writeln!(dot_file, "    \"{}\" -> \"{}\"", key, dep.name)?;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    writeln!(dot_file, "}}")?;
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
@@ -1,72 +0,0 @@
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
 | 
			
		||||
use lazycell::LazyCell;
 | 
			
		||||
 | 
			
		||||
use syntect::parsing::SyntaxSet;
 | 
			
		||||
 | 
			
		||||
use super::*;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub(crate) struct MinimalAssets {
 | 
			
		||||
    minimal_syntaxes: MinimalSyntaxes,
 | 
			
		||||
 | 
			
		||||
    /// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
 | 
			
		||||
    /// index in this vec matches the index in
 | 
			
		||||
    /// [Self.minimal_syntaxes.serialized_syntax_sets]
 | 
			
		||||
    deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
 | 
			
		||||
/// stored in serialized form, and are deserialized on-demand. This gives good
 | 
			
		||||
/// startup performance since only the necessary [SyntaxReference]s need to be
 | 
			
		||||
/// deserialized.
 | 
			
		||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
 | 
			
		||||
pub(crate) struct MinimalSyntaxes {
 | 
			
		||||
    /// Look up the index into `serialized_syntax_sets` of a [SyntaxSet] by the
 | 
			
		||||
    /// name of any [SyntaxReference] inside the [SyntaxSet]
 | 
			
		||||
    /// (We will later add `by_extension`, `by_first_line`, etc.)
 | 
			
		||||
    pub(crate) by_name: HashMap<String, usize>,
 | 
			
		||||
 | 
			
		||||
    /// Serialized [SyntaxSet]s. Whether or not this data is compressed is
 | 
			
		||||
    /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
 | 
			
		||||
    pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl MinimalAssets {
 | 
			
		||||
    pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
 | 
			
		||||
        // Prepare so we can lazily load minimal syntaxes without a mut reference
 | 
			
		||||
        let deserialized_minimal_syntaxes =
 | 
			
		||||
            vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
 | 
			
		||||
 | 
			
		||||
        Self {
 | 
			
		||||
            minimal_syntaxes,
 | 
			
		||||
            deserialized_minimal_syntaxes,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
 | 
			
		||||
        self.minimal_syntaxes
 | 
			
		||||
            .by_name
 | 
			
		||||
            .get(&name.to_ascii_lowercase())
 | 
			
		||||
            .and_then(|index| self.get_minimal_syntax_set_with_index(*index))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
 | 
			
		||||
        let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
 | 
			
		||||
        asset_from_contents(
 | 
			
		||||
            &serialized_syntax_set[..],
 | 
			
		||||
            &format!("minimal syntax set {}", index),
 | 
			
		||||
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
 | 
			
		||||
        )
 | 
			
		||||
        .map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
 | 
			
		||||
        self.deserialized_minimal_syntaxes
 | 
			
		||||
            .get(index)
 | 
			
		||||
            .and_then(|cell| {
 | 
			
		||||
                cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
 | 
			
		||||
                    .ok()
 | 
			
		||||
            })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -21,7 +21,6 @@ pub fn cache_dir() -> Cow<'static, str> {
 | 
			
		||||
pub fn clear_assets() {
 | 
			
		||||
    clear_asset("themes.bin", "theme set cache");
 | 
			
		||||
    clear_asset("syntaxes.bin", "syntax set cache");
 | 
			
		||||
    clear_asset("minimal_syntaxes.bin", "minimal syntax sets cache");
 | 
			
		||||
    clear_asset("metadata.yaml", "metadata file");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user