diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a0440c..45d5926e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,6 @@ - Load cached assets as fast as integrated assets, see #1753 (@Enselic) - Greatly reduce startup time in loop-through mode, e.g. when redirecting output. Instead of *50 ms* - *100 ms*, startup takes *5 ms* - *10 ms*. See #1747 (@Enselic) -- Reduce startup time by approximately 80% for 91 out of 168 syntaxes when using `--language`. See #1787 (@Enselic) ## Other diff --git a/assets/minimal_syntaxes.bin b/assets/minimal_syntaxes.bin deleted file mode 100644 index 96be60c8..00000000 Binary files a/assets/minimal_syntaxes.bin and /dev/null differ diff --git a/src/assets.rs b/src/assets.rs index 8ac682d0..5a285e4c 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -15,7 +15,6 @@ use crate::syntax_mapping::ignored_suffixes::IgnoredSuffixes; use crate::syntax_mapping::MappingTarget; use crate::{bat_warning, SyntaxMapping}; -use minimal_assets::*; use serialized_syntax_set::*; #[cfg(feature = "build-assets")] @@ -24,7 +23,6 @@ pub use crate::assets::build_assets::*; pub(crate) mod assets_metadata; #[cfg(feature = "build-assets")] mod build_assets; -mod minimal_assets; mod serialized_syntax_set; #[derive(Debug)] @@ -32,8 +30,6 @@ pub struct HighlightingAssets { syntax_set_cell: LazyCell, serialized_syntax_set: SerializedSyntaxSet, - minimal_assets: MinimalAssets, - theme_set: ThemeSet, fallback_theme: Option<&'static str>, } @@ -50,27 +46,11 @@ pub(crate) const COMPRESS_SYNTAXES: bool = true; /// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time pub(crate) const COMPRESS_THEMES: bool = true; -/// Compress for size of ~400 kB instead of ~2100 kB at the cost of ~30% longer deserialization time -pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true; - -/// Whether or not to compress the serialized form of [MinimalSyntaxes]. Shall -/// always be `false`, because the data in -/// [MinimalSyntaxes.serialized_syntax_sets] has already been compressed -/// (assuming [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] is `true`). The "outer" data -/// structures like `by_name` are tiny. If we compress, deserialization can't do -/// efficient byte-by-byte copy of `serialized_syntax_sets`. -pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false; - impl HighlightingAssets { - fn new( - serialized_syntax_set: SerializedSyntaxSet, - minimal_syntaxes: MinimalSyntaxes, - theme_set: ThemeSet, - ) -> Self { + fn new(serialized_syntax_set: SerializedSyntaxSet, theme_set: ThemeSet) -> Self { HighlightingAssets { syntax_set_cell: LazyCell::new(), serialized_syntax_set, - minimal_assets: MinimalAssets::new(minimal_syntaxes), theme_set, fallback_theme: None, } @@ -83,11 +63,6 @@ impl HighlightingAssets { pub fn from_cache(cache_path: &Path) -> Result { Ok(HighlightingAssets::new( SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")), - asset_from_cache( - &cache_path.join("minimal_syntaxes.bin"), - "minimal syntax sets", - COMPRESS_MINIMAL_SYNTAXES, - )?, asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?, )) } @@ -95,7 +70,6 @@ impl HighlightingAssets { pub fn from_binary() -> Self { HighlightingAssets::new( SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()), - get_integrated_minimal_syntaxes(), get_integrated_themeset(), ) } @@ -129,16 +103,6 @@ impl HighlightingAssets { self.get_theme_set().themes.keys().map(|s| s.as_ref()) } - /// Finds a [SyntaxSet] that contains a [SyntaxReference] by its name. First - /// tries to find a minimal [SyntaxSet]. If none is found, returns the - /// [SyntaxSet] that contains all syntaxes. - fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> { - match self.minimal_assets.get_syntax_set_by_name(name) { - Some(syntax_set) => Ok(syntax_set), - None => self.get_syntax_set(), - } - } - /// Use [Self::get_syntax_for_path] instead #[deprecated] pub fn syntax_for_file_name( @@ -234,7 +198,7 @@ impl HighlightingAssets { mapping: &SyntaxMapping, ) -> Result { if let Some(language) = language { - let syntax_set = self.get_syntax_set_by_name(language)?; + let syntax_set = self.get_syntax_set()?; return syntax_set .find_syntax_by_token(language) .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }) @@ -331,13 +295,6 @@ pub(crate) fn get_integrated_themeset() -> ThemeSet { from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES) } -fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes { - from_binary( - include_bytes!("../assets/minimal_syntaxes.bin"), - COMPRESS_MINIMAL_SYNTAXES, - ) -} - pub(crate) fn from_binary(v: &[u8], compressed: bool) -> T { asset_from_contents(v, "n/a", compressed) .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!") diff --git a/src/assets/build_assets.rs b/src/assets/build_assets.rs index b09d36dc..34fa150b 100644 --- a/src/assets/build_assets.rs +++ b/src/assets/build_assets.rs @@ -1,36 +1,9 @@ -use std::collections::HashMap; use std::path::Path; use syntect::highlighting::ThemeSet; -use syntect::parsing::syntax_definition::{ - ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition, -}; -use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder}; +use syntect::parsing::{SyntaxSet, SyntaxSetBuilder}; use crate::assets::*; -mod graphviz_utils; - -type SyntaxName = String; - -/// Used to look up which [SyntaxDefinition] corresponds to a given [OtherSyntax] -type OtherSyntaxLookup<'a> = HashMap; - -/// Used to look up what dependencies a given [SyntaxDefinition] has -type SyntaxToDependencies = HashMap>; - -/// Used to look up what other [SyntaxDefinition]s depend on a given [SyntaxDefinition] -type SyntaxToDependents<'a> = HashMap>; - -/// Represents some other `*.sublime-syntax` file, i.e. another [SyntaxDefinition]. -#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Clone, Hash)] -pub(crate) enum OtherSyntax { - /// By name. Example YAML: `include: C.sublime-syntax` (name is `"C"`) - ByName(String), - - /// By scope. Example YAML: `embed: scope:source.c` (scope is `"source.c"`) - ByScope(Scope), -} - pub fn build( source_dir: &Path, include_integrated_assets: bool, @@ -41,19 +14,11 @@ pub fn build( let syntax_set_builder = build_syntax_set_builder(source_dir, include_integrated_assets)?; - let minimal_syntaxes = build_minimal_syntaxes(&syntax_set_builder, include_integrated_assets)?; - let syntax_set = syntax_set_builder.build(); print_unlinked_contexts(&syntax_set); - write_assets( - &theme_set, - &syntax_set, - &minimal_syntaxes, - target_dir, - current_version, - ) + write_assets(&theme_set, &syntax_set, target_dir, current_version) } fn build_theme_set(source_dir: &Path, include_integrated_assets: bool) -> ThemeSet { @@ -122,7 +87,6 @@ fn print_unlinked_contexts(syntax_set: &SyntaxSet) { fn write_assets( theme_set: &ThemeSet, syntax_set: &SyntaxSet, - minimal_syntaxes: &MinimalSyntaxes, target_dir: &Path, current_version: &str, ) -> Result<()> { @@ -139,12 +103,6 @@ fn write_assets( "syntax set", COMPRESS_SYNTAXES, )?; - asset_to_cache( - minimal_syntaxes, - &target_dir.join("minimal_syntaxes.bin"), - "minimal syntax sets", - COMPRESS_MINIMAL_SYNTAXES, - )?; print!( "Writing metadata to folder {} ... ", @@ -156,299 +114,6 @@ fn write_assets( Ok(()) } -fn print_syntax_set_names(syntax_set: &SyntaxSet) { - let names = syntax_set - .syntaxes() - .iter() - .map(|syntax| &syntax.name) - .collect::>(); - println!("{:?}", names); -} - -fn build_minimal_syntaxes( - syntax_set_builder: &'_ SyntaxSetBuilder, - include_integrated_assets: bool, -) -> Result { - let mut minimal_syntaxes = MinimalSyntaxes { - by_name: HashMap::new(), - serialized_syntax_sets: vec![], - }; - - if include_integrated_assets { - // Dependency info is not present in integrated assets, so we can't - // calculate minimal syntax sets. Return early without any data filled - // in. This means that no minimal syntax sets will be available to use, and - // the full, slow-to-deserialize, fallback syntax set will be used instead. - return Ok(minimal_syntaxes); - } - - let minimal_syntax_sets_to_serialize = build_minimal_syntax_sets(syntax_set_builder) - // For now, only store syntax sets with one syntax, otherwise - // the binary grows by several megs - .filter(|syntax_set| syntax_set.syntaxes().len() == 1); - - for minimal_syntax_set in minimal_syntax_sets_to_serialize { - // Remember what index it is found at - let current_index = minimal_syntaxes.serialized_syntax_sets.len(); - - for syntax in minimal_syntax_set.syntaxes() { - minimal_syntaxes - .by_name - .insert(syntax.name.to_ascii_lowercase().clone(), current_index); - } - - let serialized_syntax_set = asset_to_contents( - &minimal_syntax_set, - &format!("failed to serialize minimal syntax set {}", current_index), - COMPRESS_SERIALIZED_MINIMAL_SYNTAXES, - )?; - - // Add last so that it ends up at `current_index` - minimal_syntaxes - .serialized_syntax_sets - .push(serialized_syntax_set); - } - - Ok(minimal_syntaxes) -} - -/// Analyzes dependencies between syntaxes in a [SyntaxSetBuilder]. -/// From that, it builds minimal [SyntaxSet]s. -fn build_minimal_syntax_sets( - syntax_set_builder: &'_ SyntaxSetBuilder, -) -> impl Iterator + '_ { - let syntaxes = syntax_set_builder.syntaxes(); - - // Build the data structures we need for dependency resolution - let (other_syntax_lookup, syntax_to_dependencies, syntax_to_dependents) = - generate_maps(syntaxes); - - maybe_write_syntax_dependencies_to_graphviz_dot_file( - &other_syntax_lookup, - &syntax_to_dependencies, - ); - - // Create one minimal SyntaxSet from each (non-hidden) SyntaxDefinition - syntaxes.iter().filter_map(move |syntax| { - if syntax.hidden { - return None; - } - - let mut builder = SyntaxSetDependencyBuilder::new(); - builder.add_with_dependencies( - syntax, - &other_syntax_lookup, - &syntax_to_dependencies, - &syntax_to_dependents, - ); - let syntax_set = builder.build(); - - if std::env::var("BAT_PRINT_SYNTAX_DEPENDENCIES").is_ok() { - // To trigger this code, run: - // BAT_PRINT_SYNTAX_DEPENDENCIES=1 cargo run -- cache --build --source assets --blank --target /tmp - print_syntax_set_names(&syntax_set); - } - - Some(syntax_set) - }) -} - -/// In order to analyze dependencies, we need three key pieces of data. -/// -/// * When we have a [OtherSyntax], we need to know what [SyntaxDefinition] -/// that corresponds to -/// * When we have a [SyntaxDefinition], we need to know what dependencies it -/// has -/// * When we have a [SyntaxDefinition], we need to know what other syntaxes -/// depend on it -/// -/// This functions generates that data for each syntax. -fn generate_maps( - syntaxes: &[SyntaxDefinition], -) -> (OtherSyntaxLookup, SyntaxToDependencies, SyntaxToDependents) { - let mut other_syntax_lookup = HashMap::new(); - let mut syntax_to_dependencies = HashMap::new(); - let mut syntax_to_dependents = HashMap::new(); - - for syntax in syntaxes { - other_syntax_lookup.insert(OtherSyntax::ByName(syntax.name.clone()), syntax); - other_syntax_lookup.insert(OtherSyntax::ByScope(syntax.scope), syntax); - } - - for syntax in syntaxes { - let dependencies = dependencies_for_syntax(syntax); - - for dependency in &dependencies { - if let Some(dependency) = other_syntax_lookup.get(dependency) { - syntax_to_dependents - .entry(dependency.name.clone()) - .or_insert_with(Vec::new) - .push(OtherSyntax::ByName(syntax.name.clone())); - } - } - - syntax_to_dependencies.insert(syntax.name.clone(), dependencies); - } - - ( - other_syntax_lookup, - syntax_to_dependencies, - syntax_to_dependents, - ) -} - -/// Gets what external dependencies a given [SyntaxDefinition] has. -/// An external dependency is another `.sublime-syntax` file. -/// It does that by looking for variants of the following YAML patterns: -/// - `include: C.sublime-syntax` -/// - `embed: scope:source.c` -fn dependencies_for_syntax(syntax: &SyntaxDefinition) -> Vec { - let mut dependencies: Vec = syntax - .contexts - .values() - .flat_map(|context| &context.patterns) - .flat_map(dependencies_from_pattern) - .collect(); - - // No need to track a dependency more than once - dependencies.sort(); - dependencies.dedup(); - - dependencies -} - -fn dependencies_from_pattern(pattern: &Pattern) -> Vec { - match *pattern { - Pattern::Match(MatchPattern { - operation: MatchOperation::Push(ref context_references), - .. - }) => context_references - .iter() - .map(dependency_from_context_reference) - .collect(), - Pattern::Include(ref context_reference) => { - vec![dependency_from_context_reference(context_reference)] - } - _ => vec![], - } - .into_iter() - .flatten() - .collect() -} - -/// To generate a Graphviz dot file of syntax dependencies, do this: -/// ```bash -/// sudo apt install graphviz -/// BAT_SYNTAX_DEPENDENCIES_TO_GRAPHVIZ_DOT_FILE=/tmp/bat-syntax-dependencies.dot cargo run -- cache --build --source assets --blank --target /tmp -/// dot /tmp/bat-syntax-dependencies.dot -Tpng -o /tmp/bat-syntax-dependencies.png -/// open /tmp/bat-syntax-dependencies.png -/// ``` -fn maybe_write_syntax_dependencies_to_graphviz_dot_file( - other_syntax_lookup: &OtherSyntaxLookup, - syntax_to_dependencies: &SyntaxToDependencies, -) { - if let Ok(dot_file_path) = std::env::var("BAT_SYNTAX_DEPENDENCIES_TO_GRAPHVIZ_DOT_FILE") { - graphviz_utils::try_syntax_dependencies_to_graphviz_dot_file( - other_syntax_lookup, - syntax_to_dependencies, - &dot_file_path, - ); - } -} - -/// Removes any context name from the syntax reference. -/// -/// When we track dependencies between syntaxes, we are not interested in -/// dependencies on specific contexts inside other syntaxes. We only care about -/// the dependency on the syntax itself. -/// -/// For example, if a syntax includes another syntax like this: -/// ```yaml -/// - include: scope:source.c++#unique-variables -/// ``` -/// we only want to track the dependency on `source.c++`. -fn remove_explicit_context(scope: Scope) -> Scope { - if let Some(without_context) = scope.build_string().split('#').next() { - Scope::new(without_context).expect("removing context reference must never fail") - } else { - scope - } -} - -fn dependency_from_context_reference(context_reference: &ContextReference) -> Option { - match &context_reference { - ContextReference::File { ref name, .. } => Some(OtherSyntax::ByName(name.clone())), - ContextReference::ByScope { ref scope, .. } => { - Some(OtherSyntax::ByScope(remove_explicit_context(*scope))) - } - _ => None, - } -} - -/// Helper to construct a [SyntaxSetBuilder] that contains only [SyntaxDefinition]s -/// that have dependencies among them. -struct SyntaxSetDependencyBuilder { - syntax_set_builder: SyntaxSetBuilder, -} - -impl SyntaxSetDependencyBuilder { - fn new() -> Self { - SyntaxSetDependencyBuilder { - syntax_set_builder: SyntaxSetBuilder::new(), - } - } - - /// Add a [SyntaxDefinition] to the underlying [SyntaxSetBuilder]. - /// Also resolve any dependencies it has and add those [SyntaxDefinition]s too. - /// This is a recursive process. - fn add_with_dependencies( - &mut self, - syntax: &SyntaxDefinition, - other_syntax_lookup: &OtherSyntaxLookup, - syntax_to_dependencies: &SyntaxToDependencies, - syntax_to_dependents: &SyntaxToDependents, - ) { - let name = &syntax.name; - if self.is_syntax_already_added(name) { - return; - } - - self.syntax_set_builder.add(syntax.clone()); - - let mut syntaxes_to_add = vec![]; - if let Some(dependencies) = syntax_to_dependencies.get(name) { - syntaxes_to_add.extend(dependencies); - } - if let Some(dependents) = syntax_to_dependents.get(name) { - // This will later be enabled intelligently - if std::env::var("BAT_INCLUDE_SYNTAX_DEPENDENTS").is_ok() { - syntaxes_to_add.extend(dependents); - } - } - for syntax_to_add in syntaxes_to_add { - if let Some(syntax_to_add) = other_syntax_lookup.get(syntax_to_add) { - self.add_with_dependencies( - syntax_to_add, - other_syntax_lookup, - syntax_to_dependencies, - syntax_to_dependents, - ) - } - } - } - - fn is_syntax_already_added(&self, name: &str) -> bool { - self.syntax_set_builder - .syntaxes() - .iter() - .any(|syntax| syntax.name == name) - } - - fn build(self) -> SyntaxSet { - self.syntax_set_builder.build() - } -} - fn asset_to_contents( asset: &T, description: &str, @@ -485,16 +150,3 @@ fn asset_to_cache( println!("okay"); Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn remove_explicit_context_sanity() { - // Example from Objective-C++.sublime-syntax - let scope = Scope::new("source.c++#unique-variables").unwrap(); - let expected = Scope::new("source.c++").unwrap(); - assert_eq!(remove_explicit_context(scope), expected); - } -} diff --git a/src/assets/build_assets/graphviz_utils.rs b/src/assets/build_assets/graphviz_utils.rs deleted file mode 100644 index 5c93b37b..00000000 --- a/src/assets/build_assets/graphviz_utils.rs +++ /dev/null @@ -1,41 +0,0 @@ -use super::*; - -pub(crate) fn try_syntax_dependencies_to_graphviz_dot_file( - other_syntax_lookup: &OtherSyntaxLookup, - syntax_to_dependencies: &SyntaxToDependencies, - dot_file_path: &str, -) { - match syntax_dependencies_to_graphviz_dot_file( - other_syntax_lookup, - syntax_to_dependencies, - dot_file_path, - ) { - Ok(_) => println!("Wrote graphviz dot file to {}", dot_file_path), - Err(e) => eprintln!( - "Failed to write graphviz dot file to {}: {}", - dot_file_path, e - ), - }; -} - -fn syntax_dependencies_to_graphviz_dot_file( - other_syntax_lookup: &OtherSyntaxLookup, - syntax_to_dependencies: &SyntaxToDependencies, - dot_file_path: &str, -) -> Result<()> { - use std::io::Write; - - let mut dot_file = std::fs::File::create(dot_file_path)?; - - writeln!(dot_file, "digraph BatSyntaxDependencies {{")?; - for (key, dependencies) in syntax_to_dependencies { - for dependency in dependencies { - if let Some(dep) = other_syntax_lookup.get(dependency) { - writeln!(dot_file, " \"{}\" -> \"{}\"", key, dep.name)?; - } - } - } - writeln!(dot_file, "}}")?; - - Ok(()) -} diff --git a/src/assets/minimal_assets.rs b/src/assets/minimal_assets.rs deleted file mode 100644 index 6bd33d08..00000000 --- a/src/assets/minimal_assets.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::collections::HashMap; - -use lazycell::LazyCell; - -use syntect::parsing::SyntaxSet; - -use super::*; - -#[derive(Debug)] -pub(crate) struct MinimalAssets { - minimal_syntaxes: MinimalSyntaxes, - - /// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The - /// index in this vec matches the index in - /// [Self.minimal_syntaxes.serialized_syntax_sets] - deserialized_minimal_syntaxes: Vec>, -} - -/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are -/// stored in serialized form, and are deserialized on-demand. This gives good -/// startup performance since only the necessary [SyntaxReference]s needs to be -/// deserialized. -#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] -pub(crate) struct MinimalSyntaxes { - /// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the - /// name of any [SyntaxReference] inside the [SyntaxSet] - /// (We will later add `by_extension`, `by_first_line`, etc.) - pub(crate) by_name: HashMap, - - /// Serialized [SyntaxSet]s. Whether or not this data is compressed is - /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] - pub(crate) serialized_syntax_sets: Vec>, -} - -impl MinimalAssets { - pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self { - // Prepare so we can lazily load minimal syntaxes without a mut reference - let deserialized_minimal_syntaxes = - vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()]; - - Self { - minimal_syntaxes, - deserialized_minimal_syntaxes, - } - } - - pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> { - self.minimal_syntaxes - .by_name - .get(&name.to_ascii_lowercase()) - .and_then(|index| self.get_minimal_syntax_set_with_index(*index)) - } - - fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result { - let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index]; - asset_from_contents( - &serialized_syntax_set[..], - &format!("minimal syntax set {}", index), - COMPRESS_SERIALIZED_MINIMAL_SYNTAXES, - ) - .map_err(|_| format!("Could not parse minimal syntax set {}", index).into()) - } - - fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> { - self.deserialized_minimal_syntaxes - .get(index) - .and_then(|cell| { - cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index)) - .ok() - }) - } -} diff --git a/src/bin/bat/assets.rs b/src/bin/bat/assets.rs index 26d599b2..951a574b 100644 --- a/src/bin/bat/assets.rs +++ b/src/bin/bat/assets.rs @@ -21,7 +21,6 @@ pub fn cache_dir() -> Cow<'static, str> { pub fn clear_assets() { clear_asset("themes.bin", "theme set cache"); clear_asset("syntaxes.bin", "syntax set cache"); - clear_asset("minimal_syntaxes.bin", "minimal syntax sets cache"); clear_asset("metadata.yaml", "metadata file"); }