diff --git a/src/assets.rs b/src/assets.rs index 8f794483..3f844bc4 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -1,7 +1,6 @@ -use std::collections::HashMap; use std::ffi::OsStr; use std::fs; -use std::path::{Path, PathBuf}; +use std::path::Path; use lazycell::LazyCell; @@ -15,17 +14,26 @@ use crate::error::*; use crate::input::{InputReader, OpenedInput, OpenedInputKind}; use crate::syntax_mapping::{MappingTarget, SyntaxMapping}; +use ignored_suffixes::*; +use minimal_assets::*; +use serialized_syntax_set::*; + +#[cfg(feature = "build-assets")] +pub use crate::assets::build_assets::*; + +pub(crate) mod assets_metadata; +#[cfg(feature = "build-assets")] +mod build_assets; +mod ignored_suffixes; +mod minimal_assets; +mod serialized_syntax_set; + #[derive(Debug)] pub struct HighlightingAssets { syntax_set_cell: LazyCell, serialized_syntax_set: SerializedSyntaxSet, - minimal_syntaxes: MinimalSyntaxes, - - /// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The - /// index in this vec matches the index in - /// [Self.minimal_syntaxes.serialized_syntax_sets] - deserialized_minimal_syntaxes: Vec>, + minimal_assets: MinimalAssets, theme_set: ThemeSet, fallback_theme: Option<&'static str>, @@ -37,22 +45,6 @@ pub struct SyntaxReferenceInSet<'a> { pub syntax_set: &'a SyntaxSet, } -/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are -/// stored in serialized form, and are deserialized on-demand. This gives good -/// startup performance since only the necessary [SyntaxReference]s needs to be -/// deserialized. -#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] -pub(crate) struct MinimalSyntaxes { - /// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the - /// name of any [SyntaxReference] inside the [SyntaxSet] - /// (We will later add `by_extension`, `by_first_line`, etc.) - pub(crate) by_name: HashMap, - - /// Serialized [SyntaxSet]s. 
Whether or not this data is compressed is - /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] - pub(crate) serialized_syntax_sets: Vec>, -} - // Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time pub(crate) const COMPRESS_SYNTAXES: bool = true; @@ -70,41 +62,16 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true; // efficient byte-by-byte copy of `serialized_syntax_sets`. pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false; -const IGNORED_SUFFIXES: [&str; 13] = [ - // Editor etc backups - "~", - ".bak", - ".old", - ".orig", - // Debian and derivatives apt/dpkg/ucf backups - ".dpkg-dist", - ".dpkg-old", - ".ucf-dist", - ".ucf-new", - ".ucf-old", - // Red Hat and derivatives rpm backups - ".rpmnew", - ".rpmorig", - ".rpmsave", - // Build system input/template files - ".in", -]; - impl HighlightingAssets { fn new( serialized_syntax_set: SerializedSyntaxSet, minimal_syntaxes: MinimalSyntaxes, theme_set: ThemeSet, ) -> Self { - // Prepare so we can lazily load minimal syntaxes without a mut reference - let deserialized_minimal_syntaxes = - vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()]; - HighlightingAssets { syntax_set_cell: LazyCell::new(), serialized_syntax_set, - deserialized_minimal_syntaxes, - minimal_syntaxes, + minimal_assets: MinimalAssets::new(minimal_syntaxes), theme_set, fallback_theme: None, } @@ -167,37 +134,12 @@ impl HighlightingAssets { /// tries to find a minimal [SyntaxSet]. If none is found, returns the /// [SyntaxSet] that contains all syntaxes. 
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> { - let minimal_syntax_set = self - .minimal_syntaxes - .by_name - .get(&name.to_ascii_lowercase()) - .and_then(|index| self.get_minimal_syntax_set_with_index(*index)); - - match minimal_syntax_set { + match self.minimal_assets.get_syntax_set_by_name(name) { Some(syntax_set) => Ok(syntax_set), None => self.get_syntax_set(), } } - fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result { - let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index]; - asset_from_contents( - &serialized_syntax_set[..], - &format!("minimal syntax set {}", index), - COMPRESS_SERIALIZED_MINIMAL_SYNTAXES, - ) - .map_err(|_| format!("Could not parse minimal syntax set {}", index).into()) - } - - fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> { - self.deserialized_minimal_syntaxes - .get(index) - .and_then(|cell| { - cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index)) - .ok() - }) - } - /// Use [Self::get_syntax_for_file_name] instead #[deprecated] pub fn syntax_for_file_name( @@ -319,7 +261,9 @@ impl HighlightingAssets { syntax = self.find_syntax_by_file_name_extension(file_name)?; } if syntax.is_none() { - syntax = self.get_extension_syntax_with_stripped_suffix(file_name)?; + syntax = try_with_stripped_suffix(file_name, |stripped_file_name| { + self.get_extension_syntax(stripped_file_name) // Note: recursion + })?; } Ok(syntax) } @@ -340,25 +284,6 @@ impl HighlightingAssets { ) } - /// If we find an ignored suffix on the file name, e.g. '~', we strip it and - /// then try again to find a syntax without it. Note that we do this recursively. 
- fn get_extension_syntax_with_stripped_suffix( - &self, - file_name: &OsStr, - ) -> Result> { - let file_path = Path::new(file_name); - let mut syntax = None; - if let Some(file_str) = file_path.to_str() { - for suffix in &IGNORED_SUFFIXES { - if let Some(stripped_filename) = file_str.strip_suffix(suffix) { - syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?; - break; - } - } - } - Ok(syntax) - } - fn get_first_line_syntax( &self, reader: &mut InputReader, @@ -371,31 +296,6 @@ impl HighlightingAssets { } } -#[cfg(feature = "build-assets")] -pub use crate::build_assets::build_assets as build; - -/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed. -/// We keep it in this format since we want to load it lazily. -#[derive(Debug)] -enum SerializedSyntaxSet { - /// The data comes from a user-generated cache file. - FromFile(PathBuf), - - /// The data to use is embedded into the bat binary. - FromBinary(&'static [u8]), -} - -impl SerializedSyntaxSet { - fn deserialize(&self) -> Result { - match self { - SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)), - SerializedSyntaxSet::FromFile(ref path) => { - asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES) - } - } - } -} - pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] { include_bytes!("../assets/syntaxes.bin") } diff --git a/src/assets_metadata.rs b/src/assets/assets_metadata.rs similarity index 100% rename from src/assets_metadata.rs rename to src/assets/assets_metadata.rs diff --git a/src/build_assets.rs b/src/assets/build_assets.rs similarity index 99% rename from src/build_assets.rs rename to src/assets/build_assets.rs index 75e6d5bc..e88890c5 100644 --- a/src/build_assets.rs +++ b/src/assets/build_assets.rs @@ -7,7 +7,6 @@ use syntect::parsing::syntax_definition::{ use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder}; use crate::assets::*; -use crate::error::*; type SyntaxName = String; @@ -27,7 +26,7 @@ enum 
Dependency { ByScope(Scope), } -pub fn build_assets( +pub fn build( source_dir: &Path, include_integrated_assets: bool, target_dir: &Path, diff --git a/src/assets/ignored_suffixes.rs b/src/assets/ignored_suffixes.rs new file mode 100644 index 00000000..f653e3a0 --- /dev/null +++ b/src/assets/ignored_suffixes.rs @@ -0,0 +1,46 @@ +use std::ffi::OsStr; +use std::path::Path; + +use crate::error::*; + +const IGNORED_SUFFIXES: [&str; 13] = [ + // Editor etc backups + "~", + ".bak", + ".old", + ".orig", + // Debian and derivatives apt/dpkg/ucf backups + ".dpkg-dist", + ".dpkg-old", + ".ucf-dist", + ".ucf-new", + ".ucf-old", + // Red Hat and derivatives rpm backups + ".rpmnew", + ".rpmorig", + ".rpmsave", + // Build system input/template files + ".in", +]; + +/// If we find an ignored suffix on the file name, e.g. '~', we strip it and +/// then try again without it. +/// +/// Only the first matching entry of `IGNORED_SUFFIXES` is stripped, so `func` +/// runs at most once. Yields `Ok(None)` when nothing matches or `file_name` is +/// not valid UTF-8; an error returned by `func` is passed on to the caller. +pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>> +where + F: Fn(&OsStr) -> Result<Option<T>>, +{ + let mut from_stripped = None; + if let Some(file_str) = Path::new(file_name).to_str() { + for suffix in &IGNORED_SUFFIXES { + if let Some(stripped_filename) = file_str.strip_suffix(suffix) { + from_stripped = func(OsStr::new(stripped_filename))?; + break; + } + } + } + Ok(from_stripped) +} diff --git a/src/assets/minimal_assets.rs b/src/assets/minimal_assets.rs new file mode 100644 index 00000000..6bd33d08 --- /dev/null +++ b/src/assets/minimal_assets.rs @@ -0,0 +1,72 @@ +use std::collections::HashMap; + +use lazycell::LazyCell; + +use syntect::parsing::SyntaxSet; + +use super::*; + +#[derive(Debug)] +pub(crate) struct MinimalAssets { + minimal_syntaxes: MinimalSyntaxes, + + /// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The + /// index in this vec matches the index in + /// [Self.minimal_syntaxes.serialized_syntax_sets] + deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>, +} + +/// Stores and allows lookup of minimal [SyntaxSet]s. 
The [SyntaxSet]s are +/// stored in serialized form, and are deserialized on-demand. This gives good +/// startup performance since only the necessary [SyntaxReference]s need to be +/// deserialized. +#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] +pub(crate) struct MinimalSyntaxes { + /// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the + /// name of any [SyntaxReference] inside the [SyntaxSet] + /// (We will later add `by_extension`, `by_first_line`, etc.) + pub(crate) by_name: HashMap<String, usize>, + + /// Serialized [SyntaxSet]s. Whether or not this data is compressed is + /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] + pub(crate) serialized_syntax_sets: Vec<Vec<u8>>, +} + +impl MinimalAssets { + pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self { + // Prepare so we can lazily load minimal syntaxes without a mut reference + let deserialized_minimal_syntaxes = + vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()]; + + Self { + minimal_syntaxes, + deserialized_minimal_syntaxes, + } + } + + pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> { + self.minimal_syntaxes + .by_name + .get(&name.to_ascii_lowercase()) + .and_then(|index| self.get_minimal_syntax_set_with_index(*index)) + } + + fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> { + let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index]; + asset_from_contents( + &serialized_syntax_set[..], + &format!("minimal syntax set {}", index), + COMPRESS_SERIALIZED_MINIMAL_SYNTAXES, + ) + .map_err(|_| format!("Could not parse minimal syntax set {}", index).into()) + } + + fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> { + self.deserialized_minimal_syntaxes + .get(index) + .and_then(|cell| { + cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index)) + .ok() + }) + } +} diff --git a/src/assets/serialized_syntax_set.rs b/src/assets/serialized_syntax_set.rs 
new file mode 100644 index 00000000..46099e32 --- /dev/null +++ b/src/assets/serialized_syntax_set.rs @@ -0,0 +1,29 @@ +use std::path::PathBuf; + +use syntect::parsing::SyntaxSet; + +use super::*; + +/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed. +/// We keep it in this format since we want to load it lazily. +#[derive(Debug)] +pub enum SerializedSyntaxSet { + /// The data comes from a user-generated cache file. + FromFile(PathBuf), + + /// The data to use is embedded into the bat binary. + FromBinary(&'static [u8]), +} + +impl SerializedSyntaxSet { + /// Produce the [SyntaxSet]: embedded bytes are decoded with `from_binary`, a + /// cache file is loaded with `asset_from_cache` (whose error is returned as-is). + pub fn deserialize(&self) -> Result<SyntaxSet> { + match self { + SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)), + SerializedSyntaxSet::FromFile(ref path) => { + asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 950d0967..86bb5804 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,9 +22,9 @@ mod macros; pub mod assets; -pub mod assets_metadata; -#[cfg(feature = "build-assets")] -mod build_assets; +pub mod assets_metadata { + pub use super::assets::assets_metadata::*; +} pub mod config; pub mod controller; mod decorations;