mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-10-20 18:53:53 +01:00 
			
		
		
		
	Isolate variables at compile time
This commit is contained in:
		
							
								
								
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -145,6 +145,7 @@ dependencies = [ | ||||
|  "grep-cli", | ||||
|  "home", | ||||
|  "indexmap 2.1.0", | ||||
|  "itertools", | ||||
|  "nix", | ||||
|  "nu-ansi-term", | ||||
|  "once_cell", | ||||
|   | ||||
| @@ -101,6 +101,7 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] } | ||||
| [build-dependencies] | ||||
| anyhow = "1.0.75" | ||||
| indexmap = { version = "2.1.0", features = ["serde"] } | ||||
| itertools = "0.11.0" | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
| serde_with = "3.4.0" | ||||
| toml = { version = "0.8.6", features = ["preserve_order"] } | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| use std::{convert::Infallible, env, fs, path::Path, str::FromStr}; | ||||
|  | ||||
| use anyhow::anyhow; | ||||
| use anyhow::{anyhow, bail}; | ||||
| use indexmap::IndexMap; | ||||
| use itertools::Itertools; | ||||
| use serde::Deserialize; | ||||
| use serde_with::DeserializeFromStr; | ||||
| use walkdir::WalkDir; | ||||
| @@ -17,7 +18,6 @@ pub enum MappingTarget { | ||||
| } | ||||
| impl FromStr for MappingTarget { | ||||
|     type Err = Infallible; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         match s { | ||||
|             "MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown), | ||||
| @@ -36,10 +36,136 @@ impl MappingTarget { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, DeserializeFromStr)] | ||||
| /// A single matcher. | ||||
| /// | ||||
| /// Corresponds to `syntax_mapping::BuiltinMatcher`. | ||||
| struct Matcher(Vec<MatcherSegment>); | ||||
| /// Parse a matcher. | ||||
| /// | ||||
| /// Note that this implementation is rather strict: when it sees a '$', '{', or | ||||
| /// '}' where it does not make sense, it will immediately hard-error. | ||||
| /// | ||||
| /// The reason for this strictness is I currently cannot think of a valid reason | ||||
| /// why you would ever need '$', '{', or '}' as plaintext in a glob pattern. | ||||
| /// Therefore any such occurrences are likely human errors. | ||||
| /// | ||||
| /// If we later discover some edge cases, it's okay to make it more permissive. | ||||
| impl FromStr for Matcher { | ||||
|     type Err = anyhow::Error; | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         use MatcherSegment as Seg; | ||||
|  | ||||
|         if s.is_empty() { | ||||
|             bail!("Empty string is not a valid glob matcher"); | ||||
|         } | ||||
|  | ||||
|         let mut segments = Vec::new(); | ||||
|         let mut buf = String::new(); | ||||
|         let mut is_in_var = false; | ||||
|  | ||||
|         let mut char_it = s.chars(); | ||||
|         loop { | ||||
|             match char_it.next() { | ||||
|                 Some('$') => { | ||||
|                     if is_in_var { | ||||
|                         bail!(r#"Saw a '$' when already in a variable: "{s}""#); | ||||
|                     } | ||||
|                     match char_it.next() { | ||||
|                         Some('{') => { | ||||
|                             // push text unless empty | ||||
|                             if !buf.is_empty() { | ||||
|                                 segments.push(Seg::Text(buf.clone())); | ||||
|                                 buf.clear(); | ||||
|                             } | ||||
|                             // open var | ||||
|                             is_in_var = true; | ||||
|                         } | ||||
|                         Some(_) | None => { | ||||
|                             bail!(r#"Expected a '{{' after '$': "{s}""#); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Some('{') => { | ||||
|                     bail!(r#"Saw a hanging '{{': "{s}""#); | ||||
|                 } | ||||
|                 Some('}') => { | ||||
|                     if !is_in_var { | ||||
|                         bail!(r#"Saw a '}}' when not in a variable: "{s}""#); | ||||
|                     } | ||||
|                     if buf.is_empty() { | ||||
|                         // `${}` | ||||
|                         bail!(r#"Variable name cannot be empty: "{s}""#); | ||||
|                     } | ||||
|                     // push variable | ||||
|                     segments.push(Seg::Env(buf.clone())); | ||||
|                     buf.clear(); | ||||
|                     // close var | ||||
|                     is_in_var = false; | ||||
|                 } | ||||
|                 Some(' ') if is_in_var => { | ||||
|                     bail!(r#"' ' Cannot be part of a variable's name: "{s}""#); | ||||
|                 } | ||||
|                 Some(c) => { | ||||
|                     // either plaintext or variable name | ||||
|                     buf.push(c); | ||||
|                 } | ||||
|                 None => { | ||||
|                     if is_in_var { | ||||
|                         bail!(r#"Variable unclosed: "{s}""#); | ||||
|                     } | ||||
|                     segments.push(Seg::Text(buf.clone())); | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(Self(segments)) | ||||
|     } | ||||
| } | ||||
| impl Matcher { | ||||
|     fn codegen(&self) -> String { | ||||
|         match self.0.len() { | ||||
|             0 => unreachable!("0-length matcher should never be created"), | ||||
|             // if-let guard would be ideal here | ||||
|             // see: https://github.com/rust-lang/rust/issues/51114 | ||||
|             1 if matches!(self.0[0], MatcherSegment::Text(_)) => { | ||||
|                 let MatcherSegment::Text(ref s) = self.0[0] else { | ||||
|                     unreachable!() | ||||
|                 }; | ||||
|                 format!(r###"BuiltinMatcher::Fixed(r#"{s}"#)"###) | ||||
|             } | ||||
|             // parser logic ensures that this case can only happen when there are dynamic segments | ||||
|             _ => { | ||||
|                 let segments_codegen = self.0.iter().map(MatcherSegment::codegen).join(", "); | ||||
|                 let closure = format!("|| join_segments(&[{segments_codegen}])"); | ||||
|                 format!("BuiltinMatcher::Dynamic(Lazy::new({closure}))") | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// A segment in a matcher.
///
/// Corresponds to `syntax_mapping::MatcherSegment`.
#[derive(Debug, Clone)]
enum MatcherSegment {
    /// Literal text of the pattern.
    Text(String),
    /// Name of a variable written as `${NAME}` in the pattern.
    Env(String),
}
impl MatcherSegment {
    /// Generate the Rust source expression that constructs this segment.
    ///
    /// The payload is emitted as a raw string literal, e.g.
    /// `MatcherSegment::Text(r#"*.conf"#)`.
    fn codegen(&self) -> String {
        let (variant, payload) = match self {
            Self::Text(s) => ("Text", s),
            Self::Env(s) => ("Env", s),
        };
        format!("MatcherSegment::{variant}({})", Self::raw_str_literal(payload))
    }

    /// Render `s` as a raw string literal that cannot be terminated early.
    ///
    /// A raw string delimited by N `#` ends at the first `"` followed by N
    /// `#`, so N must exceed the longest run of `#` that directly follows a
    /// `"` inside `s`. At least one `#` is always used, which keeps the output
    /// for ordinary patterns at the familiar `r#"..."#` form.
    fn raw_str_literal(s: &str) -> String {
        let mut longest_run = 0;
        let mut rest = s;
        while let Some(quote_at) = rest.find('"') {
            rest = &rest[quote_at + 1..];
            let run = rest.bytes().take_while(|&b| b == b'#').count();
            longest_run = longest_run.max(run);
        }
        let hashes = "#".repeat(longest_run + 1);
        format!("r{hashes}\"{s}\"{hashes}")
    }
}
|  | ||||
| /// A struct that models a single .toml file in /src/syntax_mapping/builtins/. | ||||
| #[derive(Clone, Debug, Deserialize)] | ||||
| struct MappingDefModel { | ||||
|     mappings: IndexMap<MappingTarget, Vec<String>>, | ||||
|     mappings: IndexMap<MappingTarget, Vec<Matcher>>, | ||||
| } | ||||
| impl MappingDefModel { | ||||
|     fn into_mapping_list(self) -> MappingList { | ||||
| @@ -58,18 +184,20 @@ impl MappingDefModel { | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug)] | ||||
| struct MappingList(Vec<(String, MappingTarget)>); | ||||
| struct MappingList(Vec<(Matcher, MappingTarget)>); | ||||
| impl MappingList { | ||||
|     fn codegen(&self) -> String { | ||||
|         let array_items: Vec<_> = self | ||||
|             .0 | ||||
|             .iter() | ||||
|             .map(|(matcher, target)| format!(r###"(r#"{matcher}"#, {t})"###, t = target.codegen())) | ||||
|             .map(|(matcher, target)| { | ||||
|                 format!("({m}, {t})", m = matcher.codegen(), t = target.codegen()) | ||||
|             }) | ||||
|             .collect(); | ||||
|         let len = array_items.len(); | ||||
|  | ||||
|         format!( | ||||
|             "static STATIC_RULES: [(&str, MappingTarget); {len}] = [\n{items}\n];", | ||||
|             "static STATIC_RULES: [(BuiltinMatcher, MappingTarget); {len}] = [\n{items}\n];", | ||||
|             items = array_items.join(",\n") | ||||
|         ) | ||||
|     } | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| use std::path::Path; | ||||
| use std::{env, path::Path}; | ||||
|  | ||||
| use crate::error::Result; | ||||
| use ignored_suffixes::IgnoredSuffixes; | ||||
|  | ||||
| use globset::{Candidate, GlobBuilder, GlobMatcher}; | ||||
| use once_cell::sync::Lazy; | ||||
|  | ||||
| pub mod ignored_suffixes; | ||||
|  | ||||
| @@ -14,6 +15,60 @@ include!(concat!( | ||||
|     "/codegen_static_syntax_mappings.rs" | ||||
| )); | ||||
|  | ||||
| /// A glob matcher generated from analysing the matcher string at compile time. | ||||
| /// | ||||
| /// This is so that the string searches are moved from run time to compile time, | ||||
| /// thus improving startup performance. | ||||
| #[derive(Debug)] | ||||
| enum BuiltinMatcher { | ||||
|     /// A plaintext matcher. | ||||
|     Fixed(&'static str), | ||||
|     /// A matcher that needs dynamic environment variable replacement. | ||||
|     /// | ||||
|     /// Evaluates to `None` when any environment variable replacement fails. | ||||
|     Dynamic(Lazy<Option<String>>), | ||||
| } | ||||
| impl BuiltinMatcher { | ||||
|     /// Finalise into a glob matcher. | ||||
|     /// | ||||
|     /// Returns `None` if any environment variable replacement fails (only | ||||
|     /// possible for dynamic matchers). | ||||
|     fn to_glob_matcher(&self) -> Option<GlobMatcher> { | ||||
|         let glob_str = match self { | ||||
|             Self::Fixed(s) => *s, | ||||
|             Self::Dynamic(s) => s.as_ref()?.as_str(), | ||||
|         }; | ||||
|         Some(make_glob_matcher(glob_str).expect("A builtin glob matcher failed to compile")) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Concatenate matcher segments into one string, substituting every
/// environment variable segment with that variable's value.
///
/// Returns `None` as soon as one variable cannot be read.
///
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
fn join_segments(segs: &[MatcherSegment]) -> Option<String> {
    segs.iter().try_fold(String::new(), |mut joined, segment| {
        match *segment {
            MatcherSegment::Text(literal) => joined.push_str(literal),
            MatcherSegment::Env(name) => joined.push_str(&env::var(name).ok()?),
        }
        Some(joined)
    })
}

/// A segment of a dynamic builtin matcher.
///
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
#[derive(Clone, Debug)]
enum MatcherSegment {
    /// Literal text, appended as-is.
    Text(&'static str),
    /// Name of an environment variable whose value is appended.
    Env(&'static str),
}
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||
| #[non_exhaustive] | ||||
| pub enum MappingTarget<'a> { | ||||
| @@ -34,6 +89,15 @@ pub enum MappingTarget<'a> { | ||||
|     MapExtensionToUnknown, | ||||
| } | ||||
|  | ||||
| fn make_glob_matcher(from: &str) -> Result<GlobMatcher> { | ||||
|     let matcher = GlobBuilder::new(from) | ||||
|         .case_insensitive(true) | ||||
|         .literal_separator(true) | ||||
|         .build()? | ||||
|         .compile_matcher(); | ||||
|     Ok(matcher) | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default)] | ||||
| pub struct SyntaxMapping<'a> { | ||||
|     mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, | ||||
| @@ -217,11 +281,8 @@ impl<'a> SyntaxMapping<'a> { | ||||
|     } | ||||
|  | ||||
|     pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { | ||||
|         let glob = GlobBuilder::new(from) | ||||
|             .case_insensitive(true) | ||||
|             .literal_separator(true) | ||||
|             .build()?; | ||||
|         self.mappings.push((glob.compile_matcher(), to)); | ||||
|         let matcher = make_glob_matcher(from)?; | ||||
|         self.mappings.push((matcher, to)); | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user