mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-10-30 22:54:07 +00:00 
			
		
		
		
	Isolate variables at compile time
This commit is contained in:
		| @@ -1,7 +1,8 @@ | ||||
| use std::{convert::Infallible, env, fs, path::Path, str::FromStr}; | ||||
|  | ||||
| use anyhow::anyhow; | ||||
| use anyhow::{anyhow, bail}; | ||||
| use indexmap::IndexMap; | ||||
| use itertools::Itertools; | ||||
| use serde::Deserialize; | ||||
| use serde_with::DeserializeFromStr; | ||||
| use walkdir::WalkDir; | ||||
| @@ -17,7 +18,6 @@ pub enum MappingTarget { | ||||
| } | ||||
| impl FromStr for MappingTarget { | ||||
|     type Err = Infallible; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         match s { | ||||
|             "MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown), | ||||
| @@ -36,10 +36,136 @@ impl MappingTarget { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, DeserializeFromStr)] | ||||
| /// A single matcher. | ||||
| /// | ||||
| /// Corresponds to `syntax_mapping::BuiltinMatcher`. | ||||
| struct Matcher(Vec<MatcherSegment>); | ||||
| /// Parse a matcher. | ||||
| /// | ||||
| /// Note that this implementation is rather strict: when it sees a '$', '{', or | ||||
| /// '}' where it does not make sense, it will immediately hard-error. | ||||
| /// | ||||
| /// The reason for this strictness is I currently cannot think of a valid reason | ||||
| /// why you would ever need '$', '{', or '}' as plaintext in a glob pattern. | ||||
| /// Therefore any such occurrences are likely human errors. | ||||
| /// | ||||
| /// If we later discover some edge cases, it's okay to make it more permissive. | ||||
| impl FromStr for Matcher { | ||||
|     type Err = anyhow::Error; | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         use MatcherSegment as Seg; | ||||
|  | ||||
|         if s.is_empty() { | ||||
|             bail!("Empty string is not a valid glob matcher"); | ||||
|         } | ||||
|  | ||||
|         let mut segments = Vec::new(); | ||||
|         let mut buf = String::new(); | ||||
|         let mut is_in_var = false; | ||||
|  | ||||
|         let mut char_it = s.chars(); | ||||
|         loop { | ||||
|             match char_it.next() { | ||||
|                 Some('$') => { | ||||
|                     if is_in_var { | ||||
|                         bail!(r#"Saw a '$' when already in a variable: "{s}""#); | ||||
|                     } | ||||
|                     match char_it.next() { | ||||
|                         Some('{') => { | ||||
|                             // push text unless empty | ||||
|                             if !buf.is_empty() { | ||||
|                                 segments.push(Seg::Text(buf.clone())); | ||||
|                                 buf.clear(); | ||||
|                             } | ||||
|                             // open var | ||||
|                             is_in_var = true; | ||||
|                         } | ||||
|                         Some(_) | None => { | ||||
|                             bail!(r#"Expected a '{{' after '$': "{s}""#); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Some('{') => { | ||||
|                     bail!(r#"Saw a hanging '{{': "{s}""#); | ||||
|                 } | ||||
|                 Some('}') => { | ||||
|                     if !is_in_var { | ||||
|                         bail!(r#"Saw a '}}' when not in a variable: "{s}""#); | ||||
|                     } | ||||
|                     if buf.is_empty() { | ||||
|                         // `${}` | ||||
|                         bail!(r#"Variable name cannot be empty: "{s}""#); | ||||
|                     } | ||||
|                     // push variable | ||||
|                     segments.push(Seg::Env(buf.clone())); | ||||
|                     buf.clear(); | ||||
|                     // close var | ||||
|                     is_in_var = false; | ||||
|                 } | ||||
|                 Some(' ') if is_in_var => { | ||||
|                     bail!(r#"' ' Cannot be part of a variable's name: "{s}""#); | ||||
|                 } | ||||
|                 Some(c) => { | ||||
|                     // either plaintext or variable name | ||||
|                     buf.push(c); | ||||
|                 } | ||||
|                 None => { | ||||
|                     if is_in_var { | ||||
|                         bail!(r#"Variable unclosed: "{s}""#); | ||||
|                     } | ||||
|                     segments.push(Seg::Text(buf.clone())); | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(Self(segments)) | ||||
|     } | ||||
| } | ||||
| impl Matcher { | ||||
|     fn codegen(&self) -> String { | ||||
|         match self.0.len() { | ||||
|             0 => unreachable!("0-length matcher should never be created"), | ||||
|             // if-let guard would be ideal here | ||||
|             // see: https://github.com/rust-lang/rust/issues/51114 | ||||
|             1 if matches!(self.0[0], MatcherSegment::Text(_)) => { | ||||
|                 let MatcherSegment::Text(ref s) = self.0[0] else { | ||||
|                     unreachable!() | ||||
|                 }; | ||||
|                 format!(r###"BuiltinMatcher::Fixed(r#"{s}"#)"###) | ||||
|             } | ||||
|             // parser logic ensures that this case can only happen when there are dynamic segments | ||||
|             _ => { | ||||
|                 let segments_codegen = self.0.iter().map(MatcherSegment::codegen).join(", "); | ||||
|                 let closure = format!("|| join_segments(&[{segments_codegen}])"); | ||||
|                 format!("BuiltinMatcher::Dynamic(Lazy::new({closure}))") | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A segment in a matcher. | ||||
| /// | ||||
| /// Corresponds to `syntax_mapping::MatcherSegment`. | ||||
| #[derive(Debug, Clone)] | ||||
| enum MatcherSegment { | ||||
|     Text(String), | ||||
|     Env(String), | ||||
| } | ||||
| impl MatcherSegment { | ||||
|     fn codegen(&self) -> String { | ||||
|         match self { | ||||
|             Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###), | ||||
|             Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A struct that models a single .toml file in /src/syntax_mapping/builtins/. | ||||
| #[derive(Clone, Debug, Deserialize)] | ||||
| struct MappingDefModel { | ||||
|     mappings: IndexMap<MappingTarget, Vec<String>>, | ||||
|     mappings: IndexMap<MappingTarget, Vec<Matcher>>, | ||||
| } | ||||
| impl MappingDefModel { | ||||
|     fn into_mapping_list(self) -> MappingList { | ||||
| @@ -58,18 +184,20 @@ impl MappingDefModel { | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug)] | ||||
| struct MappingList(Vec<(String, MappingTarget)>); | ||||
| struct MappingList(Vec<(Matcher, MappingTarget)>); | ||||
| impl MappingList { | ||||
|     fn codegen(&self) -> String { | ||||
|         let array_items: Vec<_> = self | ||||
|             .0 | ||||
|             .iter() | ||||
|             .map(|(matcher, target)| format!(r###"(r#"{matcher}"#, {t})"###, t = target.codegen())) | ||||
|             .map(|(matcher, target)| { | ||||
|                 format!("({m}, {t})", m = matcher.codegen(), t = target.codegen()) | ||||
|             }) | ||||
|             .collect(); | ||||
|         let len = array_items.len(); | ||||
|  | ||||
|         format!( | ||||
|             "static STATIC_RULES: [(&str, MappingTarget); {len}] = [\n{items}\n];", | ||||
|             "static STATIC_RULES: [(BuiltinMatcher, MappingTarget); {len}] = [\n{items}\n];", | ||||
|             items = array_items.join(",\n") | ||||
|         ) | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user