use std::{env, path::Path}; use crate::error::Result; use ignored_suffixes::IgnoredSuffixes; use globset::{Candidate, GlobBuilder, GlobMatcher}; use once_cell::sync::Lazy; pub mod ignored_suffixes; // Static syntax mappings generated from /src/syntax_mapping/builtins/ by the // build script (/build/syntax_mapping.rs). include!(concat!( env!("OUT_DIR"), "/codegen_static_syntax_mappings.rs" )); // The defined matcher strings are analysed at compile time and converted into // lazily-compiled `GlobMatcher`s. This is so that the string searches are moved // from run time to compile time, thus improving startup performance. // // To any future maintainer (including possibly myself) wondering why there is // not a `BuiltinMatcher` enum that looks like this: // // ``` // enum BuiltinMatcher { // Fixed(&'static str), // Dynamic(Lazy>), // } // ``` // // Because there was. I tried it and threw it out. // // Naively looking at the problem from a distance, this may seem like a good // design (strongly typed etc. etc.). It would also save on compiled size by // extracting out common behaviour into functions. But while actually // implementing the lazy matcher compilation logic, I realised that it's most // convenient for `BUILTIN_MAPPINGS` to have the following type: // // `[(Lazy>, MappingTarget); N]` // // The benefit for this is that operations like listing all builtin mappings // would be effectively memoised. The caller would not have to compile another // `GlobMatcher` for rules that they have previously visited. // // Unfortunately, this means we are going to have to store a distinct closure // for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer // of indirection. // // In the current implementation, the closure within each generated rule simply // calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on // whether the defined matcher contains dynamic segments or not. /// Compile a fixed glob string into a glob matcher. /// /// A failure to compile is a fatal error. /// /// Used internally by `Lazy`'s lazy evaluation closure. fn build_matcher_fixed(from: &str) -> GlobMatcher { make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile") } /// Join a list of matcher segments to create a glob string, replacing all /// environment variables, then compile to a glob matcher. /// /// Returns `None` if any replacement fails, or if the joined glob string fails /// to compile. /// /// Used internally by `Lazy`'s lazy evaluation closure. fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option { // join segments let mut buf = String::new(); for seg in segs { match seg { MatcherSegment::Text(s) => buf.push_str(s), MatcherSegment::Env(var) => { let replaced = env::var(var).ok()?; buf.push_str(&replaced); } } } // compile glob matcher let matcher = make_glob_matcher(&buf).ok()?; Some(matcher) } /// A segment of a dynamic builtin matcher. /// /// Used internally by `Lazy`'s lazy evaluation closure. #[derive(Clone, Debug)] enum MatcherSegment { Text(&'static str), Env(&'static str), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum MappingTarget<'a> { /// For mapping a path to a specific syntax. MapTo(&'a str), /// For mapping a path (typically an extension-less file name) to an unknown /// syntax. This typically means later using the contents of the first line /// of the file to determine what syntax to use. MapToUnknown, /// For mapping a file extension (e.g. `*.conf`) to an unknown syntax. This /// typically means later using the contents of the first line of the file /// to determine what syntax to use. However, if a syntax handles a file /// name that happens to have the given file extension (e.g. `resolv.conf`), /// then that association will have higher precedence, and the mapping will /// be ignored. MapExtensionToUnknown, } fn make_glob_matcher(from: &str) -> Result { let matcher = GlobBuilder::new(from) .case_insensitive(true) .literal_separator(true) .build()? .compile_matcher(); Ok(matcher) } #[derive(Debug, Clone, Default)] pub struct SyntaxMapping<'a> { mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, pub(crate) ignored_suffixes: IgnoredSuffixes<'a>, } impl<'a> SyntaxMapping<'a> { pub fn empty() -> SyntaxMapping<'a> { Default::default() } pub fn builtin() -> SyntaxMapping<'a> { let mut mapping = Self::empty(); mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap(); mapping .insert(".clang-format", MappingTarget::MapTo("YAML")) .unwrap(); mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap(); mapping .insert("build", MappingTarget::MapToUnknown) .unwrap(); mapping .insert("**/.ssh/config", MappingTarget::MapTo("SSH Config")) .unwrap(); mapping .insert( "**/bat/config", MappingTarget::MapTo("Bourne Again Shell (bash)"), ) .unwrap(); mapping .insert( "/etc/profile", MappingTarget::MapTo("Bourne Again Shell (bash)"), ) .unwrap(); mapping .insert( "os-release", MappingTarget::MapTo("Bourne Again Shell (bash)"), ) .unwrap(); mapping .insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)")) .unwrap(); mapping .insert("fish_history", MappingTarget::MapTo("YAML")) .unwrap(); for glob in ["*.jsonl", "*.sarif"] { mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap(); } // See #2151, https://nmap.org/book/nse-language.html mapping .insert("*.nse", MappingTarget::MapTo("Lua")) .unwrap(); // See #1008 mapping .insert("rails", MappingTarget::MapToUnknown) .unwrap(); mapping .insert("Containerfile", MappingTarget::MapTo("Dockerfile")) .unwrap(); mapping .insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)")) .unwrap(); // Nginx and Apache syntax files both want to style all ".conf" files // see #1131 and #1137 mapping .insert("*.conf", MappingTarget::MapExtensionToUnknown) .unwrap(); for glob in &[ "/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*", "nginx.conf", "mime.types", ] { mapping.insert(glob, MappingTarget::MapTo("nginx")).unwrap(); } for glob in &[ "/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*", "httpd.conf", ] { mapping .insert(glob, MappingTarget::MapTo("Apache Conf")) .unwrap(); } for glob in &[ "**/systemd/**/*.conf", "**/systemd/**/*.example", "*.automount", "*.device", "*.dnssd", "*.link", "*.mount", "*.netdev", "*.network", "*.nspawn", "*.path", "*.service", "*.scope", "*.slice", "*.socket", "*.swap", "*.target", "*.timer", ] { mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap(); } // unix mail spool for glob in &["/var/spool/mail/*", "/var/mail/*"] { mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap() } // pacman hooks mapping .insert("*.hook", MappingTarget::MapTo("INI")) .unwrap(); mapping .insert("*.ron", MappingTarget::MapTo("Rust")) .unwrap(); // Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` // See e.g. https://git-scm.com/docs/git-config#FILES match ( std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()), std::env::var_os("HOME") .filter(|val| !val.is_empty()) .map(|home| Path::new(&home).join(".config")), ) { (Some(xdg_config_home), Some(default_config_home)) if xdg_config_home == default_config_home => { insert_git_config_global(&mut mapping, &xdg_config_home) } (Some(xdg_config_home), Some(default_config_home)) /* else guard */ => { insert_git_config_global(&mut mapping, &xdg_config_home); insert_git_config_global(&mut mapping, &default_config_home) } (Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home), (None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home), (None, None) => (), }; fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef) { let git_config_path = config_home.as_ref().join("git"); mapping .insert( &git_config_path.join("config").to_string_lossy(), MappingTarget::MapTo("Git Config"), ) .ok(); mapping .insert( &git_config_path.join("ignore").to_string_lossy(), MappingTarget::MapTo("Git Ignore"), ) .ok(); mapping .insert( &git_config_path.join("attributes").to_string_lossy(), MappingTarget::MapTo("Git Attributes"), ) .ok(); } mapping } pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { let matcher = make_glob_matcher(from)?; self.mappings.push((matcher, to)); Ok(()) } pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] { &self.mappings } pub fn get_syntax_for(&self, path: impl AsRef) -> Option> { // Try matching on the file name as-is. let candidate = Candidate::new(&path); let candidate_filename = path.as_ref().file_name().map(Candidate::new); for (ref glob, ref syntax) in self.mappings.iter().rev() { if glob.is_match_candidate(&candidate) || candidate_filename .as_ref() .map_or(false, |filename| glob.is_match_candidate(filename)) { return Some(*syntax); } } // Try matching on the file name after removing an ignored suffix. let file_name = path.as_ref().file_name()?; self.ignored_suffixes .try_with_stripped_suffix(file_name, |stripped_file_name| { Ok(self.get_syntax_for(stripped_file_name)) }) .ok()? } pub fn insert_ignored_suffix(&mut self, suffix: &'a str) { self.ignored_suffixes.add_suffix(suffix); } } #[cfg(test)] mod tests { use super::*; #[test] fn basic() { let mut map = SyntaxMapping::empty(); map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML")) .ok(); map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore")) .ok(); assert_eq!( map.get_syntax_for("/path/to/Cargo.lock"), Some(MappingTarget::MapTo("TOML")) ); assert_eq!(map.get_syntax_for("/path/to/other.lock"), None); assert_eq!( map.get_syntax_for("/path/to/.ignore"), Some(MappingTarget::MapTo("Git Ignore")) ); } #[test] fn user_can_override_builtin_mappings() { let mut map = SyntaxMapping::builtin(); assert_eq!( map.get_syntax_for("/etc/profile"), Some(MappingTarget::MapTo("Bourne Again Shell (bash)")) ); map.insert("/etc/profile", MappingTarget::MapTo("My Syntax")) .ok(); assert_eq!( map.get_syntax_for("/etc/profile"), Some(MappingTarget::MapTo("My Syntax")) ); } #[test] fn builtin_mappings() { let map = SyntaxMapping::builtin(); assert_eq!( map.get_syntax_for("/path/to/build"), Some(MappingTarget::MapToUnknown) ); } #[test] /// verifies that SyntaxMapping::builtin() doesn't repeat `Glob`-based keys fn no_duplicate_builtin_keys() { let mappings = SyntaxMapping::builtin().mappings; for i in 0..mappings.len() { let tail = mappings[i + 1..].into_iter(); let (dupl, _): (Vec<_>, Vec<_>) = tail.partition(|item| item.0.glob() == mappings[i].0.glob()); // emit repeats on failure assert_eq!( dupl.len(), 0, "Glob pattern `{}` mapped to multiple: {:?}", mappings[i].0.glob().glob(), { let (_, mut dupl_targets): (Vec, Vec) = dupl.into_iter().cloned().unzip(); dupl_targets.push(mappings[i].1) }, ) } } }