mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-10-30 22:54:07 +00:00 
			
		
		
		
	Introduce MapExtensionToUnknown MappingTarget (#1889)
Co-authored-by: Martin Nordholts <enselic@gmail.com>
This commit is contained in:
		| @@ -9,6 +9,7 @@ | ||||
|  | ||||
| - Python syntax highlighting no longer suffers from abysmal performance in specific scenarios. See #1688 (@keith-hall) | ||||
| - First line not shown in diff context. See #1891 (@divagant-martian) | ||||
| - Do not ignore syntaxes that handle file names with a `*.conf` extension. See #1703 (@cbolgiano) | ||||
|  | ||||
| ## Performance | ||||
|  | ||||
| @@ -41,6 +42,8 @@ | ||||
| - Deprecate `HighlightingAssets::syntaxes()` and `HighlightingAssets::syntax_for_file_name()`. Use `HighlightingAssets::get_syntaxes()` and `HighlightingAssets::get_syntax_for_path()` instead. They return a `Result` which is needed for upcoming lazy-loading work to improve startup performance. They also return which `SyntaxSet` the returned `SyntaxReference` belongs to. See #1747, #1755, #1776, #1862 (@Enselic) | ||||
| - Remove `HighlightingAssets::from_files` and `HighlightingAssets::save_to_cache`. Instead of calling the former and then the latter you now make a single call to `bat::assets::build`. See #1802 (@Enselic) | ||||
| - Replace  the `error::Error(error::ErrorKind, _)` struct and enum with an `error::Error` enum. `Error(ErrorKind::UnknownSyntax, _)` becomes `Error::UnknownSyntax`, etc. Also remove the `error::ResultExt` trait. These changes stem from replacing `error-chain` with `thiserror`. See #1820 (@Enselic) | ||||
| - Add new `MappingTarget` enum variant `MapExtensionToUnknown`. Refer to its docummentation for more information. Clients are adviced to treat `MapExtensionToUnknown` the same as `MapToUnknown` in exhaustive matches. See #1703 (@cbolgiano) | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -153,9 +153,12 @@ impl HighlightingAssets { | ||||
|     } | ||||
|  | ||||
|     /// Detect the syntax based on, in order: | ||||
|     ///  1. Syntax mappings (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`) | ||||
|     ///  1. Syntax mappings with [MappingTarget::MapTo] and [MappingTarget::MapToUnknown] | ||||
|     ///     (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`) | ||||
|     ///  2. The file name (e.g. `Dockerfile`) | ||||
|     ///  3. The file name extension (e.g. `.rs`) | ||||
|     ///  3. Syntax mappings with [MappingTarget::MapExtensionToUnknown] | ||||
|     ///     (e.g. `*.conf`) | ||||
|     ///  4. The file name extension (e.g. `.rs`) | ||||
|     /// | ||||
|     /// When detecting syntax based on syntax mappings, the full path is taken | ||||
|     /// into account. When detecting syntax based on file name, no regard is | ||||
| @@ -165,9 +168,9 @@ impl HighlightingAssets { | ||||
|     /// | ||||
|     /// Returns [Error::UndetectedSyntax] if it was not possible detect syntax | ||||
|     /// based on path/file name/extension (or if the path was mapped to | ||||
|     /// [MappingTarget::MapToUnknown]). In this case it is appropriate to fall | ||||
|     /// back to other methods to detect syntax. Such as using the contents of | ||||
|     /// the first line of the file. | ||||
|     /// [MappingTarget::MapToUnknown] or [MappingTarget::MapExtensionToUnknown]). | ||||
|     /// In this case it is appropriate to fall back to other methods to detect | ||||
|     /// syntax. Such as using the contents of the first line of the file. | ||||
|     /// | ||||
|     /// Returns [Error::UnknownSyntax] if a syntax mapping exist, but the mapped | ||||
|     /// syntax does not exist. | ||||
| @@ -177,20 +180,31 @@ impl HighlightingAssets { | ||||
|         mapping: &SyntaxMapping, | ||||
|     ) -> Result<SyntaxReferenceInSet> { | ||||
|         let path = path.as_ref(); | ||||
|         match mapping.get_syntax_for(path) { | ||||
|             Some(MappingTarget::MapToUnknown) => { | ||||
|  | ||||
|         let syntax_match = mapping.get_syntax_for(path); | ||||
|  | ||||
|         if let Some(MappingTarget::MapToUnknown) = syntax_match { | ||||
|             return Err(Error::UndetectedSyntax(path.to_string_lossy().into())); | ||||
|         } | ||||
|  | ||||
|         if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match { | ||||
|             return self | ||||
|                 .find_syntax_by_name(syntax_name)? | ||||
|                 .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())); | ||||
|         } | ||||
|  | ||||
|         let file_name = path.file_name().unwrap_or_default(); | ||||
|  | ||||
|         match (self.get_syntax_for_file_name(file_name)?, syntax_match) { | ||||
|             (Some(syntax), _) => Ok(syntax), | ||||
|  | ||||
|             (_, Some(MappingTarget::MapExtensionToUnknown)) => { | ||||
|                 Err(Error::UndetectedSyntax(path.to_string_lossy().into())) | ||||
|             } | ||||
|  | ||||
|             Some(MappingTarget::MapTo(syntax_name)) => self | ||||
|                 .find_syntax_by_name(syntax_name)? | ||||
|                 .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())), | ||||
|  | ||||
|             None => { | ||||
|                 let file_name = path.file_name().unwrap_or_default(); | ||||
|                 self.get_extension_syntax(file_name)? | ||||
|                     .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())) | ||||
|             } | ||||
|             _ => self | ||||
|                 .get_syntax_for_file_extension(file_name)? | ||||
|                 .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())), | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -263,14 +277,24 @@ impl HighlightingAssets { | ||||
|             .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })) | ||||
|     } | ||||
|  | ||||
|     fn get_extension_syntax(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> { | ||||
|     fn get_syntax_for_file_name(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> { | ||||
|         let mut syntax = self.find_syntax_by_extension(Some(file_name))?; | ||||
|         if syntax.is_none() { | ||||
|             syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?; | ||||
|             syntax = try_with_stripped_suffix(file_name, |stripped_file_name| { | ||||
|                 self.get_syntax_for_file_name(stripped_file_name) // Note: recursion | ||||
|             })?; | ||||
|         } | ||||
|         Ok(syntax) | ||||
|     } | ||||
|  | ||||
|     fn get_syntax_for_file_extension( | ||||
|         &self, | ||||
|         file_name: &OsStr, | ||||
|     ) -> Result<Option<SyntaxReferenceInSet>> { | ||||
|         let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?; | ||||
|         if syntax.is_none() { | ||||
|             syntax = try_with_stripped_suffix(file_name, |stripped_file_name| { | ||||
|                 self.get_extension_syntax(stripped_file_name) // Note: recursion | ||||
|                 self.get_syntax_for_file_extension(stripped_file_name) // Note: recursion | ||||
|             })?; | ||||
|         } | ||||
|         Ok(syntax) | ||||
| @@ -530,6 +554,42 @@ mod tests { | ||||
|         assert_eq!(test.syntax_for_file("test.h"), "C"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn syntax_detection_with_extension_mapping_to_unknown() { | ||||
|         let mut test = SyntaxDetectionTest::new(); | ||||
|  | ||||
|         // Normally, a CMakeLists.txt file shall use the CMake syntax, even if it is | ||||
|         // a bash script in disguise | ||||
|         assert_eq!( | ||||
|             test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"), | ||||
|             "CMake" | ||||
|         ); | ||||
|  | ||||
|         // Other .txt files shall use the Plain Text syntax | ||||
|         assert_eq!( | ||||
|             test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"), | ||||
|             "Plain Text" | ||||
|         ); | ||||
|  | ||||
|         // If we setup MapExtensionToUnknown on *.txt, the match on the full | ||||
|         // file name of "CMakeLists.txt" shall have higher prio, and CMake shall | ||||
|         // still be used for it | ||||
|         test.syntax_mapping | ||||
|             .insert("*.txt", MappingTarget::MapExtensionToUnknown) | ||||
|             .ok(); | ||||
|         assert_eq!( | ||||
|             test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"), | ||||
|             "CMake" | ||||
|         ); | ||||
|  | ||||
|         // However, for *other* files with a .txt extension, first-line fallback | ||||
|         // shall now be used | ||||
|         assert_eq!( | ||||
|             test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"), | ||||
|             "Bourne Again Shell (bash)" | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn syntax_detection_is_case_sensitive() { | ||||
|         let mut test = SyntaxDetectionTest::new(); | ||||
|   | ||||
| @@ -72,6 +72,7 @@ fn get_syntax_mapping_to_paths<'a>( | ||||
|     for mapping in mappings { | ||||
|         match mapping { | ||||
|             (_, MappingTarget::MapToUnknown) => {} | ||||
|             (_, MappingTarget::MapExtensionToUnknown) => {} | ||||
|             (matcher, MappingTarget::MapTo(s)) => { | ||||
|                 let globs = map.entry(*s).or_insert_with(Vec::new); | ||||
|                 globs.push(matcher.glob().glob().into()); | ||||
|   | ||||
| @@ -6,8 +6,21 @@ use globset::{Candidate, GlobBuilder, GlobMatcher}; | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq)] | ||||
| pub enum MappingTarget<'a> { | ||||
|     /// For mapping a path to a specific syntax. | ||||
|     MapTo(&'a str), | ||||
|  | ||||
|     /// For mapping a path (typically an extension-less file name) to an unknown | ||||
|     /// syntax. This typically means later using the contents of the first line | ||||
|     /// of the file to determine what syntax to use. | ||||
|     MapToUnknown, | ||||
|  | ||||
|     /// For mapping a file extension (e.g. `*.conf`) to an unknown syntax. This | ||||
|     /// typically means later using the contents of the first line of the file | ||||
|     /// to determine what syntax to use. However, if a syntax handles a file | ||||
|     /// name that happens to have the given file extension (e.g. `resolv.conf`), | ||||
|     /// then that association will have higher precedence, and the mapping will | ||||
|     /// be ignored. | ||||
|     MapExtensionToUnknown, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default)] | ||||
| @@ -54,12 +67,7 @@ impl<'a> SyntaxMapping<'a> { | ||||
|         // Nginx and Apache syntax files both want to style all ".conf" files | ||||
|         // see #1131 and #1137 | ||||
|         mapping | ||||
|             .insert("*.conf", MappingTarget::MapToUnknown) | ||||
|             .unwrap(); | ||||
|  | ||||
|         // Re-insert a mapping for resolv.conf, see #1510 | ||||
|         mapping | ||||
|             .insert("resolv.conf", MappingTarget::MapTo("resolv")) | ||||
|             .insert("*.conf", MappingTarget::MapExtensionToUnknown) | ||||
|             .unwrap(); | ||||
|  | ||||
|         for glob in &[ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user