From 18b71743c8ffc242653960b8fd335f3346b94094 Mon Sep 17 00:00:00 2001 From: Haris Mohamedy Date: Mon, 31 Mar 2025 17:23:19 -0700 Subject: [PATCH 1/3] Fix for multibyte characters in file path --- CHANGELOG.md | 1 + Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/printer.rs | 13 +++++++------ tests/examples/test.A—B가 | 0 tests/integration_tests.rs | 11 +++++++++++ 6 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 tests/examples/test.A—B가 diff --git a/CHANGELOG.md b/CHANGELOG.md index b8c59964..96017e33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Fix `BAT_THEME_DARK` and `BAT_THEME_LIGHT` being ignored, see issue #3171 and PR #3168 (@bash) - Prevent `--list-themes` from outputting default theme info to stdout when it is piped, see #3189 (@einfachIrgendwer0815) - Rename some submodules to fix Dependabot submodule updates, see issue #3198 and PR #3201 (@victor-gp) +- Fix crash for multibyte characters in file path, see #3230 (@HSM95) ## Other diff --git a/Cargo.lock b/Cargo.lock index a87351cd..2ea16f11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,6 +153,7 @@ dependencies = [ "terminal-colorsaurus", "thiserror 2.0.11", "toml", + "unicode-segmentation", "unicode-width 0.1.14", "wait-timeout", "walkdir", @@ -1678,6 +1679,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.14" diff --git a/Cargo.toml b/Cargo.toml index 68172e72..c792a4c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,7 @@ bytesize = { version = "1.3.0" } encoding_rs = "0.8.35" execute = { version = "0.2.13", optional = true } terminal-colorsaurus = "0.4" +unicode-segmentation = "1.12.0" [dependencies.git2] version = "0.20" diff --git a/src/printer.rs b/src/printer.rs index 2c364bd7..6d933363 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -17,6 +17,7 @@ use content_inspector::ContentType; use encoding_rs::{UTF_16BE, UTF_16LE}; +use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthChar; use crate::assets::{HighlightingAssets, SyntaxReferenceInSet}; @@ -403,14 +404,14 @@ impl<'a> InteractivePrinter<'a> { handle: &mut OutputHandle, content: &str, ) -> Result<()> { - let mut content = content; let content_width = self.config.term_width - self.get_header_component_indent_length(); - while content.len() > content_width { - let (content_line, remaining) = content.split_at(content_width); - self.print_header_component_with_indent(handle, content_line)?; - content = remaining; + let mut content_graphemes: Vec<&str> = content.graphemes(true).collect(); + while content_graphemes.len() > content_width { + let (content_line, remaining) = content_graphemes.split_at(content_width); + self.print_header_component_with_indent(handle, content_line.join("").as_str())?; + content_graphemes = remaining.iter().cloned().collect(); } - self.print_header_component_with_indent(handle, content) + self.print_header_component_with_indent(handle, content_graphemes.join("").as_str()) } fn highlight_regions_for_line<'b>( diff --git a/tests/examples/test.A—B가 b/tests/examples/test.A—B가 new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 3aafb17c..97dbd550 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1600,6 +1600,17 @@ oken .stderr(""); } +#[test] +fn header_narrow_terminal_with_multibyte_chars() { + bat() + .arg("--terminal-width=30") + .arg("--decorations=always") + .arg("test.A—B가") + .assert() + .success() + .stderr(""); +} + #[test] #[cfg(feature = "git")] // Expected output assumes git is enabled fn header_default() { From a55d23aaa4eb256560e6b17436750f031711bf52 Mon Sep 17 00:00:00 2001 From: Haris Mohamedy Date: Mon, 31 Mar 2025 17:50:23 -0700 Subject: [PATCH 2/3] Add PR to CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96017e33..de61621d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ - Fix `BAT_THEME_DARK` and `BAT_THEME_LIGHT` being ignored, see issue #3171 and PR #3168 (@bash) - Prevent `--list-themes` from outputting default theme info to stdout when it is piped, see #3189 (@einfachIrgendwer0815) - Rename some submodules to fix Dependabot submodule updates, see issue #3198 and PR #3201 (@victor-gp) -- Fix crash for multibyte characters in file path, see #3230 (@HSM95) +- Fix crash for multibyte characters in file path, see issue #3230 and PR #3245 (@HSM95) ## Other From b5413cc015664044f6be7815be58b5d4edffd7c8 Mon Sep 17 00:00:00 2001 From: Haris Mohamedy Date: Thu, 3 Apr 2025 00:49:14 -0700 Subject: [PATCH 3/3] Do not split into graphemes if not necessary --- src/printer.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/printer.rs b/src/printer.rs index 6d933363..6574069f 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -405,6 +405,10 @@ impl<'a> InteractivePrinter<'a> { content: &str, ) -> Result<()> { let content_width = self.config.term_width - self.get_header_component_indent_length(); + if content.chars().count() <= content_width { + return self.print_header_component_with_indent(handle, content); + } + let mut content_graphemes: Vec<&str> = content.graphemes(true).collect(); while content_graphemes.len() > content_width { let (content_line, remaining) = content_graphemes.split_at(content_width);