diff --git a/CHANGELOG.md b/CHANGELOG.md index bb40606b..ddefdcf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,9 @@ ## Features -## Bugfixes +- Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk) +## Bugfixes - `--help` now correctly honors `--pager=builtin`. See #3516 (@keith-hall) - `--help` now correctly honors custom themes. See #3524 (@keith-hall) diff --git a/README.md b/README.md index eb0da902..771db7ad 100644 --- a/README.md +++ b/README.md @@ -195,18 +195,13 @@ bat main.cpp | xclip `MANPAGER` environment variable: ```bash -export MANPAGER="sh -c 'awk '\''{ gsub(/\x1B\[[0-9;]*m/, \"\", \$0); gsub(/.\x08/, \"\", \$0); print }'\'' | bat -p -lman'" +export MANPAGER="bat -plman" man 2 select ``` (replace `bat` with `batcat` if you are on Debian or Ubuntu) If you prefer to have this bundled in a new command, you can also use [`batman`](https://github.com/eth-p/bat-extras/blob/master/doc/batman.md). -> [!WARNING] -> This will [not work](https://github.com/sharkdp/bat/issues/1145) out of the box with Mandoc's `man` implementation. -> -> Please either use `batman`, or convert the shell script to a [shebang executable](https://en.wikipedia.org/wiki/Shebang_(Unix)) and point `MANPAGER` to that. - Note that the [Manpage syntax](assets/syntaxes/02_Extra/Manpage.sublime-syntax) is developed in this repository and still needs some work. #### `prettier` / `shfmt` / `rustfmt` diff --git a/src/preprocessor.rs b/src/preprocessor.rs index dc2aa66e..6b4e2935 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -149,6 +149,35 @@ pub fn strip_ansi(line: &str) -> String { buffer } +/// Strips overstrike sequences (backspace formatting) from input. +/// +/// Overstrike formatting is used by man pages and some help output: +/// - Bold: `X\x08X` (character, backspace, same character) +/// - Underline: `_\x08X` (underscore, backspace, character) +/// +/// This function removes these sequences, keeping only the visible character. +/// `first_backspace` is the position of the first backspace in the line. +pub fn strip_overstrike(line: &str, first_backspace: usize) -> String { + let mut output = String::with_capacity(line.len()); + output.push_str(&line[..first_backspace]); + output.pop(); + + let mut remaining = &line[first_backspace + 1..]; + + loop { + if let Some(pos) = remaining.find('\x08') { + output.push_str(&remaining[..pos]); + output.pop(); + remaining = &remaining[pos + 1..]; + } else { + output.push_str(remaining); + break; + } + } + + output +} + #[derive(Debug, PartialEq, Clone, Copy, Default)] pub enum StripAnsiMode { #[default] @@ -211,3 +240,24 @@ fn test_strip_ansi() { "multiple sequences" ); } + +#[test] +fn test_strip_overstrike() { + // Bold: X\x08X (same char repeated) + assert_eq!(strip_overstrike("H\x08Hello", 1), "Hello"); + + // Underline: _\x08X (underscore before char) + assert_eq!(strip_overstrike("_\x08Hello", 1), "Hello"); + + // Multiple overstrike sequences + assert_eq!(strip_overstrike("B\x08Bo\x08ol\x08ld\x08d", 1), "Bold"); + + // Backspace at start of line (nothing to pop) + assert_eq!(strip_overstrike("\x08Hello", 0), "Hello"); + + // Multiple consecutive backspaces + assert_eq!(strip_overstrike("ABC\x08\x08\x08XYZ", 3), "XYZ"); + + // Unicode with overstrike + assert_eq!(strip_overstrike("ä\x08äöü", 2), "äöü"); +} diff --git a/src/printer.rs b/src/printer.rs index a28cb190..1ddd5e66 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -29,8 +29,7 @@ use crate::error::*; use crate::input::OpenedInput; use crate::line_range::{MaxBufferedLineNumber, RangeCheckResult}; use crate::output::OutputHandle; -use crate::preprocessor::strip_ansi; -use crate::preprocessor::{expand_tabs, replace_nonprintable}; +use crate::preprocessor::{expand_tabs, replace_nonprintable, strip_ansi, strip_overstrike}; use crate::style::StyleComponent; use crate::terminal::{as_terminal_escaped, to_ansi_color}; use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator}; @@ -199,6 +198,7 @@ pub(crate) struct InteractivePrinter<'a> { background_color_highlight: Option, consecutive_empty_lines: usize, strip_ansi: bool, + strip_overstrike: bool, } impl<'a> InteractivePrinter<'a> { @@ -263,17 +263,22 @@ impl<'a> InteractivePrinter<'a> { || matches!(config.binary, BinaryBehavior::AsText)) && (config.colored_output || config.strip_ansi == StripAnsiMode::Auto); - let (is_plain_text, highlighter_from_set) = if needs_to_match_syntax { + let (is_plain_text, strip_overstrike, highlighter_from_set) = if needs_to_match_syntax { // Determine the type of syntax for highlighting const PLAIN_TEXT_SYNTAX: &str = "Plain Text"; + const MANPAGE_SYNTAX: &str = "Manpage"; + const COMMAND_HELP_SYNTAX: &str = "Command Help"; match assets.get_syntax(config.language, input, &config.syntax_mapping) { Ok(syntax_in_set) => ( syntax_in_set.syntax.name == PLAIN_TEXT_SYNTAX, + syntax_in_set.syntax.name == MANPAGE_SYNTAX + || syntax_in_set.syntax.name == COMMAND_HELP_SYNTAX, Some(HighlighterFromSet::new(syntax_in_set, theme)), ), Err(Error::UndetectedSyntax(_)) => ( true, + false, Some( assets .find_syntax_by_name(PLAIN_TEXT_SYNTAX)? @@ -285,7 +290,7 @@ impl<'a> InteractivePrinter<'a> { Err(e) => return Err(e), } } else { - (false, None) + (false, false, None) }; // Determine when to strip ANSI sequences @@ -310,6 +315,7 @@ impl<'a> InteractivePrinter<'a> { background_color_highlight, consecutive_empty_lines: 0, strip_ansi, + strip_overstrike, }) } @@ -622,6 +628,12 @@ impl Printer for InteractivePrinter<'_> { } }; + if self.strip_overstrike { + if let Some(pos) = line.find('\x08') { + line = strip_overstrike(&line, pos).into(); + } + } + // If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting. if self.strip_ansi { line = strip_ansi(&line).into() diff --git a/tests/examples/git-commit.man b/tests/examples/git-commit.man new file mode 100644 index 00000000..a93a3808 --- /dev/null +++ b/tests/examples/git-commit.man @@ -0,0 +1,30 @@ +GIT-COMMIT(1) Git Manual GIT-COMMIT(1) + +NNAAMMEE + git-commit - Record changes to the repository + +SSYYNNOOPPSSIISS + ggiitt ccoommmmiitt [--aa | ----iinntteerraaccttiivvee | ----ppaattcchh] [--ss] [--vv] [--uu[_<_m_o_d_e_>]] [----aammeenndd] + [----ddrryy--rruunn] [(--cc | --CC | ----ssqquuaasshh) _<_c_o_m_m_i_t_> | ----ffiixxuupp [(aammeenndd|rreewwoorrdd)::]_<_c_o_m_m_i_t_>] + [--FF _<_f_i_l_e_> | --mm _<_m_s_g_>] [----rreesseett--aauutthhoorr] [----aallllooww--eemmppttyy] + [----aallllooww--eemmppttyy--mmeessssaaggee] [----nnoo--vveerriiffyy] [--ee] [----aauutthhoorr==_<_a_u_t_h_o_r_>] + [----ddaattee==_<_d_a_t_e_>] [----cclleeaannuupp==_<_m_o_d_e_>] [----[nnoo--]ssttaattuuss] + [--ii | --oo] [----ppaatthhssppeecc--ffrroomm--ffiillee==_<_f_i_l_e_> [----ppaatthhssppeecc--ffiillee--nnuull]] + [(----ttrraaiilleerr _<_t_o_k_e_n_>[(==|::)_<_v_a_l_u_e_>])...] [--SS[_<_k_e_y_i_d_>]] + [----] [_<_p_a_t_h_s_p_e_c_>...] + +DDEESSCCRRIIPPTTIIOONN + Create a new commit containing the current contents of the index and + the given log message describing the changes. The new commit is a + direct child of HEAD, usually the tip of the current branch, and the + branch is updated to point to it (unless no branch is associated with + the working tree, in which case HHEEAADD is "detached" as described in ggiitt-- + cchheecckkoouutt(1)). + + The content to be committed can be specified in several ways: + + 1. by using ggiitt--aadddd(1) to incrementally "add" changes to the index + before using the ccoommmmiitt command (Note: even modified files must be + "added"); + + 2. by using ggiitt--rrmm(1) to remove files from the working tree and the diff --git a/tests/examples/overstrike.txt b/tests/examples/overstrike.txt new file mode 100644 index 00000000..838764f3 --- /dev/null +++ b/tests/examples/overstrike.txt @@ -0,0 +1 @@ +BBold tteexxtt and _u_n_d_e_r_l_i_n_e diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 9a3d67ce..8896d9fa 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -2541,6 +2541,73 @@ fn binary_as_text() { .stderr(""); } +#[test] +fn no_strip_overstrike_for_plain_text() { + // Overstrike is preserved for plain text files (no syntax highlighting) + bat() + .arg("--color=never") + .arg("--decorations=never") + .arg("overstrike.txt") + .assert() + .success() + .stdout("B\x08Bold t\x08te\x08ex\x08xt\x08t and _\x08u_\x08n_\x08d_\x08e_\x08r_\x08l_\x08i_\x08n_\x08e\n") + .stderr(""); +} + +#[test] +fn strip_overstrike_with_syntax_highlighting() { + // Overstrike is stripped for certain syntax highlighting like command help. + bat() + .arg("--force-colorization") + .arg("--language=help") + .arg("overstrike.txt") + .assert() + .success() + .stdout(predicate::str::contains("Bold text and underline")) + .stderr(""); +} + +#[test] +fn strip_overstrike_for_manpage_syntax() { + // Overstrike is stripped for .man files (Manpage syntax) + bat() + .arg("--force-colorization") + .arg("git-commit.man") + .assert() + .success() + .stdout(predicate::str::contains("NAME")) + .stdout(predicate::str::contains("git-commit - Record changes")) + .stdout(predicate::str::is_match(r"\x1b\[38;[0-9;]+m--interactive\x1b\[").unwrap()) + .stderr(""); +} + +#[test] +fn no_strip_overstrike_for_other_syntax() { + // Overstrike is NOT stripped for other syntaxes (e.g., Rust) + bat() + .arg("--force-colorization") + .arg("--language=rust") + .arg("overstrike.txt") + .assert() + .success() + .stdout(predicate::str::contains("\x08")) + .stderr(""); +} + +#[test] +fn show_all_shows_backspace_with_caret_notation() { + // --show-all should display backspace characters (not strip them) + bat() + .arg("--show-all") + .arg("--nonprintable-notation=caret") + .arg("--decorations=never") + .arg("overstrike.txt") + .assert() + .success() + .stdout(predicate::str::contains("^H")) + .stderr(""); +} + #[test] fn no_paging_arg() { bat()