1
0
mirror of https://github.com/sharkdp/bat.git synced 2026-02-08 00:32:08 +00:00

Merge pull request #3517 from akirk/strip-overstriking

Improve native man pages and command help syntax highlighting by stripping overstriking
This commit is contained in:
Keith Hall
2025-12-11 08:36:18 +02:00
committed by GitHub
7 changed files with 167 additions and 11 deletions

View File

@@ -2,8 +2,9 @@
## Features
## Bugfixes
- Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk)
## Bugfixes
- `--help` now correctly honors `--pager=builtin`. See #3516 (@keith-hall)
- `--help` now correctly honors custom themes. See #3524 (@keith-hall)

View File

@@ -195,18 +195,13 @@ bat main.cpp | xclip
`MANPAGER` environment variable:
```bash
export MANPAGER="sh -c 'awk '\''{ gsub(/\x1B\[[0-9;]*m/, \"\", \$0); gsub(/.\x08/, \"\", \$0); print }'\'' | bat -p -lman'"
export MANPAGER="bat -plman"
man 2 select
```
(replace `bat` with `batcat` if you are on Debian or Ubuntu)
If you prefer to have this bundled in a new command, you can also use [`batman`](https://github.com/eth-p/bat-extras/blob/master/doc/batman.md).
> [!WARNING]
> This will [not work](https://github.com/sharkdp/bat/issues/1145) out of the box with Mandoc's `man` implementation.
>
> Please either use `batman`, or convert the shell script to a [shebang executable](https://en.wikipedia.org/wiki/Shebang_(Unix)) and point `MANPAGER` to that.
Note that the [Manpage syntax](assets/syntaxes/02_Extra/Manpage.sublime-syntax) is developed in this repository and still needs some work.
#### `prettier` / `shfmt` / `rustfmt`

View File

@@ -149,6 +149,35 @@ pub fn strip_ansi(line: &str) -> String {
buffer
}
/// Strips overstrike sequences (backspace formatting) from input.
///
/// Overstrike formatting is used by man pages and some help output:
/// - Bold: `X\x08X` (character, backspace, same character)
/// - Underline: `_\x08X` (underscore, backspace, character)
///
/// Every backspace removes the character that was emitted immediately before it
/// (if there is one), so only the final visible character of each sequence is kept.
///
/// `first_backspace` is the byte offset of the first backspace in `line`. Everything
/// before that offset is copied verbatim, which lets a caller that has already
/// searched the line avoid a second scan of the prefix.
///
/// The offset is treated as a hint: if it does not address a backspace (it is out of
/// range, falls inside a multi-byte character, or points at some other character),
/// it is ignored and the line is searched instead. A line that contains no backspace
/// at all is returned unchanged. If the offset addresses a backspace that is not the
/// first one, earlier backspaces are left in the output untouched.
pub fn strip_overstrike(line: &str, first_backspace: usize) -> String {
    const BACKSPACE: char = '\x08';

    // Only trust the hint when it really addresses a backspace byte. 0x08 is ASCII and
    // can never be part of a multi-byte UTF-8 sequence, so a match is guaranteed to sit
    // on a char boundary and the slicing below cannot panic.
    let start = if line.as_bytes().get(first_backspace) == Some(&b'\x08') {
        first_backspace
    } else {
        match line.find(BACKSPACE) {
            Some(pos) => pos,
            None => return line.to_owned(),
        }
    };

    let mut output = String::with_capacity(line.len());
    output.push_str(&line[..start]);

    // `remaining` always starts at or before the next unprocessed backspace.
    let mut remaining = &line[start..];
    while let Some(pos) = remaining.find(BACKSPACE) {
        output.push_str(&remaining[..pos]);
        // Drop the character the backspace overstrikes; a no-op when nothing precedes it.
        output.pop();
        remaining = &remaining[pos + 1..];
    }
    output.push_str(remaining);
    output
}
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub enum StripAnsiMode {
#[default]
@@ -211,3 +240,24 @@ fn test_strip_ansi() {
"multiple sequences"
);
}
#[test]
fn test_strip_overstrike() {
    // Each entry is (input, byte offset of the first backspace, expected output).
    let cases: [(&str, usize, &str); 6] = [
        // Bold: the same character is printed twice around a backspace.
        ("H\x08Hello", 1, "Hello"),
        // Underline: an underscore is overstruck by the real character.
        ("_\x08Hello", 1, "Hello"),
        // Several overstrike sequences in a row.
        ("B\x08Bo\x08ol\x08ld\x08d", 1, "Bold"),
        // A backspace at the very start has nothing to remove.
        ("\x08Hello", 0, "Hello"),
        // Consecutive backspaces remove one preceding character each.
        ("ABC\x08\x08\x08XYZ", 3, "XYZ"),
        // Multi-byte characters are removed as whole characters.
        ("ä\x08äöü", 2, "äöü"),
    ];

    for &(input, first_backspace, expected) in cases.iter() {
        assert_eq!(strip_overstrike(input, first_backspace), expected);
    }
}

View File

@@ -29,8 +29,7 @@ use crate::error::*;
use crate::input::OpenedInput;
use crate::line_range::{MaxBufferedLineNumber, RangeCheckResult};
use crate::output::OutputHandle;
use crate::preprocessor::strip_ansi;
use crate::preprocessor::{expand_tabs, replace_nonprintable};
use crate::preprocessor::{expand_tabs, replace_nonprintable, strip_ansi, strip_overstrike};
use crate::style::StyleComponent;
use crate::terminal::{as_terminal_escaped, to_ansi_color};
use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator};
@@ -199,6 +198,7 @@ pub(crate) struct InteractivePrinter<'a> {
background_color_highlight: Option<Color>,
consecutive_empty_lines: usize,
strip_ansi: bool,
strip_overstrike: bool,
}
impl<'a> InteractivePrinter<'a> {
@@ -263,17 +263,22 @@ impl<'a> InteractivePrinter<'a> {
|| matches!(config.binary, BinaryBehavior::AsText))
&& (config.colored_output || config.strip_ansi == StripAnsiMode::Auto);
let (is_plain_text, highlighter_from_set) = if needs_to_match_syntax {
let (is_plain_text, strip_overstrike, highlighter_from_set) = if needs_to_match_syntax {
// Determine the type of syntax for highlighting
const PLAIN_TEXT_SYNTAX: &str = "Plain Text";
const MANPAGE_SYNTAX: &str = "Manpage";
const COMMAND_HELP_SYNTAX: &str = "Command Help";
match assets.get_syntax(config.language, input, &config.syntax_mapping) {
Ok(syntax_in_set) => (
syntax_in_set.syntax.name == PLAIN_TEXT_SYNTAX,
syntax_in_set.syntax.name == MANPAGE_SYNTAX
|| syntax_in_set.syntax.name == COMMAND_HELP_SYNTAX,
Some(HighlighterFromSet::new(syntax_in_set, theme)),
),
Err(Error::UndetectedSyntax(_)) => (
true,
false,
Some(
assets
.find_syntax_by_name(PLAIN_TEXT_SYNTAX)?
@@ -285,7 +290,7 @@ impl<'a> InteractivePrinter<'a> {
Err(e) => return Err(e),
}
} else {
(false, None)
(false, false, None)
};
// Determine when to strip ANSI sequences
@@ -310,6 +315,7 @@ impl<'a> InteractivePrinter<'a> {
background_color_highlight,
consecutive_empty_lines: 0,
strip_ansi,
strip_overstrike,
})
}
@@ -622,6 +628,12 @@ impl Printer for InteractivePrinter<'_> {
}
};
if self.strip_overstrike {
if let Some(pos) = line.find('\x08') {
line = strip_overstrike(&line, pos).into();
}
}
// If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting.
if self.strip_ansi {
line = strip_ansi(&line).into()

30
tests/examples/git-commit.man vendored Normal file
View File

@@ -0,0 +1,30 @@
GIT-COMMIT(1) Git Manual GIT-COMMIT(1)
NNAAMMEE
git-commit - Record changes to the repository
SSYYNNOOPPSSIISS
ggiitt ccoommmmiitt [--aa | ----iinntteerraaccttiivvee | ----ppaattcchh] [--ss] [--vv] [--uu[_<_m_o_d_e_>]] [----aammeenndd]
[----ddrryy--rruunn] [(--cc | --CC | ----ssqquuaasshh) _<_c_o_m_m_i_t_> | ----ffiixxuupp [(aammeenndd|rreewwoorrdd)::]_<_c_o_m_m_i_t_>]
[--FF _<_f_i_l_e_> | --mm _<_m_s_g_>] [----rreesseett--aauutthhoorr] [----aallllooww--eemmppttyy]
[----aallllooww--eemmppttyy--mmeessssaaggee] [----nnoo--vveerriiffyy] [--ee] [----aauutthhoorr==_<_a_u_t_h_o_r_>]
[----ddaattee==_<_d_a_t_e_>] [----cclleeaannuupp==_<_m_o_d_e_>] [----[nnoo--]ssttaattuuss]
[--ii | --oo] [----ppaatthhssppeecc--ffrroomm--ffiillee==_<_f_i_l_e_> [----ppaatthhssppeecc--ffiillee--nnuull]]
[(----ttrraaiilleerr _<_t_o_k_e_n_>[(==|::)_<_v_a_l_u_e_>])...] [--SS[_<_k_e_y_i_d_>]]
[----] [_<_p_a_t_h_s_p_e_c_>...]
DDEESSCCRRIIPPTTIIOONN
Create a new commit containing the current contents of the index and
the given log message describing the changes. The new commit is a
direct child of HEAD, usually the tip of the current branch, and the
branch is updated to point to it (unless no branch is associated with
the working tree, in which case HHEEAADD is "detached" as described in ggiitt--
cchheecckkoouutt(1)).
The content to be committed can be specified in several ways:
1. by using ggiitt--aadddd(1) to incrementally "add" changes to the index
before using the ccoommmmiitt command (Note: even modified files must be
"added");
2. by using ggiitt--rrmm(1) to remove files from the working tree and the

1
tests/examples/overstrike.txt vendored Normal file
View File

@@ -0,0 +1 @@
BBold tteexxtt and _u_n_d_e_r_l_i_n_e

View File

@@ -2541,6 +2541,73 @@ fn binary_as_text() {
.stderr("");
}
#[test]
fn no_strip_overstrike_for_plain_text() {
// Overstrike is preserved for plain text files (no syntax highlighting)
//
// Runs with colors and decorations disabled on the `overstrike.txt` fixture and
// expects stdout to match exactly, with every `\x08` backspace sequence still in
// place, and stderr to be empty.
bat()
.arg("--color=never")
.arg("--decorations=never")
.arg("overstrike.txt")
.assert()
.success()
.stdout("B\x08Bold t\x08te\x08ex\x08xt\x08t and _\x08u_\x08n_\x08d_\x08e_\x08r_\x08l_\x08i_\x08n_\x08e\n")
.stderr("");
}
#[test]
fn strip_overstrike_with_syntax_highlighting() {
// Overstrike is stripped for certain syntax highlighting like command help.
//
// Forces colorized output and selects the syntax via `--language=help`, then
// expects stdout to contain the contiguous text `Bold text and underline`
// (the fixture's visible characters without any backspace sequences in between)
// and stderr to be empty.
bat()
.arg("--force-colorization")
.arg("--language=help")
.arg("overstrike.txt")
.assert()
.success()
.stdout(predicate::str::contains("Bold text and underline"))
.stderr("");
}
#[test]
fn strip_overstrike_for_manpage_syntax() {
// Overstrike is stripped for .man files (Manpage syntax)
//
// No `--language` is passed here; only the `git-commit.man` fixture name is given.
// Expectations on stdout:
// - it contains the plain heading `NAME` and the plain summary text
//   `git-commit - Record changes`, i.e. without doubled or backspaced characters;
// - `--interactive` appears as one unbroken token directly between a `38;…m`
//   color escape sequence and the next escape sequence.
// stderr must be empty.
bat()
.arg("--force-colorization")
.arg("git-commit.man")
.assert()
.success()
.stdout(predicate::str::contains("NAME"))
.stdout(predicate::str::contains("git-commit - Record changes"))
.stdout(predicate::str::is_match(r"\x1b\[38;[0-9;]+m--interactive\x1b\[").unwrap())
.stderr("");
}
#[test]
fn no_strip_overstrike_for_other_syntax() {
// Overstrike is NOT stripped for other syntaxes (e.g., Rust)
//
// Forces colorized output with `--language=rust` on the same `overstrike.txt`
// fixture and expects at least one raw backspace (`\x08`) to remain in stdout,
// with stderr empty.
bat()
.arg("--force-colorization")
.arg("--language=rust")
.arg("overstrike.txt")
.assert()
.success()
.stdout(predicate::str::contains("\x08"))
.stderr("");
}
#[test]
fn show_all_shows_backspace_with_caret_notation() {
// --show-all should display backspace characters (not strip them)
//
// Combines `--show-all` with `--nonprintable-notation=caret` and disabled
// decorations, then expects the caret form `^H` to appear in stdout and stderr
// to be empty.
bat()
.arg("--show-all")
.arg("--nonprintable-notation=caret")
.arg("--decorations=never")
.arg("overstrike.txt")
.assert()
.success()
.stdout(predicate::str::contains("^H"))
.stderr("");
}
#[test]
fn no_paging_arg() {
bat()