1
0
mirror of https://github.com/sharkdp/bat.git synced 2026-02-08 08:42:08 +00:00

Strip overstriking to better support man pages

This commit is contained in:
Alex Kirk
2025-12-05 12:15:48 +01:00
parent eb2a8e29c7
commit 51bdaa5f88
2 changed files with 78 additions and 3 deletions

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt::Write;
use crate::{
@@ -149,6 +150,31 @@ pub fn strip_ansi(line: &str) -> String {
buffer
}
/// Removes overstrike (backspace-based) formatting from a line of text.
///
/// Man pages and some help output emulate typewriter emphasis:
/// - Bold: `X\x08X` (a character, a backspace, the same character again)
/// - Underline: `_\x08X` (an underscore, a backspace, then the character)
///
/// Every backspace (`\x08`) discards the character emitted immediately before
/// it, if there is one; a backspace with nothing before it is simply dropped.
/// Input that contains no backspace is handed back borrowed, without
/// allocating; otherwise a newly built owned string is returned.
pub fn strip_overstrike(line: &str) -> Cow<'_, str> {
    // Cutting the line at each backspace yields the text runs between them.
    // The first run covers the whole line exactly when there is no backspace.
    let mut runs = line.split('\x08');
    let head = runs.next().unwrap_or("");
    if head.len() == line.len() {
        return Cow::Borrowed(line);
    }

    let mut cleaned = String::with_capacity(line.len());
    cleaned.push_str(head);
    for run in runs {
        // Each further run was preceded by one backspace: erase the last
        // character kept so far (a no-op on an empty buffer), then append.
        cleaned.pop();
        cleaned.push_str(run);
    }
    Cow::Owned(cleaned)
}
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub enum StripAnsiMode {
#[default]
@@ -211,3 +237,30 @@ fn test_strip_ansi() {
"multiple sequences"
);
}
#[test]
fn test_strip_overstrike() {
    // No overstrike - content is unchanged AND the input is returned as a
    // borrowed reference (no allocation on the common path).
    assert_eq!(strip_overstrike("no overstrike"), "no overstrike");
    assert!(matches!(
        strip_overstrike("no overstrike"),
        std::borrow::Cow::Borrowed(_)
    ));

    // Empty string - also borrowed, since it contains no backspace
    assert_eq!(strip_overstrike(""), "");
    assert!(matches!(
        strip_overstrike(""),
        std::borrow::Cow::Borrowed(_)
    ));

    // Bold: X\x08X (same char repeated)
    assert_eq!(strip_overstrike("H\x08Hello"), "Hello");

    // Underline: _\x08X (underscore before char)
    assert_eq!(strip_overstrike("_\x08Hello"), "Hello");

    // Multiple overstrike sequences
    assert_eq!(strip_overstrike("B\x08Bo\x08ol\x08ld\x08d"), "Bold");

    // Backspace at start of line (nothing to pop)
    assert_eq!(strip_overstrike("\x08Hello"), "Hello");

    // Backspace at end of line (removes the final character)
    assert_eq!(strip_overstrike("Hello\x08"), "Hell");

    // Multiple consecutive backspaces
    assert_eq!(strip_overstrike("ABC\x08\x08\x08XYZ"), "XYZ");

    // Unicode with overstrike (a multi-byte character is removed as a whole)
    assert_eq!(strip_overstrike("ä\x08äöü"), "äöü");
}

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::vec::Vec;
use nu_ansi_term::Color::{Fixed, Green, Red, Yellow};
@@ -29,8 +30,7 @@ use crate::error::*;
use crate::input::OpenedInput;
use crate::line_range::{MaxBufferedLineNumber, RangeCheckResult};
use crate::output::OutputHandle;
use crate::preprocessor::strip_ansi;
use crate::preprocessor::{expand_tabs, replace_nonprintable};
use crate::preprocessor::{expand_tabs, replace_nonprintable, strip_ansi, strip_overstrike};
use crate::style::StyleComponent;
use crate::terminal::{as_terminal_escaped, to_ansi_color};
use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator};
@@ -152,7 +152,7 @@ impl Printer for SimplePrinter<'_> {
self.config.nonprintable_notation,
);
write!(handle, "{line}")?;
} else {
} else if self.config.binary == BinaryBehavior::AsText {
match handle {
OutputHandle::IoWrite(handle) => handle.write_all(line_buffer)?,
OutputHandle::FmtWrite(handle) => {
@@ -166,6 +166,23 @@ impl Printer for SimplePrinter<'_> {
)?;
}
}
} else {
match handle {
OutputHandle::IoWrite(handle) => {
// Only strip overstrike for valid UTF-8, otherwise write raw bytes
if let Ok(line) = std::str::from_utf8(line_buffer) {
let line = strip_overstrike(line);
handle.write_all(line.as_bytes())?;
} else {
handle.write_all(line_buffer)?;
}
}
OutputHandle::FmtWrite(handle) => {
let line = String::from_utf8_lossy(line_buffer);
let line = strip_overstrike(&line);
write!(handle, "{line}")?;
}
}
};
}
Ok(())
@@ -622,6 +639,11 @@ impl Printer for InteractivePrinter<'_> {
}
};
// Strip overstrike sequences (used by man pages for bold/underline).
if line.contains('\x08') {
line = Cow::Owned(strip_overstrike(&line).into_owned());
}
// If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting.
if self.strip_ansi {
line = strip_ansi(&line).into()