/// Removes backspace-based overstrike formatting from `line`.
///
/// Man pages and some help output encode emphasis by "typing over" a
/// character:
/// - Bold: `X\x08X` (a character, a backspace, the same character again)
/// - Underline: `_\x08X` (an underscore, a backspace, the real character)
///
/// Every backspace (`\x08`) erases the single character that was emitted
/// immediately before it, so only the final, visible character of each
/// sequence survives. A backspace with nothing in front of it is dropped.
///
/// Input that contains no backspace is returned as a borrow without
/// allocating; otherwise a newly built `String` is returned.
pub fn strip_overstrike(line: &str) -> Cow<'_, str> {
    const BACKSPACE: char = '\x08';

    match line.split_once(BACKSPACE) {
        // Nothing to strip: hand the caller's slice straight back.
        None => Cow::Borrowed(line),
        Some((head, rest)) => {
            let mut visible = String::with_capacity(line.len());
            visible.push_str(head);

            // Each segment of `rest` is preceded by exactly one backspace
            // (the first by the one `split_once` consumed), so erase one
            // character before appending it. Consecutive backspaces show up
            // as empty segments and therefore erase repeatedly.
            for segment in rest.split(BACKSPACE) {
                visible.pop();
                visible.push_str(segment);
            }

            Cow::Owned(visible)
        }
    }
}

#[test]
fn test_strip_overstrike() {
    // Each entry is (raw input, expected visible text).
    let cases = [
        // No overstrike at all
        ("no overstrike", "no overstrike"),
        // Empty string
        ("", ""),
        // Bold: X\x08X (same char repeated)
        ("H\x08Hello", "Hello"),
        // Underline: _\x08X (underscore before char)
        ("_\x08Hello", "Hello"),
        // Multiple overstrike sequences
        ("B\x08Bo\x08ol\x08ld\x08d", "Bold"),
        // Backspace at start of line (nothing to erase)
        ("\x08Hello", "Hello"),
        // Multiple consecutive backspaces
        ("ABC\x08\x08\x08XYZ", "XYZ"),
        // Unicode with overstrike
        ("ä\x08äöü", "äöü"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(strip_overstrike(input), expected, "input: {input:?}");
    }
}
a/src/printer.rs +++ b/src/printer.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::vec::Vec; use nu_ansi_term::Color::{Fixed, Green, Red, Yellow}; @@ -29,8 +30,7 @@ use crate::error::*; use crate::input::OpenedInput; use crate::line_range::{MaxBufferedLineNumber, RangeCheckResult}; use crate::output::OutputHandle; -use crate::preprocessor::strip_ansi; -use crate::preprocessor::{expand_tabs, replace_nonprintable}; +use crate::preprocessor::{expand_tabs, replace_nonprintable, strip_ansi, strip_overstrike}; use crate::style::StyleComponent; use crate::terminal::{as_terminal_escaped, to_ansi_color}; use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator}; @@ -152,7 +152,7 @@ impl Printer for SimplePrinter<'_> { self.config.nonprintable_notation, ); write!(handle, "{line}")?; - } else { + } else if self.config.binary == BinaryBehavior::AsText { match handle { OutputHandle::IoWrite(handle) => handle.write_all(line_buffer)?, OutputHandle::FmtWrite(handle) => { @@ -166,6 +166,23 @@ impl Printer for SimplePrinter<'_> { )?; } } + } else { + match handle { + OutputHandle::IoWrite(handle) => { + // Only strip overstrike for valid UTF-8, otherwise write raw bytes + if let Ok(line) = std::str::from_utf8(line_buffer) { + let line = strip_overstrike(line); + handle.write_all(line.as_bytes())?; + } else { + handle.write_all(line_buffer)?; + } + } + OutputHandle::FmtWrite(handle) => { + let line = String::from_utf8_lossy(line_buffer); + let line = strip_overstrike(&line); + write!(handle, "{line}")?; + } + } }; } Ok(()) @@ -622,6 +639,11 @@ impl Printer for InteractivePrinter<'_> { } }; + // Strip overstrike sequences (used by man pages for bold/underline). + if line.contains('\x08') { + line = Cow::Owned(strip_overstrike(&line).into_owned()); + } + // If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting. if self.strip_ansi { line = strip_ansi(&line).into()