1
0
mirror of https://github.com/sharkdp/bat.git synced 2025-09-01 10:52:24 +01:00

Fix the read_line method for utf16le input

To determine the end of a line, read until we find 0x00 immediately preceded by 0x0A, instead of reading until \n (0x0A), then reading until the next 0x00, and calling it done.
This commit is contained in:
Keith Hall
2025-08-07 23:31:15 +03:00
parent 76e6a49a2e
commit 6675153460
2 changed files with 46 additions and 2 deletions

View File

@@ -17,6 +17,7 @@
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
- Correctly determine the end of the line in UTF16LE input, see #3369 (@keith-hall)
## Other

View File

@@ -267,7 +267,7 @@ impl<'a> InputReader<'a> {
};
if content_type == Some(ContentType::UTF_16LE) {
reader.read_until(0x00, &mut first_line).ok();
read_utf16le_line(&mut reader, &mut first_line).ok();
}
InputReader {
@@ -286,13 +286,31 @@ impl<'a> InputReader<'a> {
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
if self.content_type == Some(ContentType::UTF_16LE) {
let _ = self.inner.read_until(0x00, buf);
return read_utf16le_line(&mut self.inner, buf);
}
Ok(res)
}
}
/// Reads one UTF-16LE line from `reader`, appending its raw bytes to `buf`.
///
/// A line ends with the code unit U+000A, which is the byte pair `0x0A 0x00` starting on a
/// code-unit boundary. Bytes are consumed up to each `0x00` until `buf` ends with such an
/// aligned pair or the reader is exhausted.
///
/// `buf` may already contain the beginning of the current line. Alignment is derived from
/// `buf.len()`, so `buf` is expected to start on a code-unit boundary (empty, or holding
/// bytes read from the start of a line).
///
/// Returns `Ok(true)` when `buf` is non-empty afterwards and `Ok(false)` when nothing was
/// read and `buf` is still empty (end of input).
///
/// # Errors
///
/// Propagates any I/O error reported by `reader`. Bytes read before the error stay in `buf`.
fn read_utf16le_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<bool> {
    /// U+000A encoded as a little-endian UTF-16 code unit.
    const LINE_FEED: [u8; 2] = [0x0A, 0x00];

    // Read straight into `buf`; a return value of 0 means EOF.
    while reader.read_until(0x00, buf)? != 0 {
        // Only an aligned `0x0A 0x00` is a line feed. An unaligned pair is the high byte of
        // one code unit followed by the low byte of the next (e.g. U+0A05 then U+4E00) and
        // must not terminate the line.
        if buf.len() % 2 == 0 && buf.ends_with(&LINE_FEED) {
            break;
        }
    }
    Ok(!buf.is_empty())
}
#[test]
fn basic() {
let content = b"#!/bin/bash\necho hello";
@@ -350,3 +368,28 @@ fn utf16le() {
assert!(!res.unwrap());
assert!(buffer.is_empty());
}
// UTF-16LE input whose bytes include 0x0A and 0x00 but never 0x0A directly followed by
// 0x00, so it holds no line feed and the whole input must come back as a single line.
#[test]
fn utf16le_issue3367() {
    let input: &[u8] = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52";
    let mut reader = InputReader::new(input);

    // The constructor must already have buffered the complete input as the first line.
    assert_eq!(input, &reader.first_line[..]);

    let mut line = Vec::new();

    // First read: the whole input is returned as one line.
    let first = reader.read_line(&mut line);
    assert!(matches!(first, Ok(true)));
    assert_eq!(input, &line[..]);

    // Second read: nothing is left, so no line is reported and the buffer stays empty.
    line.clear();
    let second = reader.read_line(&mut line);
    assert!(matches!(second, Ok(false)));
    assert!(line.is_empty());
}