mirror of
https://github.com/sharkdp/bat.git
synced 2025-09-01 10:52:24 +01:00
Fix the read_line method for utf16le input
To determine the end of a line, read until we find a 0x00 byte that is immediately preceded by 0x0A, instead of reading until \n (0x0A), then reading until the next 0x00, and calling it done.
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
|
||||
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
|
||||
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
|
||||
- Correctly determine the end of the line in UTF16LE input, see #3369 (@keith-hall)
|
||||
|
||||
## Other
|
||||
|
||||
|
47
src/input.rs
47
src/input.rs
@@ -267,7 +267,7 @@ impl<'a> InputReader<'a> {
|
||||
};
|
||||
|
||||
if content_type == Some(ContentType::UTF_16LE) {
|
||||
reader.read_until(0x00, &mut first_line).ok();
|
||||
read_utf16le_line(&mut reader, &mut first_line).ok();
|
||||
}
|
||||
|
||||
InputReader {
|
||||
@@ -286,13 +286,31 @@ impl<'a> InputReader<'a> {
|
||||
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
|
||||
|
||||
if self.content_type == Some(ContentType::UTF_16LE) {
|
||||
let _ = self.inner.read_until(0x00, buf);
|
||||
return read_utf16le_line(&mut self.inner, buf);
|
||||
}
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends bytes from `reader` to `buf` until `buf` ends with the byte pair
/// `0x0A 0x00` (a UTF-16LE line feed) or the reader is exhausted.
///
/// `buf` is not cleared first: bytes already present are kept and take part in
/// the end-of-line check, so a caller may have read up to a `0x0A` byte before
/// calling this function.
///
/// Returns `Ok(true)` if `buf` is non-empty afterwards and `Ok(false)` if it
/// is empty.
///
/// # Errors
///
/// Propagates any I/O error reported by [`BufRead::read_until`]. Bytes read
/// before the error remain in `buf`.
fn read_utf16le_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<bool> {
    // The two bytes that encode U+000A in UTF-16LE.
    const LINE_FEED: [u8; 2] = [0x0A, 0x00];

    loop {
        // `read_until` appends to `buf`, so no intermediate buffer is needed.
        if reader.read_until(0x00, buf)? == 0 {
            // EOF reached
            break;
        }
        // NOTE(review): this only looks at the last two bytes and does not
        // check 16-bit code-unit alignment. Bytes `4E 0A 00 4F` (U+0A4E,
        // U+4F00) also contain `0A 00` -- confirm whether that case matters.
        if buf.ends_with(&LINE_FEED) {
            // end of line found
            break;
        }
        // A 0x00 that is not preceded by 0x0A is part of some other code
        // unit, so keep reading.
    }
    Ok(!buf.is_empty())
}
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let content = b"#!/bin/bash\necho hello";
|
||||
@@ -350,3 +368,28 @@ fn utf16le() {
|
||||
assert!(!res.unwrap());
|
||||
assert!(buffer.is_empty());
|
||||
}
|
||||
|
||||
/// Regression test for issue 3367: UTF-16LE input in which the bytes 0x0A and
/// 0x00 both occur, but never as the adjacent pair `0A 00`, must be read as a
/// single line.
#[test]
fn utf16le_issue3367() {
    // BOM (FF FE) followed by the code units U+4E0A, U+4E00, U+4F0A, U+5200.
    let expected: &[u8] = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52";
    let mut reader = InputReader::new(expected);

    // The whole input is captured as the first line.
    assert_eq!(expected, &reader.first_line[..]);

    let mut line = Vec::new();

    // First read yields the complete input as one line.
    assert!(reader.read_line(&mut line).unwrap());
    assert_eq!(expected, &line[..]);

    line.clear();

    // Second read reports that nothing is left and leaves the buffer empty.
    assert!(!reader.read_line(&mut line).unwrap());
    assert!(line.is_empty());
}
|
||||
|
Reference in New Issue
Block a user