diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7d3481f3..a1c2b775 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 - Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
 - Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
 - Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
+- Correctly determine the end of the line in UTF16LE input #3369 (@keith-hall)
 
 ## Other
 
diff --git a/src/input.rs b/src/input.rs
index b36204df..e5f7e4d6 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -267,7 +267,7 @@ impl<'a> InputReader<'a> {
         };
 
         if content_type == Some(ContentType::UTF_16LE) {
-            reader.read_until(0x00, &mut first_line).ok();
+            read_utf16le_line(&mut reader, &mut first_line).ok();
         }
 
         InputReader {
@@ -286,13 +286,40 @@ impl<'a> InputReader<'a> {
         let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
 
         if self.content_type == Some(ContentType::UTF_16LE) {
-            let _ = self.inner.read_until(0x00, buf);
+            return read_utf16le_line(&mut self.inner, buf);
         }
 
         Ok(res)
     }
 }
 
+/// Reads bytes from `reader` into `buf` until a UTF-16LE line feed has been consumed.
+///
+/// A UTF-16LE line feed is the byte pair `0x0A 0x00`, but a lone `0x00` can also be
+/// one half of an unrelated code unit (e.g. `0x00 0x4E`), so stopping at the first
+/// `0x00` is not enough: keep reading until `buf` really ends in `0x0A 0x00`, or
+/// until EOF.
+///
+/// Returns `Ok(true)` if `buf` is non-empty afterwards. Bytes already in `buf`
+/// before the call count towards that result.
+fn read_utf16le_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<bool> {
+    loop {
+        // `read_until` appends to `buf`, so no scratch buffer is needed.
+        if reader.read_until(0x00, buf)? == 0 {
+            // EOF reached
+            break;
+        }
+        // NOTE(review): the check ignores code-unit alignment, so `.. 0x0A` followed
+        // by `0x00 ..` from two different code units would also match - confirm.
+        if buf.ends_with(&[0x0A, 0x00]) {
+            // end of line found
+            break;
+        }
+        // end of line not found, keep going
+    }
+    Ok(!buf.is_empty())
+}
+
 #[test]
 fn basic() {
     let content = b"#!/bin/bash\necho hello";
@@ -350,3 +377,28 @@ fn utf16le() {
     assert!(!res.unwrap());
     assert!(buffer.is_empty());
 }
+
+#[test]
+fn utf16le_issue3367() {
+    let content = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52";
+    let mut reader = InputReader::new(&content[..]);
+
+    assert_eq!(
+        b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52",
+        &reader.first_line[..]
+    );
+
+    let mut buffer = vec![];
+
+    let res = reader.read_line(&mut buffer);
+    assert!(res.is_ok());
+    assert!(res.unwrap());
+    assert_eq!(b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52", &buffer[..]);
+
+    buffer.clear();
+
+    let res = reader.read_line(&mut buffer);
+    assert!(res.is_ok());
+    assert!(!res.unwrap());
+    assert!(buffer.is_empty());
+}