/// Reads one UTF-16LE line from `reader`, appending its raw bytes to `buf`.
///
/// Input is consumed in chunks that each end at a `0x00` byte. Reading stops as soon as `buf`
/// ends with the byte pair `0x0A 0x00` (a little-endian `\n` code unit) or the reader has no
/// more data. `buf` is not cleared first, so bytes the caller already placed in it take part
/// in the end-of-line check.
///
/// Returns `Ok(true)` when `buf` is non-empty afterwards and `Ok(false)` otherwise.
///
/// # Errors
///
/// Propagates any I/O error reported by [`BufRead::read_until`]; bytes appended to `buf`
/// before the error are left in place.
//
// NOTE(review): only the last two bytes of `buf` are inspected, with no regard to code-unit
// alignment. A unit whose high byte is 0x0A followed by a unit whose low byte is 0x00
// (e.g. bytes `05 0A 00 4E`) is therefore also treated as an end of line. Confirm whether
// callers always start `buf` on a code-unit boundary before tightening this check.
fn read_utf16le_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<bool> {
    // `read_until` appends straight into `buf`, so no scratch buffer is needed.
    // It yields 0 only once the reader is exhausted.
    while reader.read_until(0x00, buf)? != 0 {
        if buf.ends_with(&[0x0A, 0x00]) {
            // End of line found.
            break;
        }
        // The 0x00 belonged to some other code unit; keep reading.
    }
    Ok(!buf.is_empty())
}
/// Regression test (named after issue 3367) for UTF-16LE input whose bytes contain `0x0A`
/// and `0x00`, but never `0x0A` immediately followed by `0x00`.
///
/// The whole input is expected to come back as one line, both in `first_line` and from the
/// first `read_line` call; the second `read_line` call must report that nothing is left.
#[test]
fn utf16le_issue3367() {
    let content = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52";
    let expected: &[u8] = content;

    let mut reader = InputReader::new(&content[..]);
    assert_eq!(expected, &reader.first_line[..]);

    let mut line = Vec::new();

    // First read returns the complete input as one line.
    let first = reader.read_line(&mut line);
    assert!(matches!(first, Ok(true)));
    assert_eq!(expected, line.as_slice());

    line.clear();

    // Second read finds nothing more to read.
    let second = reader.read_line(&mut line);
    assert!(matches!(second, Ok(false)));
    assert!(line.is_empty());
}