mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-11-04 00:51:56 +00:00 
			
		
		
		
	Merge pull request #3369 from forkeith/utf16le
Fix the read_line method for utf16le/be input
This commit is contained in:
		@@ -17,6 +17,7 @@
 | 
				
			|||||||
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
 | 
					- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
 | 
				
			||||||
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
 | 
					- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
 | 
				
			||||||
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
 | 
					- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
 | 
				
			||||||
 | 
					- Correctly determine the end of the line in UTF16LE/BE input #3369 (@keith-hall)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Other
 | 
					## Other
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										88
									
								
								src/input.rs
									
									
									
									
									
								
							
							
						
						
									
										88
									
								
								src/input.rs
									
									
									
									
									
								
							@@ -267,7 +267,9 @@ impl<'a> InputReader<'a> {
 | 
				
			|||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if content_type == Some(ContentType::UTF_16LE) {
 | 
					        if content_type == Some(ContentType::UTF_16LE) {
 | 
				
			||||||
            reader.read_until(0x00, &mut first_line).ok();
 | 
					            read_utf16_line(&mut reader, &mut first_line, 0x00, 0x0A).ok();
 | 
				
			||||||
 | 
					        } else if content_type == Some(ContentType::UTF_16BE) {
 | 
				
			||||||
 | 
					            read_utf16_line(&mut reader, &mut first_line, 0x0A, 0x00).ok();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        InputReader {
 | 
					        InputReader {
 | 
				
			||||||
@@ -283,16 +285,44 @@ impl<'a> InputReader<'a> {
 | 
				
			|||||||
            return Ok(true);
 | 
					            return Ok(true);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if self.content_type == Some(ContentType::UTF_16LE) {
 | 
					        if self.content_type == Some(ContentType::UTF_16LE) {
 | 
				
			||||||
            let _ = self.inner.read_until(0x00, buf);
 | 
					            return read_utf16_line(&mut self.inner, buf, 0x00, 0x0A);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if self.content_type == Some(ContentType::UTF_16BE) {
 | 
				
			||||||
 | 
					            return read_utf16_line(&mut self.inner, buf, 0x0A, 0x00);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
 | 
				
			||||||
        Ok(res)
 | 
					        Ok(res)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn read_utf16_line<R: BufRead>(
 | 
				
			||||||
 | 
					    reader: &mut R,
 | 
				
			||||||
 | 
					    buf: &mut Vec<u8>,
 | 
				
			||||||
 | 
					    read_until_char: u8,
 | 
				
			||||||
 | 
					    preceded_by_char: u8,
 | 
				
			||||||
 | 
					) -> io::Result<bool> {
 | 
				
			||||||
 | 
					    loop {
 | 
				
			||||||
 | 
					        let mut temp = Vec::new();
 | 
				
			||||||
 | 
					        let n = reader.read_until(read_until_char, &mut temp)?;
 | 
				
			||||||
 | 
					        if n == 0 {
 | 
				
			||||||
 | 
					            // EOF reached
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        buf.extend_from_slice(&temp);
 | 
				
			||||||
 | 
					        if buf.len() >= 2
 | 
				
			||||||
 | 
					            && buf[buf.len() - 2] == preceded_by_char
 | 
				
			||||||
 | 
					            && buf[buf.len() - 1] == read_until_char
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            // end of line found
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        // end of line not found, keep going
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return Ok(!buf.is_empty());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[test]
 | 
					#[test]
 | 
				
			||||||
fn basic() {
 | 
					fn basic() {
 | 
				
			||||||
    let content = b"#!/bin/bash\necho hello";
 | 
					    let content = b"#!/bin/bash\necho hello";
 | 
				
			||||||
@@ -350,3 +380,53 @@ fn utf16le() {
 | 
				
			|||||||
    assert!(!res.unwrap());
 | 
					    assert!(!res.unwrap());
 | 
				
			||||||
    assert!(buffer.is_empty());
 | 
					    assert!(buffer.is_empty());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
 | 
				
			||||||
 | 
					fn utf16le_issue3367() {
 | 
				
			||||||
 | 
					    let content = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00\
 | 
				
			||||||
 | 
					        \x6F\x00\x20\x00\x62\x00\x61\x00\x72\x00\x0A\x00\
 | 
				
			||||||
 | 
					        \x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x20\x00\x77\x00\x6F\x00\x72\x00\x6C\x00\x64\x00";
 | 
				
			||||||
 | 
					    let mut reader = InputReader::new(&content[..]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert_eq!(
 | 
				
			||||||
 | 
					        b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00",
 | 
				
			||||||
 | 
					        &reader.first_line[..]
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut buffer = vec![];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let res = reader.read_line(&mut buffer);
 | 
				
			||||||
 | 
					    assert!(res.is_ok());
 | 
				
			||||||
 | 
					    assert!(res.unwrap());
 | 
				
			||||||
 | 
					    assert_eq!(
 | 
				
			||||||
 | 
					        b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00",
 | 
				
			||||||
 | 
					        &buffer[..]
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buffer.clear();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let res = reader.read_line(&mut buffer);
 | 
				
			||||||
 | 
					    assert!(res.is_ok());
 | 
				
			||||||
 | 
					    assert!(res.unwrap());
 | 
				
			||||||
 | 
					    assert_eq!(
 | 
				
			||||||
 | 
					        b"\x6F\x00\x20\x00\x62\x00\x61\x00\x72\x00\x0A\x00",
 | 
				
			||||||
 | 
					        &buffer[..]
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buffer.clear();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let res = reader.read_line(&mut buffer);
 | 
				
			||||||
 | 
					    assert!(res.is_ok());
 | 
				
			||||||
 | 
					    assert!(res.unwrap());
 | 
				
			||||||
 | 
					    assert_eq!(
 | 
				
			||||||
 | 
					        b"\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x20\x00\x77\x00\x6F\x00\x72\x00\x6C\x00\x64\x00",
 | 
				
			||||||
 | 
					        &buffer[..]
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buffer.clear();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let res = reader.read_line(&mut buffer);
 | 
				
			||||||
 | 
					    assert!(res.is_ok());
 | 
				
			||||||
 | 
					    assert!(!res.unwrap());
 | 
				
			||||||
 | 
					    assert!(buffer.is_empty());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16BE-complicated.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16BE-complicated.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16BE.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16BE.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16LE-complicated.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								tests/examples/test_UTF-16LE-complicated.txt
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1321,6 +1321,38 @@ fn utf16() {
 | 
				
			|||||||
        .assert()
 | 
					        .assert()
 | 
				
			||||||
        .success()
 | 
					        .success()
 | 
				
			||||||
        .stdout("hello world\n");
 | 
					        .stdout("hello world\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bat()
 | 
				
			||||||
 | 
					        .arg("--plain")
 | 
				
			||||||
 | 
					        .arg("--decorations=always")
 | 
				
			||||||
 | 
					        .arg("test_UTF-16BE.txt")
 | 
				
			||||||
 | 
					        .assert()
 | 
				
			||||||
 | 
					        .success()
 | 
				
			||||||
 | 
					        .stdout("hello world\nthis is a test\n");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
 | 
				
			||||||
 | 
					fn utf16le() {
 | 
				
			||||||
 | 
					    bat()
 | 
				
			||||||
 | 
					        .arg("--decorations=always")
 | 
				
			||||||
 | 
					        .arg("--style=numbers")
 | 
				
			||||||
 | 
					        .arg("--color=never")
 | 
				
			||||||
 | 
					        .arg("test_UTF-16LE-complicated.txt")
 | 
				
			||||||
 | 
					        .assert()
 | 
				
			||||||
 | 
					        .success()
 | 
				
			||||||
 | 
					        .stdout("   1 上一伊刀\n   2 foo bar\n   3 hello world\n");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
 | 
				
			||||||
 | 
					fn utf16be() {
 | 
				
			||||||
 | 
					    bat()
 | 
				
			||||||
 | 
					        .arg("--decorations=always")
 | 
				
			||||||
 | 
					        .arg("--style=numbers")
 | 
				
			||||||
 | 
					        .arg("--color=never")
 | 
				
			||||||
 | 
					        .arg("test_UTF-16BE-complicated.txt")
 | 
				
			||||||
 | 
					        .assert()
 | 
				
			||||||
 | 
					        .success()
 | 
				
			||||||
 | 
					        .stdout("   1 上一伊刀\n   2 foo bar\n   3 hello world\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Regression test for https://github.com/sharkdp/bat/issues/1922
 | 
					// Regression test for https://github.com/sharkdp/bat/issues/1922
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user