mirror of
				https://github.com/sharkdp/bat.git
				synced 2025-11-04 00:51:56 +00:00 
			
		
		
		
	Add support for UTF-16LE and UTF-16BE
This commit is contained in:
		
							
								
								
									
										65
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										65
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							@@ -52,6 +52,7 @@ dependencies = [
 | 
			
		||||
 "console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "content_inspector 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "directories 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "git2 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
@@ -166,6 +167,63 @@ dependencies = [
 | 
			
		||||
 "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding"
 | 
			
		||||
version = "0.2.33"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
 "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding-index-japanese"
 | 
			
		||||
version = "1.20141219.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding-index-korean"
 | 
			
		||||
version = "1.20141219.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding-index-simpchinese"
 | 
			
		||||
version = "1.20141219.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding-index-singlebyte"
 | 
			
		||||
version = "1.20141219.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding-index-tradchinese"
 | 
			
		||||
version = "1.20141219.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding_index_tests"
 | 
			
		||||
version = "0.1.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "error-chain"
 | 
			
		||||
version = "0.12.0"
 | 
			
		||||
@@ -841,6 +899,13 @@ dependencies = [
 | 
			
		||||
"checksum console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd48adf136733979b49e15bc3b4c43cc0d3c85ece7bd08e6daa414c6fcb13e6"
 | 
			
		||||
"checksum content_inspector 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d902e17eb0038a23c88baa0d78c75fac7968132e73f4fdb9ea77b03d2641b669"
 | 
			
		||||
"checksum directories 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b106a38a9bf6c763c6c2e2c3332ab7635da453a68a6babca776386b3b287d338"
 | 
			
		||||
"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
 | 
			
		||||
"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
 | 
			
		||||
"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
 | 
			
		||||
"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
 | 
			
		||||
"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
 | 
			
		||||
"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
 | 
			
		||||
"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
 | 
			
		||||
"checksum error-chain 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "07e791d3be96241c77c43846b665ef1384606da2cd2a48730abe606a12906e02"
 | 
			
		||||
"checksum flate2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "37847f133aae7acf82bb9577ccd8bda241df836787642654286e79679826a54b"
 | 
			
		||||
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 | 
			
		||||
 
 | 
			
		||||
@@ -23,6 +23,7 @@ directories = "1.0"
 | 
			
		||||
lazy_static = "1.0"
 | 
			
		||||
wild = "2.0"
 | 
			
		||||
content_inspector = "0.2.2"
 | 
			
		||||
encoding = "0.2"
 | 
			
		||||
 | 
			
		||||
[dependencies.git2]
 | 
			
		||||
version = "0.7"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
use std::fs::File;
 | 
			
		||||
use std::io::{self, BufRead, BufReader};
 | 
			
		||||
 | 
			
		||||
use content_inspector::{self, ContentType};
 | 
			
		||||
 | 
			
		||||
use errors::*;
 | 
			
		||||
 | 
			
		||||
const THEME_PREVIEW_FILE: &[u8] = include_bytes!("../assets/theme_preview.rs");
 | 
			
		||||
@@ -8,6 +10,7 @@ const THEME_PREVIEW_FILE: &[u8] = include_bytes!("../assets/theme_preview.rs");
 | 
			
		||||
pub struct InputFileReader<'a> {
 | 
			
		||||
    inner: Box<dyn BufRead + 'a>,
 | 
			
		||||
    pub first_line: Vec<u8>,
 | 
			
		||||
    pub content_type: ContentType,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'a> InputFileReader<'a> {
 | 
			
		||||
@@ -15,18 +18,31 @@ impl<'a> InputFileReader<'a> {
 | 
			
		||||
        let mut first_line = vec![];
 | 
			
		||||
        reader.read_until(b'\n', &mut first_line).ok();
 | 
			
		||||
 | 
			
		||||
        let content_type = content_inspector::inspect(&first_line[..]);
 | 
			
		||||
 | 
			
		||||
        if content_type == ContentType::UTF_16LE {
 | 
			
		||||
            reader.read_until(0x00, &mut first_line).ok();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        InputFileReader {
 | 
			
		||||
            inner: Box::new(reader),
 | 
			
		||||
            first_line,
 | 
			
		||||
            content_type,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn read_line(&mut self, buf: &mut Vec<u8>) -> io::Result<bool> {
 | 
			
		||||
        if self.first_line.is_empty() {
 | 
			
		||||
            self.inner.read_until(b'\n', buf).map(|size| size > 0)
 | 
			
		||||
            let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
 | 
			
		||||
 | 
			
		||||
            if self.content_type == ContentType::UTF_16LE {
 | 
			
		||||
                self.inner.read_until(0x00, buf).ok();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            Ok(res)
 | 
			
		||||
        } else {
 | 
			
		||||
            buf.append(&mut self.first_line);
 | 
			
		||||
            return Ok(true);
 | 
			
		||||
            Ok(true)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -84,3 +100,32 @@ fn basic() {
 | 
			
		||||
    assert_eq!(false, res.unwrap());
 | 
			
		||||
    assert!(buffer.is_empty());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[test]
 | 
			
		||||
fn utf16le() {
 | 
			
		||||
    let content = b"\xFF\xFE\x73\x00\x0A\x00\x64\x00";
 | 
			
		||||
    let mut reader = InputFileReader::new(&content[..]);
 | 
			
		||||
 | 
			
		||||
    assert_eq!(b"\xFF\xFE\x73\x00\x0A\x00", &reader.first_line[..]);
 | 
			
		||||
 | 
			
		||||
    let mut buffer = vec![];
 | 
			
		||||
 | 
			
		||||
    let res = reader.read_line(&mut buffer);
 | 
			
		||||
    assert!(res.is_ok());
 | 
			
		||||
    assert_eq!(true, res.unwrap());
 | 
			
		||||
    assert_eq!(b"\xFF\xFE\x73\x00\x0A\x00", &buffer[..]);
 | 
			
		||||
 | 
			
		||||
    buffer.clear();
 | 
			
		||||
 | 
			
		||||
    let res = reader.read_line(&mut buffer);
 | 
			
		||||
    assert!(res.is_ok());
 | 
			
		||||
    assert_eq!(true, res.unwrap());
 | 
			
		||||
    assert_eq!(b"\x64\x00", &buffer[..]);
 | 
			
		||||
 | 
			
		||||
    buffer.clear();
 | 
			
		||||
 | 
			
		||||
    let res = reader.read_line(&mut buffer);
 | 
			
		||||
    assert!(res.is_ok());
 | 
			
		||||
    assert_eq!(false, res.unwrap());
 | 
			
		||||
    assert!(buffer.is_empty());
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -15,6 +15,7 @@ extern crate atty;
 | 
			
		||||
extern crate console;
 | 
			
		||||
extern crate content_inspector;
 | 
			
		||||
extern crate directories;
 | 
			
		||||
extern crate encoding;
 | 
			
		||||
extern crate git2;
 | 
			
		||||
extern crate syntect;
 | 
			
		||||
extern crate wild;
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,10 @@ use console::AnsiCodeIterator;
 | 
			
		||||
use syntect::easy::HighlightLines;
 | 
			
		||||
use syntect::highlighting::Theme;
 | 
			
		||||
 | 
			
		||||
use content_inspector::{self, ContentType};
 | 
			
		||||
use content_inspector::ContentType;
 | 
			
		||||
 | 
			
		||||
use encoding::all::{UTF_16BE, UTF_16LE};
 | 
			
		||||
use encoding::{DecoderTrap, Encoding};
 | 
			
		||||
 | 
			
		||||
use app::Config;
 | 
			
		||||
use assets::HighlightingAssets;
 | 
			
		||||
@@ -121,12 +124,9 @@ impl<'a> InteractivePrinter<'a> {
 | 
			
		||||
            panel_width = 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Determine file content type
 | 
			
		||||
        let content_type = content_inspector::inspect(&reader.first_line[..]);
 | 
			
		||||
 | 
			
		||||
        let mut line_changes = None;
 | 
			
		||||
 | 
			
		||||
        let highlighter = if content_type.is_binary() {
 | 
			
		||||
        let highlighter = if reader.content_type.is_binary() {
 | 
			
		||||
            None
 | 
			
		||||
        } else {
 | 
			
		||||
            // Get the Git modifications
 | 
			
		||||
@@ -149,8 +149,8 @@ impl<'a> InteractivePrinter<'a> {
 | 
			
		||||
            colors,
 | 
			
		||||
            config,
 | 
			
		||||
            decorations,
 | 
			
		||||
            content_type: reader.content_type,
 | 
			
		||||
            ansi_prefix_sgr: String::new(),
 | 
			
		||||
            content_type,
 | 
			
		||||
            line_changes,
 | 
			
		||||
            highlighter,
 | 
			
		||||
        }
 | 
			
		||||
@@ -207,10 +207,11 @@ impl<'a> Printer for InteractivePrinter<'a> {
 | 
			
		||||
            _ => ("", "STDIN"),
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let mode = if self.content_type.is_binary() {
 | 
			
		||||
            "   <BINARY>"
 | 
			
		||||
        } else {
 | 
			
		||||
            ""
 | 
			
		||||
        let mode = match self.content_type {
 | 
			
		||||
            ContentType::BINARY => "   <BINARY>",
 | 
			
		||||
            ContentType::UTF_16LE => "   <UTF-16LE>",
 | 
			
		||||
            ContentType::UTF_16BE => "   <UTF-16BE>",
 | 
			
		||||
            _ => ""
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        writeln!(
 | 
			
		||||
@@ -247,7 +248,18 @@ impl<'a> Printer for InteractivePrinter<'a> {
 | 
			
		||||
        line_number: usize,
 | 
			
		||||
        line_buffer: &[u8],
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        let line = String::from_utf8_lossy(&line_buffer).to_string();
 | 
			
		||||
        let line = match self.content_type {
 | 
			
		||||
            ContentType::BINARY => {
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            }
 | 
			
		||||
            ContentType::UTF_16LE => UTF_16LE
 | 
			
		||||
                .decode(&line_buffer, DecoderTrap::Strict)
 | 
			
		||||
                .unwrap_or("Invalid UTF-16LE".into()),
 | 
			
		||||
            ContentType::UTF_16BE => UTF_16BE
 | 
			
		||||
                .decode(&line_buffer, DecoderTrap::Strict)
 | 
			
		||||
                .unwrap_or("Invalid UTF-16BE".into()),
 | 
			
		||||
            _ => String::from_utf8_lossy(&line_buffer).to_string(),
 | 
			
		||||
        };
 | 
			
		||||
        let regions = {
 | 
			
		||||
            let highlighter = match self.highlighter {
 | 
			
		||||
                Some(ref mut highlighter) => highlighter,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user