1
0
mirror of https://github.com/sharkdp/bat.git synced 2025-02-19 03:19:04 +00:00

Correctly handle CSV files with a single separator throughout

better auto-detection of CSV delimiter
- files with a tsv extension are automatically detected as tab delimited
- other files parsed as CSV go through the following steps:
  - if the first line contains at least 3 of the same separator, it uses that separator as a delimiter
  - if the first line contains only one supported separator character, it uses that separator as a delimiter
  - otherwise it falls back to treating all supported delimiters as the delimiter

 supported delimiters, in precedence order:
 - comma `,`
 - semi-colon `;`
 - tab `\t`
 - pipe `|`
This commit is contained in:
Keith Hall 2021-12-13 23:29:44 +02:00 committed by Keith Hall
parent ac40f7cfd8
commit 512bfde7ce
14 changed files with 419 additions and 38 deletions

View File

@ -2,20 +2,21 @@
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Comma Separated Values
file_extensions:
- csv
- tsv
scope: text.csv
scope: text.csv.comma
variables:
field_separator: (?:[,;|\t])
field_separator: (?:,)
record_separator: (?:$\n?)
contexts:
prototype:
- match: (?={{record_separator}})
pop: true
main:
- match: '^'
push: fields
fields:
- match: ""
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
@ -24,54 +25,55 @@ contexts:
- field2
- field_or_record_separator
- field1
main:
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: "^"
set: fields
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
pop: true
field_or_record_separator:
- meta_include_prototype: false
- match: "{{record_separator}}"
scope: punctuation.terminator.record.csv
pop: true
- match: "{{field_separator}}"
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: double_quoted_string
push: scope:text.csv#double_quoted_string
- match: (?={{field_separator}}|{{record_separator}})
pop: true
double_quoted_string:
- meta_include_prototype: false
- meta_scope: string.quoted.double.csv
- match: '""'
scope: constant.character.escape.csv
- match: '"'
scope: punctuation.definition.string.end.csv
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
field1:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-1.csv support.type
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents

View File

@ -0,0 +1,80 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Pipe Separated Values
scope: text.csv.pipe
variables:
field_separator: (?:\|)
record_separator: (?:$\n?)
contexts:
main:
- match: '^'
push: fields
fields:
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
- field3
- field_or_record_separator
- field2
- field_or_record_separator
- field1
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
pop: true
field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: scope:text.csv#double_quoted_string
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
field1:
- match: ''
set:
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents

View File

@ -0,0 +1,79 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Semi-Colon Separated Values
scope: text.csv.semi-colon
variables:
field_separator: (?:;)
record_separator: (?:$\n?)
contexts:
main:
- match: '^'
push: fields
fields:
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
- field3
- field_or_record_separator
- field2
- field_or_record_separator
- field1
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
pop: true
field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: scope:text.csv#double_quoted_string
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
field1:
- match: ''
set:
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents

View File

@ -0,0 +1,113 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Separated Values
file_extensions:
- csv
scope: text.csv
variables:
field_separator_chars: ',;\t|'
field_separator: (?:[{{field_separator_chars}}])
record_separator: (?:$\n?)
contexts:
main:
- meta_include_prototype: false
- include: three_field_separators
- include: single_separator_type_on_line
- match: '^'
push: unknown-separated-main
three_field_separators:
- match: ^(?=(?:[^,]*,){3})
set: scope:text.csv.comma
- match: ^(?=(?:[^;]*;){3})
set: scope:text.csv.semi-colon
- match: ^(?=(?:[^\t]*\t){3})
set: scope:text.csv.tab
- match: ^(?=(?:[^|]*\|){3})
set: scope:text.csv.pipe
single_separator_type_on_line:
- match: ^(?=[^{{field_separator_chars}}]*,[^;\t|]*$)
set: scope:text.csv.comma
- match: ^(?=[^{{field_separator_chars}}]*;[^,\t|]*$)
set: scope:text.csv.semi-colon
- match: ^(?=[^{{field_separator_chars}}]*\t[^,;|]*$)
set: scope:text.csv.tab
- match: ^(?=[^{{field_separator_chars}}]*\|[^,;\t]*$)
set: scope:text.csv.pipe
unknown-separated-main:
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
- field3
- field_or_record_separator
- field2
- field_or_record_separator
- field1
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: double_quoted_string
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
double_quoted_string:
- meta_include_prototype: false
- meta_scope: string.quoted.double.csv
- match: '""'
scope: constant.character.escape.csv
- match: '"'
scope: punctuation.definition.string.end.csv
pop: true
field1:
- match: ''
set:
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents

View File

@ -0,0 +1,83 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Tab Separated Values
scope: text.csv.tab
file_extensions:
- tsv
variables:
field_separator: (?:\t)
record_separator: (?:$\n?)
contexts:
main:
- match: '^'
push: fields
fields:
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
- field3
- field_or_record_separator
- field2
- field_or_record_separator
- field1
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
pop: true
field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: scope:text.csv#double_quoted_string
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
field1:
- match: ''
set:
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents

View File

@ -0,0 +1,3 @@
foo,bar,baz,this|that,test,colors,cycle
1.2,1.7,2.5,blah;cool,test,colors,cycle
1 foo bar baz this|that test colors cycle
2 1.2 1.7 2.5 blah;cool test colors cycle

View File

@ -1,7 +1,7 @@
first,last,address,city,zip
John,Doe,120 any st.,"Anytown, WW",08123
a,b
1,"ha 
first,last,address,city,zip
John,Doe,120 any st.,"Anytown, WW",08123
a,b
1,"ha 
""ha"" 
ha",120 any st.,"Anytown, WW",08123
3,4,120 any st.,"Anytown, WW",08123
ha",120 any st.,"Anytown, WW",08123
3,4,120 any st.,"Anytown, WW",08123

Can't render this file because it contains an unexpected character in line 2 and column 177.

View File

@ -0,0 +1,3 @@
foo|bar|baz
1,2|1,7|2,7
1,5|8,5|-5,5
1 [3 38 2 253 151 31mfoo[38 2 253 151 31m|[38 2 102 217 239mbar[38 2 253 151 31m|[38 2 190 132 255mbaz
2 [3 38 2 253 151 31m1,2[38 2 253 151 31m|[38 2 102 217 239m1,7[38 2 253 151 31m|[38 2 190 132 255m2,7
3 [3 38 2 253 151 31m1,5[38 2 253 151 31m|[38 2 102 217 239m8,5[38 2 253 151 31m|[38 2 190 132 255m-5,5

View File

@ -0,0 +1,3 @@
foo;bar;baz
1,2;1,7;2,7
1,5;8,5;-5,5
1 [3 38 2 253 151 31mfoo[38 2 253 151 31m [38 2 102 217 239mbar[38 2 253 151 31m [38 2 190 132 255mbaz
2 [3 38 2 253 151 31m1,2[38 2 253 151 31m [38 2 102 217 239m1,7[38 2 253 151 31m [38 2 190 132 255m2,7
3 [3 38 2 253 151 31m1,5[38 2 253 151 31m [38 2 102 217 239m8,5[38 2 253 151 31m [38 2 190 132 255m-5,5

View File

@ -0,0 +1,3 @@
foo bar baz|;, test hello world tsv
1,2 1,7 2,7 a b c "hello again" tsv
";|," ;|, baz test "hello world" tsv
Can't render this file because it contains an unexpected character in line 2 and column 218.

View File

@ -0,0 +1,3 @@
foo,bar,baz,this|that,test,colors,cycle
1.2,1.7,2.5,blah;cool,test,colors,cycle
1 foo bar baz this|that test colors cycle
2 1.2 1.7 2.5 blah;cool test colors cycle

View File

@ -0,0 +1,3 @@
foo|bar|baz
1,2|1,7|2,7
1,5|8,5|-5,5
1 foo bar baz
2 1,2 1,7 2,7
3 1,5 8,5 -5,5

View File

@ -0,0 +1,3 @@
foo;bar;baz
1,2;1,7;2,7
1,5;8,5;-5,5
1 foo bar baz
2 1,2 1,7 2,7
3 1,5 8,5 -5,5

View File

@ -0,0 +1,3 @@
foo bar baz|;, test hello world tsv
1,2 1,7 2,7 a b c "hello again" tsv
";|," ;|, baz test "hello world" tsv
1 foo bar baz|;, test hello world tsv
2 1,2 1,7 2,7 a b c hello again tsv
3 ;|, ;|, baz test hello world tsv