1
0
mirror of https://github.com/sharkdp/bat.git synced 2025-09-01 19:02:22 +01:00

Correctly handle CSV files with a single separator throughout

better auto-detection of CSV delimiter
- files with a tsv extension are automatically detected as tab delimited
- other files parsed as CSV go through the following steps:
  - if the first line contains at least 3 of the same separator, it uses that separator as a delimiter
  - if the first line contains only one supported separator character, it uses that separator as a delimiter
  - otherwise it falls back to treating all supported delimiters as the delimiter

 supported delimiters, in precedence order:
 - comma `,`
 - semi-colon `;`
 - tab `\t`
 - pipe `|`
This commit is contained in:
Keith Hall
2021-12-13 23:29:44 +02:00
committed by Keith Hall
parent ac40f7cfd8
commit 512bfde7ce
14 changed files with 419 additions and 38 deletions

View File

@@ -0,0 +1,113 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Separated Values
file_extensions:
- csv
scope: text.csv
variables:
field_separator_chars: ',;\t|'
field_separator: (?:[{{field_separator_chars}}])
record_separator: (?:$\n?)
contexts:
main:
- meta_include_prototype: false
- include: three_field_separators
- include: single_separator_type_on_line
- match: '^'
push: unknown-separated-main
three_field_separators:
- match: ^(?=(?:[^,]*,){3})
set: scope:text.csv.comma
- match: ^(?=(?:[^;]*;){3})
set: scope:text.csv.semi-colon
- match: ^(?=(?:[^\t]*\t){3})
set: scope:text.csv.tab
- match: ^(?=(?:[^|]*\|){3})
set: scope:text.csv.pipe
single_separator_type_on_line:
- match: ^(?=[^{{field_separator_chars}}]*,[^;\t|]*$)
set: scope:text.csv.comma
- match: ^(?=[^{{field_separator_chars}}]*;[^,\t|]*$)
set: scope:text.csv.semi-colon
- match: ^(?=[^{{field_separator_chars}}]*\t[^,;|]*$)
set: scope:text.csv.tab
- match: ^(?=[^{{field_separator_chars}}]*\|[^,;\t]*$)
set: scope:text.csv.pipe
unknown-separated-main:
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
- field3
- field_or_record_separator
- field2
- field_or_record_separator
- field1
record_separator_pop:
- match: (?={{record_separator}})
pop: true
record_separator:
- meta_include_prototype: false
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true
field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: double_quoted_string
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true
double_quoted_string:
- meta_include_prototype: false
- meta_scope: string.quoted.double.csv
- match: '""'
scope: constant.character.escape.csv
- match: '"'
scope: punctuation.definition.string.end.csv
pop: true
field1:
- match: ''
set:
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents