From 512bfde7ce1c455a55d709aa8a46de103f97a782 Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Mon, 13 Dec 2021 23:29:44 +0200 Subject: [PATCH 1/2] Correctly handle CSV files with a single separator throughout better auto-detection of CSV delimiter - files with a tsv extension are automatically detected as tab delimited - other files parsed as CSV go through the following steps: - if the first line contains at least 3 of the same separator, it uses that separator as a delimiter - if the first line contains only one supported separator character, it uses that separator as a delimiter - otherwise it falls back to treating all supported delimiters as the delimiter supported delimiters, in precedence order: - comma `,` - semi-colon `;` - tab `\t` - pipe `|` --- .../CSV-comma.sublime-syntax} | 66 +++++----- .../02_Extra/CSV/CSV-pipe.sublime-syntax | 80 +++++++++++++ .../CSV/CSV-semi-colon.sublime-syntax | 79 ++++++++++++ .../syntaxes/02_Extra/CSV/CSV.sublime-syntax | 113 ++++++++++++++++++ .../syntaxes/02_Extra/CSV/TSV.sublime-syntax | 83 +++++++++++++ .../highlighted/CSV/comma-delimited.csv | 3 + .../highlighted/CSV/comma_in_quotes.csv | 12 +- ...als_comma_decimal_point_pipe_delimited.csv | 3 + ...omma_decimal_point_semicolon_delimited.csv | 3 + tests/syntax-tests/highlighted/CSV/simple.tsv | 3 + .../source/CSV/comma-delimited.csv | 3 + ...als_comma_decimal_point_pipe_delimited.csv | 3 + ...omma_decimal_point_semicolon_delimited.csv | 3 + tests/syntax-tests/source/CSV/simple.tsv | 3 + 14 files changed, 419 insertions(+), 38 deletions(-) rename assets/syntaxes/02_Extra/{CSV.sublime-syntax => CSV/CSV-comma.sublime-syntax} (62%) create mode 100644 assets/syntaxes/02_Extra/CSV/CSV-pipe.sublime-syntax create mode 100644 assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax create mode 100644 assets/syntaxes/02_Extra/CSV/CSV.sublime-syntax create mode 100644 assets/syntaxes/02_Extra/CSV/TSV.sublime-syntax create mode 100644 tests/syntax-tests/highlighted/CSV/comma-delimited.csv create mode 100644 tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_pipe_delimited.csv create mode 100644 tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_semicolon_delimited.csv create mode 100644 tests/syntax-tests/highlighted/CSV/simple.tsv create mode 100644 tests/syntax-tests/source/CSV/comma-delimited.csv create mode 100644 tests/syntax-tests/source/CSV/decimals_comma_decimal_point_pipe_delimited.csv create mode 100644 tests/syntax-tests/source/CSV/decimals_comma_decimal_point_semicolon_delimited.csv create mode 100644 tests/syntax-tests/source/CSV/simple.tsv diff --git a/assets/syntaxes/02_Extra/CSV.sublime-syntax b/assets/syntaxes/02_Extra/CSV/CSV-comma.sublime-syntax similarity index 62% rename from assets/syntaxes/02_Extra/CSV.sublime-syntax rename to assets/syntaxes/02_Extra/CSV/CSV-comma.sublime-syntax index 0ad17834..f23751ac 100644 --- a/assets/syntaxes/02_Extra/CSV.sublime-syntax +++ b/assets/syntaxes/02_Extra/CSV/CSV-comma.sublime-syntax @@ -2,20 +2,21 @@ --- # See http://www.sublimetext.com/docs/3/syntax.html name: Comma Separated Values -file_extensions: - - csv - - tsv -scope: text.csv +scope: text.csv.comma variables: - field_separator: (?:[,;|\t]) + field_separator: (?:,) record_separator: (?:$\n?) contexts: - prototype: - - match: (?={{record_separator}}) - pop: true + main: + - match: '^' + push: fields + fields: - - match: "" + - include: record_separator + - match: '' push: + - field_or_record_separator + - field5 - field_or_record_separator - field4 - field_or_record_separator @@ -24,54 +25,55 @@ contexts: - field2 - field_or_record_separator - field1 - main: + + record_separator_pop: + - match: (?={{record_separator}}) + pop: true + + record_separator: - meta_include_prototype: false - - match: "^" - set: fields + - match: '{{record_separator}}' + scope: punctuation.terminator.record.csv + pop: true field_or_record_separator: - meta_include_prototype: false - - match: "{{record_separator}}" - scope: punctuation.terminator.record.csv - pop: true - - match: "{{field_separator}}" + - include: record_separator_pop + - match: '{{field_separator}}' scope: punctuation.separator.sequence.csv pop: true field_contents: - match: '"' scope: punctuation.definition.string.begin.csv - push: double_quoted_string + push: scope:text.csv#double_quoted_string - - match: (?={{field_separator}}|{{record_separator}}) - pop: true - - double_quoted_string: - - meta_include_prototype: false - - meta_scope: string.quoted.double.csv - - match: '""' - scope: constant.character.escape.csv - - match: '"' - scope: punctuation.definition.string.end.csv + - include: record_separator_pop + - match: (?={{field_separator}}) pop: true field1: - - match: "" + - match: '' set: - - meta_content_scope: meta.field-1.csv support.type + - meta_content_scope: meta.field-1.csv variable.parameter - include: field_contents field2: - - match: "" + - match: '' set: - meta_content_scope: meta.field-2.csv support.function - include: field_contents field3: - - match: "" + - match: '' set: - meta_content_scope: meta.field-3.csv constant.numeric - include: field_contents field4: - - match: "" + - match: '' set: - meta_content_scope: meta.field-4.csv keyword.operator - include: field_contents + field5: + - match: '' + set: + - meta_content_scope: meta.field-5.csv string.unquoted + - include: field_contents diff --git a/assets/syntaxes/02_Extra/CSV/CSV-pipe.sublime-syntax b/assets/syntaxes/02_Extra/CSV/CSV-pipe.sublime-syntax new file mode 100644 index 00000000..52103fde --- /dev/null +++ b/assets/syntaxes/02_Extra/CSV/CSV-pipe.sublime-syntax @@ -0,0 +1,80 @@ +%YAML 1.2 +--- +# See http://www.sublimetext.com/docs/3/syntax.html +name: Pipe Separated Values +scope: text.csv.pipe +variables: + field_separator: (?:\|) + record_separator: (?:$\n?) + +contexts: + main: + - match: '^' + push: fields + + fields: + - include: record_separator + - match: '' + push: + - field_or_record_separator + - field5 + - field_or_record_separator + - field4 + - field_or_record_separator + - field3 + - field_or_record_separator + - field2 + - field_or_record_separator + - field1 + + record_separator_pop: + - match: (?={{record_separator}}) + pop: true + + record_separator: + - meta_include_prototype: false + - match: '{{record_separator}}' + scope: punctuation.terminator.record.csv + pop: true + + field_or_record_separator: + - meta_include_prototype: false + - include: record_separator_pop + - match: '{{field_separator}}' + scope: punctuation.separator.sequence.csv + pop: true + + field_contents: + - match: '"' + scope: punctuation.definition.string.begin.csv + push: scope:text.csv#double_quoted_string + + - include: record_separator_pop + - match: (?={{field_separator}}) + pop: true + + field1: + - match: '' + set: + - meta_content_scope: meta.field-1.csv variable.parameter + - include: field_contents + field2: + - match: '' + set: + - meta_content_scope: meta.field-2.csv support.function + - include: field_contents + field3: + - match: '' + set: + - meta_content_scope: meta.field-3.csv constant.numeric + - include: field_contents + field4: + - match: '' + set: + - meta_content_scope: meta.field-4.csv keyword.operator + - include: field_contents + field5: + - match: '' + set: + - meta_content_scope: meta.field-5.csv string.unquoted + - include: field_contents diff --git a/assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax b/assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax new file mode 100644 index 00000000..e0547ce0 --- /dev/null +++ b/assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax @@ -0,0 +1,79 @@ +%YAML 1.2 +--- +# See http://www.sublimetext.com/docs/3/syntax.html +name: Semi-Colon Separated Values +scope: text.csv.semi-colon +variables: + field_separator: (?:;) + record_separator: (?:$\n?) +contexts: + main: + - match: '^' + push: fields + + fields: + - include: record_separator + - match: '' + push: + - field_or_record_separator + - field5 + - field_or_record_separator + - field4 + - field_or_record_separator + - field3 + - field_or_record_separator + - field2 + - field_or_record_separator + - field1 + + record_separator_pop: + - match: (?={{record_separator}}) + pop: true + + record_separator: + - meta_include_prototype: false + - match: '{{record_separator}}' + scope: punctuation.terminator.record.csv + pop: true + + field_or_record_separator: + - meta_include_prototype: false + - include: record_separator_pop + - match: '{{field_separator}}' + scope: punctuation.separator.sequence.csv + pop: true + + field_contents: + - match: '"' + scope: punctuation.definition.string.begin.csv + push: scope:text.csv#double_quoted_string + + - include: record_separator_pop + - match: (?={{field_separator}}) + pop: true + + field1: + - match: '' + set: + - meta_content_scope: meta.field-1.csv variable.parameter + - include: field_contents + field2: + - match: '' + set: + - meta_content_scope: meta.field-2.csv support.function + - include: field_contents + field3: + - match: '' + set: + - meta_content_scope: meta.field-3.csv constant.numeric + - include: field_contents + field4: + - match: '' + set: + - meta_content_scope: meta.field-4.csv keyword.operator + - include: field_contents + field5: + - match: '' + set: + - meta_content_scope: meta.field-5.csv string.unquoted + - include: field_contents diff --git a/assets/syntaxes/02_Extra/CSV/CSV.sublime-syntax b/assets/syntaxes/02_Extra/CSV/CSV.sublime-syntax new file mode 100644 index 00000000..8ce83acb --- /dev/null +++ b/assets/syntaxes/02_Extra/CSV/CSV.sublime-syntax @@ -0,0 +1,113 @@ +%YAML 1.2 +--- +# See http://www.sublimetext.com/docs/3/syntax.html +name: Separated Values +file_extensions: + - csv +scope: text.csv +variables: + field_separator_chars: ',;\t|' + field_separator: (?:[{{field_separator_chars}}]) + record_separator: (?:$\n?) +contexts: + main: + - meta_include_prototype: false + - include: three_field_separators + - include: single_separator_type_on_line + - match: '^' + push: unknown-separated-main + + three_field_separators: + - match: ^(?=(?:[^,]*,){3}) + set: scope:text.csv.comma + - match: ^(?=(?:[^;]*;){3}) + set: scope:text.csv.semi-colon + - match: ^(?=(?:[^\t]*\t){3}) + set: scope:text.csv.tab + - match: ^(?=(?:[^|]*\|){3}) + set: scope:text.csv.pipe + + single_separator_type_on_line: + - match: ^(?=[^{{field_separator_chars}}]*,[^;\t|]*$) + set: scope:text.csv.comma + - match: ^(?=[^{{field_separator_chars}}]*;[^,\t|]*$) + set: scope:text.csv.semi-colon + - match: ^(?=[^{{field_separator_chars}}]*\t[^,;|]*$) + set: scope:text.csv.tab + - match: ^(?=[^{{field_separator_chars}}]*\|[^,;\t]*$) + set: scope:text.csv.pipe + + unknown-separated-main: + - include: record_separator + - match: '' + push: + - field_or_record_separator + - field5 + - field_or_record_separator + - field4 + - field_or_record_separator + - field3 + - field_or_record_separator + - field2 + - field_or_record_separator + - field1 + + record_separator_pop: + - match: (?={{record_separator}}) + pop: true + + record_separator: + - meta_include_prototype: false + - match: '{{record_separator}}' + scope: punctuation.terminator.record.csv + + field_or_record_separator: + - meta_include_prototype: false + - include: record_separator_pop + - match: '{{field_separator}}' + scope: punctuation.separator.sequence.csv + pop: true + + field_contents: + - match: '"' + scope: punctuation.definition.string.begin.csv + push: double_quoted_string + + - include: record_separator_pop + - match: (?={{field_separator}}) + pop: true + + double_quoted_string: + - meta_include_prototype: false + - meta_scope: string.quoted.double.csv + - match: '""' + scope: constant.character.escape.csv + - match: '"' + scope: punctuation.definition.string.end.csv + pop: true + + field1: + - match: '' + set: + - meta_content_scope: meta.field-1.csv variable.parameter + - include: field_contents + field2: + - match: '' + set: + - meta_content_scope: meta.field-2.csv support.function + - include: field_contents + field3: + - match: '' + set: + - meta_content_scope: meta.field-3.csv constant.numeric + - include: field_contents + field4: + - match: '' + set: + - meta_content_scope: meta.field-4.csv keyword.operator + - include: field_contents + field5: + - match: '' + set: + - meta_content_scope: meta.field-5.csv string.unquoted + - include: field_contents diff --git a/assets/syntaxes/02_Extra/CSV/TSV.sublime-syntax b/assets/syntaxes/02_Extra/CSV/TSV.sublime-syntax new file mode 100644 index 00000000..fdca0c31 --- /dev/null +++ b/assets/syntaxes/02_Extra/CSV/TSV.sublime-syntax @@ -0,0 +1,83 @@ +%YAML 1.2 +--- +# See http://www.sublimetext.com/docs/3/syntax.html +name: Tab Separated Values +scope: text.csv.tab +file_extensions: + - tsv + +variables: + field_separator: (?:\t) + record_separator: (?:$\n?) + +contexts: + main: + - match: '^' + push: fields + + fields: + - include: record_separator + - match: '' + push: + - field_or_record_separator + - field5 + - field_or_record_separator + - field4 + - field_or_record_separator + - field3 + - field_or_record_separator + - field2 + - field_or_record_separator + - field1 + + record_separator_pop: + - match: (?={{record_separator}}) + pop: true + + record_separator: + - meta_include_prototype: false + - match: '{{record_separator}}' + scope: punctuation.terminator.record.csv + pop: true + + field_or_record_separator: + - meta_include_prototype: false + - include: record_separator_pop + - match: '{{field_separator}}' + scope: punctuation.separator.sequence.csv + pop: true + + field_contents: + - match: '"' + scope: punctuation.definition.string.begin.csv + push: scope:text.csv#double_quoted_string + + - include: record_separator_pop + - match: (?={{field_separator}}) + pop: true + + field1: + - match: '' + set: + - meta_content_scope: meta.field-1.csv variable.parameter + - include: field_contents + field2: + - match: '' + set: + - meta_content_scope: meta.field-2.csv support.function + - include: field_contents + field3: + - match: '' + set: + - meta_content_scope: meta.field-3.csv constant.numeric + - include: field_contents + field4: + - match: '' + set: + - meta_content_scope: meta.field-4.csv keyword.operator + - include: field_contents + field5: + - match: '' + set: + - meta_content_scope: meta.field-5.csv string.unquoted + - include: field_contents diff --git a/tests/syntax-tests/highlighted/CSV/comma-delimited.csv b/tests/syntax-tests/highlighted/CSV/comma-delimited.csv new file mode 100644 index 00000000..05285a7c --- /dev/null +++ b/tests/syntax-tests/highlighted/CSV/comma-delimited.csv @@ -0,0 +1,3 @@ +foo,bar,baz,this|that,test,colors,cycle +1.2,1.7,2.5,blah;cool,test,colors,cycle + diff --git a/tests/syntax-tests/highlighted/CSV/comma_in_quotes.csv b/tests/syntax-tests/highlighted/CSV/comma_in_quotes.csv index 9bd0da1a..8f4004a0 100644 --- a/tests/syntax-tests/highlighted/CSV/comma_in_quotes.csv +++ b/tests/syntax-tests/highlighted/CSV/comma_in_quotes.csv @@ -1,7 +1,7 @@ -first,last,address,city,zip -John,Doe,120 any st.,"Anytown, WW",08123 -a,b -1,"ha  +first,last,address,city,zip +John,Doe,120 any st.,"Anytown, WW",08123 +a,b +1,"ha  ""ha""  -ha",120 any st.,"Anytown, WW",08123 -3,4,120 any st.,"Anytown, WW",08123 +ha",120 any st.,"Anytown, WW",08123 +3,4,120 any st.,"Anytown, WW",08123 diff --git a/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_pipe_delimited.csv b/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_pipe_delimited.csv new file mode 100644 index 00000000..843264e6 --- /dev/null +++ b/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_pipe_delimited.csv @@ -0,0 +1,3 @@ +foo|bar|baz +1,2|1,7|2,7 +1,5|8,5|-5,5 diff --git a/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_semicolon_delimited.csv b/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_semicolon_delimited.csv new file mode 100644 index 00000000..da1b0704 --- /dev/null +++ b/tests/syntax-tests/highlighted/CSV/decimals_comma_decimal_point_semicolon_delimited.csv @@ -0,0 +1,3 @@ +foo;bar;baz +1,2;1,7;2,7 +1,5;8,5;-5,5 diff --git a/tests/syntax-tests/highlighted/CSV/simple.tsv b/tests/syntax-tests/highlighted/CSV/simple.tsv new file mode 100644 index 00000000..7dd1a9de --- /dev/null +++ b/tests/syntax-tests/highlighted/CSV/simple.tsv @@ -0,0 +1,3 @@ +foo bar baz|;, test hello world tsv +1,2 1,7 2,7 a b c "hello again" tsv +";|," ;|, baz test "hello world" tsv diff --git a/tests/syntax-tests/source/CSV/comma-delimited.csv b/tests/syntax-tests/source/CSV/comma-delimited.csv new file mode 100644 index 00000000..4d7b2ec2 --- /dev/null +++ b/tests/syntax-tests/source/CSV/comma-delimited.csv @@ -0,0 +1,3 @@ +foo,bar,baz,this|that,test,colors,cycle +1.2,1.7,2.5,blah;cool,test,colors,cycle + diff --git a/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_pipe_delimited.csv b/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_pipe_delimited.csv new file mode 100644 index 00000000..c8a46786 --- /dev/null +++ b/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_pipe_delimited.csv @@ -0,0 +1,3 @@ +foo|bar|baz +1,2|1,7|2,7 +1,5|8,5|-5,5 diff --git a/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_semicolon_delimited.csv b/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_semicolon_delimited.csv new file mode 100644 index 00000000..63301750 --- /dev/null +++ b/tests/syntax-tests/source/CSV/decimals_comma_decimal_point_semicolon_delimited.csv @@ -0,0 +1,3 @@ +foo;bar;baz +1,2;1,7;2,7 +1,5;8,5;-5,5 diff --git a/tests/syntax-tests/source/CSV/simple.tsv b/tests/syntax-tests/source/CSV/simple.tsv new file mode 100644 index 00000000..2cf870a5 --- /dev/null +++ b/tests/syntax-tests/source/CSV/simple.tsv @@ -0,0 +1,3 @@ +foo bar baz|;, test hello world tsv +1,2 1,7 2,7 a b c "hello again" tsv +";|," ;|, baz test "hello world" tsv From 93fd013aa1c459a9a0a9a809b2a21ad67d6a7f86 Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Fri, 24 Jan 2025 23:05:35 +0200 Subject: [PATCH 2/2] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61103651..d1d4b99a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - Add syntax mapping for `paru` configuration files #3182 (@cyqsimon) - Add support for [Idris 2 programming language](https://www.idris-lang.org/) #3150 (@buzden) - Add syntax mapping for `nix`'s '`flake.lock` lockfiles #3196 (@odilf) +- Improvements to CSV/TSV highlighting, with autodetection of delimiter and support for TSV files, see #3186 (@keith-hall) ## Themes