mirror of
synced 2025-03-03 09:31:22 -05:00
3 changed files with 612 additions and 0 deletions
Normal file
Normal file
@ -0,0 +1,151 @@
// Ported from Go:
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
import { BufReader, BufState } from "../io/bufio.ts";
import { TextProtoReader } from "../textproto/mod.ts";
/** Characters that may be used neither as field delimiter nor as comment marker. */
const INVALID_RUNE: readonly string[] = [
  "\r",
  "\n",
  '"'
];
/**
 * Error describing a malformed CSV record.
 *
 * Carries the line on which the record started and the line on which the
 * problem was detected, alongside the regular `Error` message.
 */
export class ParseError extends Error {
  /** Line on which the offending record started. */
  StartLine: number;
  /** Line on which the error was detected. */
  Line: number;

  /**
   * @param start Line on which the offending record started.
   * @param line Line on which the error was detected.
   * @param message Human-readable description of the problem.
   */
  constructor(start: number, line: number, message: string) {
    // A derived class must call super() before touching `this`; passing the
    // message along is also what makes `error.message` available to callers.
    super(message);
    this.name = "ParseError";
    this.StartLine = start;
    this.Line = line;
  }
}
/** Options controlling how CSV input is split into records and fields. */
export interface ParseOptions {
  /** Field delimiter; each line is split on this string. */
  comma: string;
  /** When set, a line whose first non-blank character is this marker is ignored. */
  comment?: string;
  /** When true, leading white space is removed from every field. */
  trimLeadingSpace: boolean;
  /** When true, a bare `"` inside an unquoted field is not reported as an error. */
  lazyQuotes?: boolean;
  /**
   * Expected number of fields per record. `0` means "use the number of
   * fields of the first record"; when left undefined the field count is not
   * checked.
   */
  fieldsPerRecord?: number;
}
/**
 * Validates the delimiter-related parse options.
 *
 * The options are rejected when the field delimiter or the comment marker is
 * one of the reserved characters (`\r`, `\n`, `"`), or when both are the same
 * string.
 *
 * @param opt Options to validate.
 * @returns An `Error` with the message "Invalid Delimiter" when the options
 *   are unusable, otherwise `null`.
 */
function chkOptions(opt: ParseOptions): Error | null {
  const { comma, comment } = opt;
  // The comment marker is optional; only look it up when it is actually set.
  const commentIsReserved =
    comment !== undefined && INVALID_RUNE.includes(comment);
  if (INVALID_RUNE.includes(comma) || commentIsReserved || comma === comment) {
    return new Error("Invalid Delimiter");
  }
  return null;
}
/**
 * Reads a single line from `reader` and splits it into CSV fields.
 *
 * The line is split on `opt.comma` first and quotes are handled per field
 * afterwards, so a quoted field cannot contain the delimiter or a line break.
 *
 * @param Startline Line number reported in the `ParseError` raised for this
 *   line.
 * @param reader Buffered reader the line is read from.
 * @param opt Parse options; defaults to `,` as delimiter and `#` as comment
 *   marker.
 * @returns A `[fields, state]` tuple. `fields` is empty for a blank line, for
 *   a comment line and when a quote error is detected. `state` is the state
 *   reported by the line reader, or a `ParseError` when a bare `"` is found
 *   in an unquoted field while `opt.lazyQuotes` is not set.
 */
export async function read(
  Startline: number,
  reader: BufReader,
  opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false }
): Promise<[string[], BufState]> {
  const tp = new TextProtoReader(reader);
  let err: BufState;
  let line: string;

  [line, err] = await tp.readLine();

  // Normalize \r\n to \n on all input lines.
  if (line.endsWith("\r\n")) {
    line = line.substring(0, line.length - 2) + "\n";
  }

  const trimmedLine = line.trimStart();
  if (trimmedLine.length === 0) {
    return [[], err];
  }

  // A line starting with the comment marker is ignored. `startsWith` keeps
  // working for markers that span more than one UTF-16 code unit.
  if (opt.comment && trimmedLine.startsWith(opt.comment)) {
    return [[], err];
  }

  const fields: string[] = [];
  for (let field of line.split(opt.comma)) {
    if (opt.trimLeadingSpace) {
      field = field.trimStart();
    }

    // Drop the quotes wrapping a field; a missing closing quote is tolerated.
    if (field[0] === '"' && field[field.length - 1] === '"') {
      field = field.substring(1, field.length - 1);
    } else if (field[0] === '"') {
      field = field.substring(1, field.length);
    }

    // Unless lazy quoting is enabled, a quote left inside the field is an
    // error.
    if (!opt.lazyQuotes && field[0] !== '"' && field.includes('"')) {
      return [
        [],
        new ParseError(Startline, Startline, 'bare " in non-quoted-field')
      ];
    }
    fields.push(field);
  }
  return [fields, err];
}
/**
 * Reads every remaining line from `reader` and returns the parsed records.
 *
 * Blank lines and comment lines produce no record. When
 * `opt.fieldsPerRecord` is set, every record must have that many fields; a
 * value of `0` means "as many fields as the first record".
 *
 * @param reader Buffered reader holding the CSV input.
 * @param opt Parse options; defaults to `,` as delimiter without comment
 *   support.
 * @returns A `[records, error]` tuple. `error` is `null` when the input was
 *   read up to "EOF". Otherwise it holds the options error, the `ParseError`
 *   or the reader state that stopped parsing, and `records` contains the
 *   records parsed before that point.
 */
export async function readAll(
  reader: BufReader,
  opt: ParseOptions = {
    comma: ",",
    trimLeadingSpace: false,
    lazyQuotes: false
  }
): Promise<[string[][], BufState]> {
  const result: string[][] = [];

  const optionsError = chkOptions(opt);
  if (optionsError) return [result, optionsError];

  let expectedFields: number | undefined;
  let first = true;
  let lineIndex = 0;
  let err: BufState;
  let lineResult: string[];

  for (;;) {
    [lineResult, err] = await read(lineIndex, reader, opt);
    if (err) break;
    lineIndex++;

    // Blank and comment lines yield no fields and are not records.
    if (lineResult.length === 0) continue;

    // If fieldsPerRecord is 0, Read sets it to
    // the number of fields in the first record
    if (first) {
      first = false;
      if (opt.fieldsPerRecord !== undefined) {
        expectedFields =
          opt.fieldsPerRecord === 0 ? lineResult.length : opt.fieldsPerRecord;
      }
    }

    if (expectedFields && expectedFields !== lineResult.length) {
      return [
        result,
        new ParseError(lineIndex, lineIndex, "wrong number of fields")
      ];
    }
    result.push(lineResult);
  }

  // Reaching the end of the input is the regular way out of the loop.
  if (err !== "EOF") {
    return [result, err];
  }
  return [result, null];
}
Normal file
Normal file
@ -0,0 +1,460 @@
// Test ported from Golang
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
import { test, runIfMain } from "../testing/mod.ts";
import { assertEquals, assert } from "../testing/asserts.ts";
import { readAll } from "./csv.ts";
import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
// Error messages the parser is expected to report.
const ErrInvalidDelim = "Invalid Delimiter";
const ErrFieldCount = "wrong number of fields";
const ErrBareQuote = 'bare " in non-quoted-field';

// TODO(zekth): Activate remaining tests
/**
 * Table of parser test cases. Each case has a `Name`, usually an `Input`, and
 * either the expected `Output` records or the expected `Error` message. The
 * remaining keys (`Comma`, `Comment`, `TrimLeadingSpace`, `LazyQuotes`,
 * `UseFieldsPerRecord`, `FieldsPerRecord`) select the parse options.
 */
const testCases = [
  {
    Name: "Simple",
    Input: "a,b,c\n",
    Output: [["a", "b", "c"]]
  },
  {
    Name: "CRLF",
    Input: "a,b\r\nc,d\r\n",
    Output: [["a", "b"], ["c", "d"]]
  },
  {
    Name: "BareCR",
    Input: "a,b\rc,d\r\n",
    Output: [["a", "b\rc", "d"]]
  },
  // {
  //   Name: "RFC4180test",
  //   Input: `#field1,field2,field3
  // "aaa","bbb","ccc"
  // "a,a","bbb","ccc"
  // zzz,yyy,xxx`,
  //   UseFieldsPerRecord: true,
  //   FieldsPerRecord: 0,
  //   Output: [
  //     ["#field1", "field2", "field3"],
  //     ["aaa", "bbb", "ccc"],
  //     ["a,a", `bbb`, "ccc"],
  //     ["zzz", "yyy", "xxx"]
  //   ]
  // },
  {
    Name: "NoEOLTest",
    Input: "a,b,c",
    Output: [["a", "b", "c"]]
  },
  {
    Name: "Semicolon",
    Input: "a;b;c\n",
    Output: [["a", "b", "c"]],
    Comma: ";"
  },
  // {
  //   Name: "MultiLine",
  //   Input: `"two
  // line","one line","three
  // line
  // field"`,
  //   Output: [["two\nline"], ["one line"], ["three\nline\nfield"]]
  // },
  {
    Name: "BlankLine",
    Input: "a,b,c\n\nd,e,f\n\n",
    Output: [["a", "b", "c"], ["d", "e", "f"]]
  },
  {
    Name: "BlankLineFieldCount",
    Input: "a,b,c\n\nd,e,f\n\n",
    Output: [["a", "b", "c"], ["d", "e", "f"]],
    UseFieldsPerRecord: true,
    FieldsPerRecord: 0
  },
  {
    Name: "TrimSpace",
    Input: " a,  b,   c\n",
    Output: [["a", "b", "c"]],
    TrimLeadingSpace: true
  },
  {
    Name: "LeadingSpace",
    Input: " a,  b,   c\n",
    Output: [[" a", "  b", "   c"]]
  },
  {
    Name: "Comment",
    Input: "#1,2,3\na,b,c\n#comment",
    Output: [["a", "b", "c"]],
    Comment: "#"
  },
  {
    Name: "NoComment",
    Input: "#1,2,3\na,b,c",
    Output: [["#1", "2", "3"], ["a", "b", "c"]]
  },
  {
    Name: "LazyQuotes",
    Input: `a "word","1"2",a","b`,
    Output: [[`a "word"`, `1"2`, `a"`, `b`]],
    LazyQuotes: true
  },
  {
    Name: "BareQuotes",
    Input: `a "word","1"2",a"`,
    Output: [[`a "word"`, `1"2`, `a"`]],
    LazyQuotes: true
  },
  {
    Name: "BareDoubleQuotes",
    Input: `a""b,c`,
    Output: [[`a""b`, `c`]],
    LazyQuotes: true
  },
  {
    Name: "BadDoubleQuotes",
    Input: `a""b,c`,
    Error: ErrBareQuote
    // Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
  },
  {
    Name: "TrimQuote",
    Input: ` "a"," b",c`,
    Output: [["a", " b", "c"]],
    TrimLeadingSpace: true
  },
  {
    Name: "BadBareQuote",
    Input: `a "word","b"`,
    Error: ErrBareQuote
    // Error: true //&ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
  },
  {
    Name: "BadTrailingQuote",
    Input: `"a word",b"`,
    Error: ErrBareQuote
  },
  {
    Name: "ExtraneousQuote",
    Input: `"a "word","b"`,
    Error: ErrBareQuote
  },
  {
    Name: "BadFieldCount",
    Input: "a,b,c\nd,e",
    Error: ErrFieldCount,
    UseFieldsPerRecord: true,
    FieldsPerRecord: 0
  },
  {
    Name: "BadFieldCount1",
    Input: `a,b,c`,
    // Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
    UseFieldsPerRecord: true,
    FieldsPerRecord: 2,
    Error: ErrFieldCount
  },
  {
    Name: "FieldCount",
    Input: "a,b,c\nd,e",
    Output: [["a", "b", "c"], ["d", "e"]]
  },
  {
    Name: "TrailingCommaEOF",
    Input: "a,b,c,",
    Output: [["a", "b", "c", ""]]
  },
  {
    Name: "TrailingCommaEOL",
    Input: "a,b,c,\n",
    Output: [["a", "b", "c", ""]]
  },
  {
    Name: "TrailingCommaSpaceEOF",
    Input: "a,b,c, ",
    Output: [["a", "b", "c", ""]],
    TrimLeadingSpace: true
  },
  {
    Name: "TrailingCommaSpaceEOL",
    Input: "a,b,c, \n",
    Output: [["a", "b", "c", ""]],
    TrimLeadingSpace: true
  },
  {
    Name: "TrailingCommaLine3",
    Input: "a,b,c\nd,e,f\ng,hi,",
    Output: [["a", "b", "c"], ["d", "e", "f"], ["g", "hi", ""]],
    TrimLeadingSpace: true
  },
  {
    Name: "NotTrailingComma3",
    Input: "a,b,c, \n",
    Output: [["a", "b", "c", " "]]
  },
  {
    Name: "CommaFieldTest",
    // NOTE(review): only the first row of this input was present; the other
    // rows are rebuilt from the expected Output below - confirm against the
    // upstream Go test.
    Input:
      [
        "x,y,z,w",
        "x,y,z,",
        "x,y,,",
        "x,,,",
        ",,,",
        '"x","y","z","w"',
        '"x","y","z",""',
        '"x","y","",""',
        '"x","","",""',
        '"","","",""'
      ].join("\n") + "\n",
    Output: [
      ["x", "y", "z", "w"],
      ["x", "y", "z", ""],
      ["x", "y", "", ""],
      ["x", "", "", ""],
      ["", "", "", ""],
      ["x", "y", "z", "w"],
      ["x", "y", "z", ""],
      ["x", "y", "", ""],
      ["x", "", "", ""],
      ["", "", "", ""]
    ]
  },
  {
    Name: "TrailingCommaIneffective1",
    Input: "a,b,\nc,d,e",
    Output: [["a", "b", ""], ["c", "d", "e"]],
    TrimLeadingSpace: true
  },
  {
    Name: "ReadAllReuseRecord",
    Input: "a,b\nc,d",
    Output: [["a", "b"], ["c", "d"]],
    ReuseRecord: true
  },
  // {
  //   Name: "StartLine1", // Issue 19019
  //   Input: 'a,"b\nc"d,e',
  //   Error: true
  //   // Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
  // },
  // {
  //   Name: "StartLine2",
  //   Input: 'a,b\n"d\n\n,e',
  //   Error: true
  //   // Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
  // },
  // {
  //   Name: "CRLFInQuotedField", // Issue 21201
  //   Input: 'A,"Hello\r\nHi",B\r\n',
  //   Output: [["A", "Hello\nHi", "B"]]
  // },
  {
    Name: "BinaryBlobField", // Issue 19410
    Input: "x09\x41\xb4\x1c,aktau",
    Output: [["x09A\xb4\x1c", "aktau"]]
  },
  // {
  //   Name: "TrailingCR",
  //   Input: "field1,field2\r",
  //   Output: [["field1", "field2"]]
  // },
  // {
  //   Name: "QuotedTrailingCR",
  //   Input: '"field"\r',
  //   Output: [['"field"']]
  // },
  // {
  //   Name: "QuotedTrailingCRCR",
  //   Input: '"field"\r\r',
  //   Error: true,
  //   // Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
  // },
  // {
  //   Name: "FieldCR",
  //   Input: "field\rfield\r",
  //   Output: [["field\rfield"]]
  // },
  // {
  //   Name: "FieldCRCR",
  //   Input: "field\r\rfield\r\r",
  //   Output: [["field\r\rfield\r"]]
  // },
  {
    Name: "FieldCRCRLF",
    Input: "field\r\r\nfield\r\r\n",
    Output: [["field\r"], ["field\r"]]
  },
  {
    Name: "FieldCRCRLFCR",
    Input: "field\r\r\n\rfield\r\r\n\r",
    Output: [["field\r"], ["\rfield\r"]]
  },
  // {
  //   Name: "FieldCRCRLFCRCR",
  //   Input: "field\r\r\n\r\rfield\r\r\n\r\r",
  //   Output: [["field\r"], ["\r\rfield\r"], ["\r"]]
  // },
  // {
  //   Name: "MultiFieldCRCRLFCRCR",
  //   Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
  //   Output: [["field1", "field2\r"], ["\r\rfield1", "field2\r"], ["\r\r", ""]]
  // },
  {
    Name: "NonASCIICommaAndComment",
    Input: "a£b,c£ \td,e\n€ comment\n",
    Output: [["a", "b,c", "d,e"]],
    TrimLeadingSpace: true,
    Comma: "£",
    Comment: "€"
  },
  {
    Name: "NonASCIICommaAndCommentWithQuotes",
    Input: 'a€"  b,"€ c\nλ comment\n',
    Output: [["a", "  b,", " c"]],
    Comma: "€",
    Comment: "λ"
  },
  {
    // λ and θ start with the same byte.
    // This tests that the parser doesn't confuse such characters.
    Name: "NonASCIICommaConfusion",
    Input: '"abθcd"λefθgh',
    Output: [["abθcd", "efθgh"]],
    Comma: "λ",
    Comment: "€"
  },
  {
    Name: "NonASCIICommentConfusion",
    Input: "λ\nλ\nθ\nλ\n",
    Output: [["λ"], ["λ"], ["λ"]],
    Comment: "θ"
  },
  // {
  //   Name: "QuotedFieldMultipleLF",
  //   Input: '"\n\n\n\n"',
  //   Output: [["\n\n\n\n"]]
  // },
  // {
  //   Name: "MultipleCRLF",
  //   Input: "\r\n\r\n\r\n\r\n"
  // },
  // {
  //   // The implementation may read each line in several chunks if it doesn't fit entirely
  //   // in the read buffer, so we should test the code to handle that condition.
  //   Name: "HugeLines",
  //   Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
  //   Output: [[strings.Repeat("@", 5000), strings.Repeat("*", 5000)]],
  //   Comment: '#',
  // },
  {
    Name: "QuoteWithTrailingCRLF",
    Input: '"foo"bar"\r\n',
    Error: ErrBareQuote
    // Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
  },
  {
    Name: "LazyQuoteWithTrailingCRLF",
    Input: '"foo"bar"\r\n',
    Output: [[`foo"bar`]],
    LazyQuotes: true
  },
  // {
  //   Name: "DoubleQuoteWithTrailingCRLF",
  //   Input: '"foo""bar"\r\n',
  //   Output: [[`foo"bar`]]
  // },
  // {
  //   Name: "EvenQuotes",
  //   Input: `""""""""`,
  //   Output: [[`"""`]]
  // },
  // {
  //   Name: "OddQuotes",
  //   Input: `"""""""`,
  //   Error: true
  //   // Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
  // },
  // {
  //   Name: "LazyOddQuotes",
  //   Input: `"""""""`,
  //   Output: [[`"""`]],
  //   LazyQuotes: true
  // },
  {
    Name: "BadComma1",
    Comma: "\n",
    Error: ErrInvalidDelim
  },
  {
    Name: "BadComma2",
    Comma: "\r",
    Error: ErrInvalidDelim
  },
  {
    Name: "BadComma3",
    Comma: '"',
    Error: ErrInvalidDelim
  },
  {
    Name: "BadComment1",
    Comment: "\n",
    Error: ErrInvalidDelim
  },
  {
    Name: "BadComment2",
    Comment: "\r",
    Error: ErrInvalidDelim
  },
  {
    Name: "BadCommaComment",
    Comma: "X",
    Comment: "X",
    Error: ErrInvalidDelim
  }
];
// Register one test per table entry: build the parse options from the entry,
// parse its input and compare against the expected records or error message.
for (const t of testCases) {
  test({
    name: `[CSV] ${t.Name}`,
    async fn(): Promise<void> {
      // Unset (or falsy) entries fall back to the parser defaults.
      const options = {
        comma: t.Comma ? t.Comma : ",",
        comment: t.Comment ? t.Comment : undefined,
        trimLeadingSpace: t.TrimLeadingSpace ? true : false,
        fieldsPerRecord: t.UseFieldsPerRecord ? t.FieldsPerRecord : undefined,
        lazyQuotes: t.LazyQuotes ? t.LazyQuotes : false
      };
      const actual = await readAll(
        new BufReader(new StringReader(t.Input)),
        options
      );
      if (t.Error) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const e: any = actual[1];
        // Fail with a clear message instead of a TypeError on `e.message`
        // when no error was reported at all.
        assert(e instanceof Error, `expected error "${t.Error}"`);
        assertEquals(e.message, t.Error);
      } else {
        const expected = [t.Output, null];
        assertEquals(actual, expected);
      }
    }
  });
}
@ -1,2 +1,3 @@
// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
import "./toml_test.ts";
import "./csv_test.ts";
Add table
Reference in a new issue