0
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-03-03 09:31:22 -05:00

feat(std/encoding/csv): Add stringify functionality (#8408)

This commit is contained in:
Jesse Jackson 2020-11-25 08:50:00 -06:00 committed by GitHub
parent d40b0711a7
commit ed11eb6687
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 737 additions and 6 deletions

View file

@ -32,12 +32,24 @@ writeVarbig(w: Deno.Writer, x: bigint, o: VarbigOptions = {}): Promise<number>
### API
#### `readMatrix(reader: BufReader, opt: ReadOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false }): Promise<string[][]>`
#### `readMatrix`
```ts
(reader: BufReader, opt: ReadOptions = {
comma: ",",
trimLeadingSpace: false,
lazyQuotes: false,
}): Promise<string[][]>
```
Parse the CSV from the `reader` with the options provided and return
`string[][]`.
#### `parse(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>`:
#### `parse`
```ts
(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>
```
Parse the CSV string/buffer with the options provided. The result of this
function is as follows:
@ -70,8 +82,8 @@ function is as follows:
##### `ReadOptions`
- **`comma?: string;`**: Character which separates values. Default: `','`.
- **`comment?: string;`**: Character to start a comment. Default: `'#'`.
- **`comma?: string;`**: Character which separates values. Default: `","`.
- **`comment?: string;`**: Character to start a comment. Default: `"#"`.
- **`trimLeadingSpace?: boolean;`**: Flag to trim the leading space of the
value. Default: `false`.
- **`lazyQuotes?: boolean;`**: Allow unquoted quote in a quoted field or non
@ -79,7 +91,111 @@ function is as follows:
- **`fieldsPerRecord?`**: Enables checking the number of fields in each row.
If == 0, the first row is used as the reference for the number of fields.
### Usage
#### `stringify`
```ts
(data: DataItem[], columns: Column[], options?: StringifyOptions): Promise<string>
```
- **`data`** is the source data to stringify. It's an array of items which are
plain objects or arrays.
`DataItem: Record<string, unknown> | unknown[]`
```ts
const data = [
{
name: "Deno",
repo: { org: "denoland", name: "deno" },
runsOn: ["Rust", "TypeScript"],
},
];
```
- **`columns`** is a list of instructions for how to target and transform the
data for each column of output. This is also where you can provide an explicit
header name for the column.
`Column`:
- The most essential aspect of a column is accessing the property holding the
data for that column on each object in the data array. If that member is at
the top level, `Column` can simply be a property accessor, which is either a
`string` (if it's a plain object) or a `number` (if it's an array).
```ts
const columns = [
"name",
];
```
Each property accessor will be used as the header for the column:
| name |
| :--: |
| Deno |
- If the required data is not at the top level (it's nested in other
objects/arrays), then a simple property accessor won't work, so an array of
them will be required.
```ts
const columns = [
["repo", "name"],
["repo", "org"],
];
```
When using arrays of property accessors, the header names inherit the value
of the last accessor in each array:
| name | org |
| :--: | :------: |
| deno | denoland |
- If the data is not already in the required output format, or a different
column header is desired, then a `ColumnDetails` object type can be used for
each column:
- **`fn?: (value: any) => string | Promise<string>`** is an optional
function to transform the targeted data into the desired format
- **`header?: string`** is the optional value to use for the column header
name
- **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor
(`string` or `number`) or array of property accessors used to access the
data on each object
```ts
const columns = [
"name",
{
prop: ["runsOn", 0],
header: "language 1",
fn: (str: string) => str.toLowerCase(),
},
{
prop: ["runsOn", 1],
header: "language 2",
fn: (str: string) => str.toLowerCase(),
},
];
```
| name | language 1 | language 2 |
| :--: | :--------: | :--------: |
| Deno | rust | typescript |
- **`options`** are options for the delimiter-separated output.
- **`headers?: boolean`**: Whether or not to include the row of headers.
Default: `true`
- **`separator?: string`**: Delimiter used to separate values. Examples:
- `","` _comma_ (Default)
- `"\t"` _tab_
- `"|"` _pipe_
- etc.
### Basic Usage
```ts
import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
@ -94,6 +210,67 @@ console.log(
// [["a", "b", "c"], ["d", "e", "f"]]
```
```ts
import {
Column,
stringify,
} from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
type Character = {
age: number;
name: {
first: string;
last: string;
};
};
const data: Character[] = [
{
age: 70,
name: {
first: "Rick",
last: "Sanchez",
},
},
{
age: 14,
name: {
first: "Morty",
last: "Smith",
},
},
];
let columns: Column[] = [
["name", "first"],
"age",
];
console.log(await stringify(data, columns));
// first,age
// Rick,70
// Morty,14
//
columns = [
{
prop: "name",
fn: (name: Character["name"]) => `${name.first} ${name.last}`,
},
{
prop: "age",
header: "is_adult",
fn: (age: Character["age"]) => String(age >= 18),
},
];
console.log(await stringify(data, columns, { separator: "\t" }));
// name is_adult
// Rick Sanchez true
// Morty Smith false
//
```
## TOML
This module parses TOML files. It follows as much as possible the
@ -231,7 +408,7 @@ console.log(tomlObject);
YAML parser / dumper for Deno.
Heavily inspired from [js-yaml].
Heavily inspired from [`js-yaml`](https://github.com/nodeca/js-yaml).
### Basic usage

View file

@ -9,6 +9,15 @@ import { TextProtoReader } from "../textproto/mod.ts";
import { StringReader } from "../io/readers.ts";
import { assert } from "../_util/assert.ts";
export { NEWLINE, stringify, StringifyError } from "./csv_stringify.ts";
export type {
Column,
ColumnDetails,
DataItem,
StringifyOptions,
} from "./csv_stringify.ts";
const INVALID_RUNE = ["\r", "\n", '"'];
export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';

View file

@ -0,0 +1,172 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
// Implements the CSV spec at https://tools.ietf.org/html/rfc4180
/** This module is browser compatible. */
/**
 * Double-quote character: wraps fields that need quoting and, doubled,
 * escapes quotes embedded in a field.
 */
const QUOTE = '"';
/** Record terminator (CRLF) appended after every row of output. */
export const NEWLINE = "\r\n";
/**
 * Error thrown by this module when stringification cannot proceed, e.g. for
 * an invalid separator or a non-numeric accessor applied to an array.
 */
export class StringifyError extends Error {
// Overrides the inherited `Error#name` so instances identify themselves.
readonly name = "StringifyError";
}
/**
 * Renders a single value as the text of one field.
 *
 * `undefined` and `null` become the empty string, objects (arrays included)
 * are serialized with `JSON.stringify`, and everything else goes through
 * `String()`. The text is wrapped in double quotes, with embedded quotes
 * doubled, only when it contains the separator, a CRLF sequence or a double
 * quote.
 *
 * @param value The raw value to render
 * @param sep The separator used for the current output
 * @returns The field text, quoted and escaped when required
 */
function getEscapedString(value: unknown, sep: string): string {
  if (value === null || value === undefined) return "";

  const text = typeof value === "object"
    ? JSON.stringify(value)
    : String(value);

  const needsQuoting = [sep, NEWLINE, QUOTE].some((token) =>
    text.includes(token)
  );
  if (!needsQuoting) return text;

  // Double every embedded quote, then enclose the whole field in quotes.
  const escaped = text.split(QUOTE).join(QUOTE + QUOTE);
  return QUOTE + escaped + QUOTE;
}
/** A single key (for plain objects) or index (for arrays) used to read a value. */
type PropertyAccessor = number | string;
/**
 * Full description of one output column.
 *
 * - `fn`: optional callback for transforming the value; it may return the
 *   result directly or as a promise.
 * - `header`: explicit column header name. If omitted, the (final) property
 *   accessor is used for this value.
 * - `prop`: property accessor(s) used to access the value on the object.
 */
export type ColumnDetails = {
// "unknown" is more type-safe, but inconvenient for user. How to resolve?
// deno-lint-ignore no-explicit-any
fn?: (value: any) => string | Promise<string>;
header?: string;
prop: PropertyAccessor | PropertyAccessor[];
};
/**
 * A column given as a details object, a single property accessor, or an
 * array of property accessors.
 */
export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[];
/**
 * Internal form of a column: `header` is always a string and `prop` is
 * always an array of accessors, while `fn` stays optional.
 */
type NormalizedColumn = Omit<ColumnDetails, "header" | "prop"> & {
header: string;
prop: PropertyAccessor[];
};
/**
 * Converts any accepted column form into the uniform internal shape.
 *
 * - A bare accessor becomes a one-element accessor path and its own header.
 * - An accessor array is used as the path; its last element is the header.
 * - A details object keeps its `fn`; `prop` is wrapped in an array when it is
 *   a single accessor, and `header` falls back to the last accessor when it
 *   is not a string.
 *
 * @param column The column as supplied by the caller
 * @returns The column with `header` and `prop` always populated
 */
function normalizeColumn(column: Column): NormalizedColumn {
  // Bare accessor (string or number).
  if (typeof column !== "object") {
    return { fn: undefined, header: String(column), prop: [column] };
  }

  // Array of accessors: the array itself is the path.
  if (Array.isArray(column)) {
    const lastAccessor = column[column.length - 1];
    return { fn: undefined, header: String(lastAccessor), prop: column };
  }

  // Details object.
  const { fn } = column;
  const path = Array.isArray(column.prop) ? column.prop : [column.prop];
  const header = typeof column.header === "string"
    ? column.header
    : String(path[path.length - 1]);
  return { fn, header, prop: path };
}
/** A plain object whose properties are looked up by string key. */
type ObjectWithStringPropertyKeys = Record<string, unknown>;
/** An object (plain or array) */
export type DataItem = ObjectWithStringPropertyKeys | unknown[];
/**
 * Collects one value per column from a single data item.
 *
 * For each column the accessor path is followed starting at `item`. A step
 * is only taken while the current value is a non-null object; once it is
 * anything else the remaining accessors are ignored and that value is kept.
 * If the column has a transform function, the value is passed through it
 * (and awaited) before being collected.
 *
 * @param item The data item to read from
 * @param normalizedColumns The columns, in output order
 * @returns The values for the item, one per column, in column order
 * @throws {StringifyError} If a non-numeric accessor is applied to an array
 */
async function getValuesFromItem(
  item: DataItem,
  normalizedColumns: NormalizedColumn[],
): Promise<unknown[]> {
  const row: unknown[] = [];

  for (const column of normalizedColumns) {
    let current: unknown = item;

    for (const key of column.prop) {
      if (current !== null && typeof current === "object") {
        if (!Array.isArray(current)) {
          current = (current as ObjectWithStringPropertyKeys)[key];
        } else if (typeof key === "number") {
          current = current[key];
        } else {
          throw new StringifyError('Property accessor is not of type "number"');
        }
      }
    }

    // Called as a method of the column so `this` stays bound to it.
    if (typeof column.fn === "function") current = await column.fn(current);
    row.push(current);
  }

  return row;
}
/**
 * Output formatting options.
 *
 * - `headers`: whether or not to include the row of headers.
 *   Default: `true`
 * - `separator`: delimiter used to separate values. Examples:
 *   - `","` _comma_ (Default)
 *   - `"\t"` _tab_
 *   - `"|"` _pipe_
 *   - etc.
 */
export type StringifyOptions = {
headers?: boolean;
separator?: string;
};
/**
 * Serializes `data` into delimiter-separated text, one record per item.
 *
 * Every record, including the optional header record, is terminated with
 * `NEWLINE`.
 *
 * @param data The array of objects to encode
 * @param columns Array of values specifying which data to include in the output
 * @param options Output formatting options. An option that is omitted or
 * explicitly `undefined` falls back to its default (`headers: true`,
 * `separator: ","`).
 * @returns The encoded text
 * @throws {StringifyError} If the separator contains a double quote or CRLF,
 * or if a non-numeric accessor is applied to an array
 */
export async function stringify(
  data: DataItem[],
  columns: Column[],
  options: StringifyOptions = {},
): Promise<string> {
  // Destructuring defaults (unlike spreading `options` over a defaults
  // object) also apply when a key is present but set to `undefined`.
  const { headers = true, separator: sep = "," } = options;

  if (sep.includes(QUOTE) || sep.includes(NEWLINE)) {
    const message = [
      "Separator cannot include the following strings:",
      ' - U+0022: Quotation mark (")',
      " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
    ].join("\n");
    throw new StringifyError(message);
  }

  const normalizedColumns = columns.map(normalizeColumn);

  // Escapes each field, joins them with the separator and terminates the record.
  const toRecord = (fields: unknown[]): string =>
    fields.map((field) => getEscapedString(field, sep)).join(sep) + NEWLINE;

  let output = "";

  if (headers) {
    output += toRecord(normalizedColumns.map((column) => column.header));
  }

  for (const item of data) {
    // Items are awaited one at a time so transform callbacks run in row order.
    output += toRecord(await getValuesFromItem(item, normalizedColumns));
  }

  return output;
}

View file

@ -0,0 +1,373 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts";
import {
Column,
DataItem,
NEWLINE,
stringify,
StringifyError,
StringifyOptions,
} from "./csv_stringify.ts";
/** Fields shared by every stringify test case: the inputs and the test name. */
type StringifyTestCaseBase = {
columns: Column[];
data: DataItem[];
name: string;
options?: StringifyOptions;
};
/**
 * A case expected to fail: `throwsError` is the error class to expect and
 * `errorMessage` is an optional message to check for.
 */
type StringifyTestCaseError = StringifyTestCaseBase & {
errorMessage?: string;
// deno-lint-ignore no-explicit-any
throwsError: new (...args: any[]) => Error;
};
/** A case expected to succeed, producing exactly `expected`. */
type StringifyTestCase = StringifyTestCaseBase & { expected: string };
// Table of stringify test cases. Entries with `throwsError` must reject;
// all others must produce exactly `expected`. Test names are unique so a
// failure (or a name filter) identifies a single case.
const stringifyTestCases: (StringifyTestCase | StringifyTestCaseError)[] = [
  {
    columns: ["a"],
    data: [["foo"], ["bar"]],
    errorMessage: 'Property accessor is not of type "number"',
    name: "[CSV_stringify] Access array index using string",
    throwsError: StringifyError,
  },
  {
    columns: [0],
    data: [["foo"], ["bar"]],
    errorMessage: [
      "Separator cannot include the following strings:",
      ' - U+0022: Quotation mark (")',
      " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
    ].join("\n"),
    name: "[CSV_stringify] Double quote in separator",
    options: { separator: '"' },
    throwsError: StringifyError,
  },
  {
    columns: [0],
    data: [["foo"], ["bar"]],
    errorMessage: [
      "Separator cannot include the following strings:",
      ' - U+0022: Quotation mark (")',
      " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
    ].join("\n"),
    name: "[CSV_stringify] CRLF in separator",
    options: { separator: "\r\n" },
    throwsError: StringifyError,
  },
  {
    // The transform receives an object, so `toUpperCase` is not a function.
    columns: [
      {
        fn: (obj) => obj.toUpperCase(),
        prop: "msg",
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    name: "[CSV_stringify] Transform function",
    throwsError: TypeError,
  },
  {
    columns: [],
    data: [],
    expected: NEWLINE,
    name: "[CSV_stringify] No data, no columns",
  },
  {
    columns: [],
    data: [],
    expected: ``,
    name: "[CSV_stringify] No data, no columns, no headers",
    options: { headers: false },
  },
  {
    columns: ["a"],
    data: [],
    expected: `a${NEWLINE}`,
    name: "[CSV_stringify] No data, columns",
  },
  {
    columns: ["a"],
    data: [],
    expected: ``,
    name: "[CSV_stringify] No data, columns, no headers",
    options: { headers: false },
  },
  {
    columns: [],
    data: [{ a: 1 }, { a: 2 }],
    expected: `${NEWLINE}${NEWLINE}${NEWLINE}`,
    name: "[CSV_stringify] Data, no columns",
  },
  {
    columns: [0, 1],
    data: [["foo", "bar"], ["baz", "qux"]],
    expected: `0\r1${NEWLINE}foo\rbar${NEWLINE}baz\rqux${NEWLINE}`,
    name: "[CSV_stringify] Separator: CR",
    options: { separator: "\r" },
  },
  {
    columns: [0, 1],
    data: [["foo", "bar"], ["baz", "qux"]],
    expected: `0\n1${NEWLINE}foo\nbar${NEWLINE}baz\nqux${NEWLINE}`,
    name: "[CSV_stringify] Separator: LF",
    options: { separator: "\n" },
  },
  {
    columns: [1],
    data: [{ 1: 1 }, { 1: 2 }],
    expected: `1${NEWLINE}1${NEWLINE}2${NEWLINE}`,
    name: "[CSV_stringify] Column: number accessor, Data: object",
  },
  {
    columns: [{ header: "Value", prop: "value" }],
    data: [{ value: "foo" }, { value: "bar" }],
    expected: `foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Explicit header value, no headers",
    options: { headers: false },
  },
  {
    columns: [1],
    data: [["key", "foo"], ["key", "bar"]],
    expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Column: number accessor, Data: array",
  },
  {
    columns: [[1]],
    data: [{ 1: 1 }, { 1: 2 }],
    expected: `1${NEWLINE}1${NEWLINE}2${NEWLINE}`,
    name: "[CSV_stringify] Column: array number accessor, Data: object",
  },
  {
    columns: [[1]],
    data: [["key", "foo"], ["key", "bar"]],
    expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Column: array number accessor, Data: array",
  },
  {
    columns: [[1, 1]],
    data: [["key", ["key", "foo"]], ["key", ["key", "bar"]]],
    expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name:
      "[CSV_stringify] Column: array number accessor (nested), Data: array",
  },
  {
    columns: ["value"],
    data: [{ value: "foo" }, { value: "bar" }],
    expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Column: string accessor, Data: object",
  },
  {
    columns: [["value"]],
    data: [{ value: "foo" }, { value: "bar" }],
    expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Column: array string accessor, Data: object",
  },
  {
    columns: [["msg", "value"]],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name:
      "[CSV_stringify] Column: array string accessor (nested), Data: object",
  },
  {
    columns: [
      {
        header: "Value",
        prop: ["msg", "value"],
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `Value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Explicit header",
  },
  {
    columns: [
      {
        fn: (str: string) => str.toUpperCase(),
        prop: ["msg", "value"],
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `value${NEWLINE}FOO${NEWLINE}BAR${NEWLINE}`,
    name: "[CSV_stringify] Transform function 1",
  },
  {
    columns: [
      {
        fn: (str: string) => Promise.resolve(str.toUpperCase()),
        prop: ["msg", "value"],
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `value${NEWLINE}FOO${NEWLINE}BAR${NEWLINE}`,
    name: "[CSV_stringify] Transform function 1 async",
  },
  {
    columns: [
      {
        fn: (obj: { value: string }) => obj.value,
        prop: "msg",
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `msg${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Transform function 2",
  },
  {
    columns: [
      {
        fn: (obj: { value: string }) => obj.value,
        header: "Value",
        prop: "msg",
      },
    ],
    data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }],
    expected: `Value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Transform function 2, explicit header",
  },
  {
    columns: [0],
    data: [[{ value: "foo" }], [{ value: "bar" }]],
    expected:
      `0${NEWLINE}"{""value"":""foo""}"${NEWLINE}"{""value"":""bar""}"${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: object",
  },
  {
    columns: [0],
    data: [
      [[{ value: "foo" }, { value: "bar" }]],
      [[{ value: "baz" }, { value: "qux" }]],
    ],
    expected:
      `0${NEWLINE}"[{""value"":""foo""},{""value"":""bar""}]"${NEWLINE}"[{""value"":""baz""},{""value"":""qux""}]"${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: array of objects",
  },
  {
    columns: [0],
    data: [[["foo", "bar"]], [["baz", "qux"]]],
    expected:
      `0${NEWLINE}"[""foo"",""bar""]"${NEWLINE}"[""baz"",""qux""]"${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: array",
  },
  {
    columns: [0],
    data: [[["foo", "bar"]], [["baz", "qux"]]],
    expected:
      `0${NEWLINE}"[""foo"",""bar""]"${NEWLINE}"[""baz"",""qux""]"${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: array, separator: tab",
    options: { separator: "\t" },
  },
  {
    columns: [0],
    data: [[], []],
    expected: `0${NEWLINE}${NEWLINE}${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: undefined",
  },
  {
    columns: [0],
    data: [[null], [null]],
    expected: `0${NEWLINE}${NEWLINE}${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: null",
  },
  {
    columns: [0],
    data: [[0xa], [0xb]],
    expected: `0${NEWLINE}10${NEWLINE}11${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: hex number",
  },
  {
    columns: [0],
    data: [[BigInt("1")], [BigInt("2")]],
    expected: `0${NEWLINE}1${NEWLINE}2${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: BigInt",
  },
  {
    columns: [0],
    data: [[true], [false]],
    expected: `0${NEWLINE}true${NEWLINE}false${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: boolean",
  },
  {
    columns: [0],
    data: [["foo"], ["bar"]],
    expected: `0${NEWLINE}foo${NEWLINE}bar${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: string",
  },
  {
    columns: [0],
    data: [[Symbol("foo")], [Symbol("bar")]],
    expected: `0${NEWLINE}Symbol(foo)${NEWLINE}Symbol(bar)${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: symbol",
  },
  {
    columns: [0],
    data: [[(n: number) => n]],
    expected: `0${NEWLINE}(n) => n${NEWLINE}`,
    name: "[CSV_stringify] Targeted value: function",
  },
  {
    columns: [0],
    data: [['foo"']],
    expected: `0${NEWLINE}"foo"""${NEWLINE}`,
    name: "[CSV_stringify] Value with double quote",
  },
  {
    columns: [0],
    data: [["foo\r\n"]],
    expected: `0${NEWLINE}"foo\r\n"${NEWLINE}`,
    name: "[CSV_stringify] Value with CRLF",
  },
  {
    columns: [0],
    data: [["foo\r"]],
    expected: `0${NEWLINE}foo\r${NEWLINE}`,
    name: "[CSV_stringify] Value with CR",
  },
  {
    columns: [0],
    data: [["foo\n"]],
    expected: `0${NEWLINE}foo\n${NEWLINE}`,
    name: "[CSV_stringify] Value with LF",
  },
  {
    columns: [0],
    data: [["foo,"]],
    expected: `0${NEWLINE}"foo,"${NEWLINE}`,
    name: "[CSV_stringify] Value with comma",
  },
  {
    columns: [0],
    data: [["foo,"]],
    expected: `0${NEWLINE}foo,${NEWLINE}`,
    name: "[CSV_stringify] Value with comma, tab separator",
    options: { separator: "\t" },
  },
];
// Registers one Deno test per table entry. Entries carrying `throwsError`
// must reject with that error class (and message, when given); every other
// entry must stringify to exactly its `expected` text.
for (const testCase of stringifyTestCases) {
  const { columns, data, name, options } = testCase;
  const { errorMessage, throwsError } = testCase as StringifyTestCaseError;

  if (throwsError) {
    Deno.test({
      fn: async () => {
        await assertThrowsAsync(
          async () => {
            await stringify(data, columns, options);
          },
          throwsError,
          errorMessage,
        );
      },
      name,
    });
    continue;
  }

  const { expected } = testCase as StringifyTestCase;
  Deno.test({
    fn: async () => {
      assertEquals(await stringify(data, columns, options), expected);
    },
    name,
  });
}