diff --git a/std/encoding/README.md b/std/encoding/README.md index 15c8c40bf5..5886612692 100644 --- a/std/encoding/README.md +++ b/std/encoding/README.md @@ -2,6 +2,7 @@ Helper module for dealing with external data structures. +- [`ascii85`](#ascii85) - [`base32`](#base32) - [`binary`](#binary) - [`csv`](#csv) @@ -322,3 +323,58 @@ console.log(binaryData); console.log(encode(binaryData)); // => RC2E6GA= ``` + +## ascii85 + +Ascii85/base85 encoder and decoder with support for multiple standards. + +### Basic usage + +`encode` encodes a `Uint8Array` to an ascii85 representation, and `decode` +decodes the given ascii85 representation to a `Uint8Array`. + +```ts +import { encode, decode } from "https://deno.land/std/encoding/ascii85.ts"; + +const a85Repr = "LpTqp"; + +const binaryData = decode(a85Repr); +console.log(binaryData); +// => Uint8Array [ 136, 180, 79, 24 ] + +console.log(encode(binaryData)); +// => LpTqp +``` + +### Specifying a standard and delimiter + +By default, all functions use the most popular Adobe version of ascii85 and +do not add any delimiter. However, three more standards are supported - +btoa (different delimiter and additional compression of 4 bytes equal to 32), +[Z85](https://rfc.zeromq.org/spec/32/) and +[RFC 1924](https://tools.ietf.org/html/rfc1924). It's possible to use a +different encoding by specifying it in the `options` object passed as the second parameter. + +Similarly, it's possible to make `encode` add a delimiter by setting `delimiter: true` (`<~` and `~>` for +Adobe, `xbtoa Begin` and `xbtoa End` with newlines between the delimiters and +encoded data for btoa). Checksums for btoa are not supported. Delimiters are not +supported by other encodings. 
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
/** This module is browser compatible. */

/** Names of the ascii85 flavours understood by `encode` and `decode`. */
export type Ascii85Standard = "Adobe" | "btoa" | "RFC 1924" | "Z85";

/**
 * encoding/decoding options
 * @property standard - character set and delimiter (if supported and used). Defaults to Adobe
 * @property delimiter - whether to use a delimiter (if supported) - "<~" and "~>" by default
 */
export interface Ascii85Options {
  standard?: Ascii85Standard;
  delimiter?: boolean;
}

// Alphabets of the two standards that do not use the contiguous "!".."u"
// range; position N in each string stands for the base-85 digit N.
const rfc1924 =
  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
const Z85 =
  "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#";

// Big-endian uint32 value of four space bytes (0x20 0x20 0x20 0x20), which
// btoa abbreviates to a single "y".
const FOUR_SPACES = 0x20202020;

/** Re-spells text written with Adobe digits ("!".."u") using `alphabet`. */
function toAlphabet(adobeText: string, alphabet: string): string {
  let translated = "";
  for (let i = 0; i < adobeText.length; i++) {
    translated += alphabet.charAt(adobeText.charCodeAt(i) - 33);
  }
  return translated;
}

/**
 * Re-spells text written with `alphabet` using Adobe digits ("!".."u").
 * Characters that are not part of `alphabet` map to char code 32 (a space),
 * which lies outside the "!".."u" range.
 */
function fromAlphabet(text: string, alphabet: string): string {
  return text.replace(/./g, (match) =>
    String.fromCharCode(alphabet.indexOf(match) + 33)
  );
}

/**
 * Encodes a given Uint8Array into ascii85, supports multiple standards
 *
 * The input is processed in big-endian groups of 4 bytes, each producing 5
 * characters. A trailing partial group is zero-padded for the computation and
 * the characters that correspond to the padding are dropped from the result.
 *
 * @param uint8 input to encode
 * @param [options] encoding options
 * @param [options.standard=Adobe] encoding standard (Adobe, btoa, RFC 1924 or Z85)
 * @param [options.delimiter] whether to use a delimiter, if supported by encoding standard
 * @returns the ascii85 text
 *
 * @example
 * const binaryData = new Uint8Array([136, 180, 79, 24]);
 * encode(binaryData);
 * // => LpTqp
 * encode(binaryData, { standard: "Adobe", delimiter: true });
 * // => <~LpTqp~>
 * encode(binaryData, { standard: "btoa", delimiter: true });
 * // => xbtoa Begin
 * // LpTqp
 * // xbtoa End
 * encode(binaryData, { standard: "RFC 1924" });
 * // => h_p`_
 * encode(binaryData, { standard: "Z85" });
 * // => H{P}{
 */
export function encode(uint8: Uint8Array, options?: Ascii85Options): string {
  const standard = options?.standard ?? "Adobe";

  // Number of zero bytes needed to fill the last 4-byte group (0..3).
  const padding = (4 - (uint8.length % 4)) % 4;
  let bytes = uint8;
  if (padding !== 0) {
    bytes = new Uint8Array(uint8.length + padding);
    bytes.set(uint8);
  }

  // Honour byteOffset/byteLength: the input may be a view (e.g. the result of
  // `subarray`) onto a larger ArrayBuffer.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const abbreviatesZeroes = standard === "Adobe" || standard === "btoa";
  const digits: string[] = [];

  for (let offset = 0; offset < bytes.length; offset += 4) {
    let value = view.getUint32(offset);
    const isPaddedGroup = padding !== 0 && offset === bytes.length - 4;

    // Adobe and btoa compress 4 zero bytes into a single "z" character; a
    // padded trailing group must stay expanded so that it can be trimmed.
    if (abbreviatesZeroes && value === 0 && !isPaddedGroup) {
      digits.push("z");
      continue;
    }
    // btoa compresses 4 spaces - that is, bytes equal to 32 - into a single "y"
    if (standard === "btoa" && value === FOUR_SPACES) {
      digits.push("y");
      continue;
    }

    // Produce the five base-85 digits, most significant first.
    const group: string[] = ["", "", "", "", ""];
    for (let j = 4; j >= 0; j--) {
      group[j] = String.fromCharCode((value % 85) + 33);
      value = Math.trunc(value / 85);
    }
    digits.push(...group);
  }

  // Drop one character per padding byte.
  const body = digits.slice(0, digits.length - padding).join("");

  if (standard === "RFC 1924") {
    return toAlphabet(body, rfc1924);
  }
  if (standard === "Z85") {
    return toAlphabet(body, Z85);
  }
  if (options?.delimiter) {
    return standard === "btoa"
      ? `xbtoa Begin\n${body}\nxbtoa End`
      : `<~${body}~>`;
  }
  return body;
}

/**
 * Decodes a given ascii85 encoded string.
 *
 * Delimiters of the selected standard are stripped if present, and every
 * character that is not a valid digit of the selected standard is ignored.
 *
 * @param ascii85 input to decode
 * @param [options] decoding options
 * @param [options.standard=Adobe] encoding standard used in the input string (Adobe, btoa, RFC 1924 or Z85)
 * @returns the decoded bytes
 *
 * @example
 * decode("LpTqp");
 * // => Uint8Array [ 136, 180, 79, 24 ]
 */
export function decode(ascii85: string, options?: Ascii85Options): Uint8Array {
  const standard = options?.standard ?? "Adobe";

  // Translate every encoding to the basic Adobe/btoa digit range and expand
  // the abbreviations ("z" and "y").
  let text = ascii85;
  switch (standard) {
    case "Adobe":
      text = text.replace(/(<~|~>)/g, "").replace(/z/g, "!!!!!");
      break;
    case "btoa":
      text = text
        .replace(/(xbtoa Begin|xbtoa End|\n)/g, "")
        .replace(/z/g, "!!!!!")
        // "+<VdL" is the regular encoding of four space bytes
        .replace(/y/g, "+<VdL");
      break;
    case "RFC 1924":
      text = fromAlphabet(text, rfc1924);
      break;
    case "Z85":
      text = fromAlphabet(text, Z85);
      break;
  }
  // remove all invalid characters
  text = text.replace(/[^!-u]/g, "");

  const len = text.length;
  const output = new Uint8Array(len + 4 - (len % 4));
  const view = new DataView(output.buffer);

  for (let start = 0, byteOffset = 0; start < len; start += 5, byteOffset += 4) {
    let value = 0;
    for (let i = start; i < start + 5; i++) {
      // A short trailing group is completed with "u" (char code 117), the
      // highest digit, so that the truncated bytes round correctly.
      value = value * 85 + (i < len ? text.charCodeAt(i) : 117) - 33;
    }
    view.setUint32(byteOffset, value);
  }

  // Every 5 characters carry 4 bytes; bytes produced by filler digits are cut.
  return output.slice(0, Math.floor((len * 4) / 5));
}
+import { assertEquals } from "../testing/asserts.ts"; +import { encode, decode, Ascii85Standard } from "./ascii85.ts"; +type TestCases = Partial<{ [index in Ascii85Standard]: string[][] }>; +const utf8encoder = new TextEncoder(); +const testCasesNoDelimeter: TestCases = { + Adobe: [ + ["test", "FCfN8"], + ["ascii85", "@<5pmBfIs"], + ["Hello world!", "87cURD]j7BEbo80"], + //wikipedia example + [ + "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.", + "9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKFCj@.4Gp$d7F!,L7@<6@)/0JDEF@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF-FD5W8ARlolDIal(DIduD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c", + ], + ["", ""], + ["\0", "!!"], + ["\0\0", "!!!"], + ["\0\0\0", "!!!!"], + //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z" + ["\0\0\0\0", "z"], + ["\0\0\0\0\0", "z!!"], + [" ", "+Cj@.4Gp$d7F!,L7@<6@)/0JDEF@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF-FD5W8ARlolDIal(DIduD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c", + ], + ["", ""], + ["\0", "!!"], + ["\0\0", "!!!"], + ["\0\0\0", "!!!!"], + //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z" + ["\0\0\0\0", "z"], + ["\0\0\0\0\0", "z!!"], + //special btoa test case - 4 spaces should become "y" + [" ", "y"], + ], + "RFC 1924": [ + ["test", "bY*jN"], + ["ascii85", "VRK_?X*e|"], + ["Hello world!", "NM&qnZy%ZCX>)XGZfA9Ab7*B`EFf-gbRchTYDO_b1WctXlY|;AZc?TVIXXEb95kYW*~HEWgu;7Ze%PVbZB98AYyqSVIXj2a&u*NWpZI|V`U(3W*}r`Y-wj`bRcPNAarPDAY*TCbZKsNWn>^>Ze$>7Ze(RV>IZ)PBCZf|#NWn^b%EFfigV`XJzb0BnRWgv5CZ*p`Xc4cT~ZDnp_Wgu^6AYpEKAY);2ZeeU7aBO8^b9HiME&", + ], + ["", ""], + ["\0", "00"], + ["\0\0", "000"], + ["\0\0\0", "0000"], + ["\0\0\0\0", "00000"], + ["\0\0\0\0\0", "0000000"], + [" ", "ARr(h"], + ], + Z85: [ + ["test", 
"By/Jn"], + ["ascii85", "vrk{)x/E%"], + ["Hello world!", "nm=QNzY"], + ["ascii85", "<~@<5pmBfIs~>"], + ["Hello world!", "<~87cURD]j7BEbo80~>"], + //wikipedia example + [ + "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.", + "<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKFCj@.4Gp$d7F!,L7@<6@)/0JDEF@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF-FD5W8ARlolDIal(DIduD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>", + ], + ["", "<~~>"], + ["\0", "<~!!~>"], + ["\0\0", "<~!!!~>"], + ["\0\0\0", "<~!!!!~>"], + //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z" + ["\0\0\0\0", "<~z~>"], + ["\0\0\0\0\0", "<~z!!~>"], + [" ", "<~+"], + ], + btoa: [ + ["test", "xbtoa Begin\nFCfN8\nxbtoa End"], + ["ascii85", "xbtoa Begin\n@<5pmBfIs\nxbtoa End"], + ["Hello world!", "xbtoa Begin\n87cURD]j7BEbo80\nxbtoa End"], + //wikipedia example + [ + "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.", + "xbtoa Begin\n9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKFCj@.4Gp$d7F!,L7@<6@)/0JDEF@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF-FD5W8ARlolDIal(DIduD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c\nxbtoa End", + ], + ["", "xbtoa Begin\n\nxbtoa End"], + ["\0", "xbtoa Begin\n!!\nxbtoa End"], + ["\0\0", "xbtoa Begin\n!!!\nxbtoa End"], + ["\0\0\0", "xbtoa Begin\n!!!!\nxbtoa End"], + //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z" + ["\0\0\0\0", "xbtoa Begin\nz\nxbtoa End"], + ["\0\0\0\0\0", "xbtoa Begin\nz!!\nxbtoa End"], + //special btoa test case - 4 spaces should become "y" + [" ", "xbtoa Begin\ny\nxbtoa End"], + 
], +}; + +for (const [standard, tests] of Object.entries(testCasesNoDelimeter)) { + if (tests === undefined) continue; + Deno.test({ + name: `[encoding/ascii85] encode ${standard}`, + fn(): void { + for (const [bin, b85] of tests) { + assertEquals( + encode(utf8encoder.encode(bin), { + standard: standard as Ascii85Standard, + }), + b85 + ); + } + }, + }); + + Deno.test({ + name: `[encoding/ascii85] decode ${standard}`, + fn(): void { + for (const [bin, b85] of tests) { + assertEquals( + decode(b85, { standard: standard as Ascii85Standard }), + utf8encoder.encode(bin) + ); + } + }, + }); +} +for (const [standard, tests] of Object.entries(testCasesDelimeter)) { + if (tests === undefined) continue; + Deno.test({ + name: `[encoding/ascii85] encode ${standard} with delimeter`, + fn(): void { + for (const [bin, b85] of tests) { + assertEquals( + encode(utf8encoder.encode(bin), { + standard: standard as Ascii85Standard, + delimiter: true, + }), + b85 + ); + } + }, + }); + + Deno.test({ + name: `[encoding/ascii85] decode ${standard} with delimeter`, + fn(): void { + for (const [bin, b85] of tests) { + assertEquals( + decode(b85, { + standard: standard as Ascii85Standard, + delimiter: true, + }), + utf8encoder.encode(bin) + ); + } + }, + }); +}