/** Operations that `Buffer` needs for one string encoding. */
interface EncodingOp {
  /** Returns how many bytes `string` occupies when written in this encoding. */
  byteLength(string: string): number;
}

// `TextEncoder.encode` keeps no state between calls, so one instance is shared
// instead of constructing a new encoder for every measurement.
const utf8Encoder = new TextEncoder();

// `string.length` counts UTF-16 code units; these encodings spend a fixed
// number of bytes on each of them.
const twoBytesPerCodeUnit = (string: string): number => string.length * 2;
const oneBytePerCodeUnit = (string: string): number => string.length;

/**
 * Byte-length calculators keyed by encoding name.
 *
 * Ported from
 * https://github.com/nodejs/node/blob/56dbe466fdbc598baea3bfce289bf52b97b8b8f7/lib/buffer.js#L598
 */
const encodingOps: { [key: string]: EncodingOp } = {
  utf8: {
    byteLength: (string: string): number =>
      utf8Encoder.encode(string).byteLength,
  },
  ucs2: {
    byteLength: twoBytesPerCodeUnit,
  },
  utf16le: {
    byteLength: twoBytesPerCodeUnit,
  },
  latin1: {
    byteLength: oneBytePerCodeUnit,
  },
  ascii: {
    byteLength: oneBytePerCodeUnit,
  },
  base64: {
    byteLength: (string: string): number =>
      base64ByteLength(string, string.length),
  },
  hex: {
    // Two hex digits make one byte; a trailing odd digit is not counted.
    byteLength: (string: string): number => string.length >>> 1,
  },
};

/**
 * Computes the decoded size of a base64 string without decoding it.
 *
 * @param str The base64 text.
 * @param bytes Number of leading characters of `str` to take into account.
 * Defaults to the whole string.
 * @returns The number of bytes the considered characters decode to.
 */
function base64ByteLength(str: string, bytes = str.length): number {
  // Handle padding: up to two trailing "=" (0x3d) characters carry no data.
  if (str.charCodeAt(bytes - 1) === 0x3d) bytes--;
  if (bytes > 1 && str.charCodeAt(bytes - 1) === 0x3d) bytes--;

  // Base64 ratio: 3/4
  return (bytes * 3) >>> 2;
}
/**
 * Returns the number of bytes `string` occupies.
 *
 * Non-string inputs report their own `byteLength`. Strings are measured in
 * `encoding`; when the encoding is not recognized, or no calculator is
 * registered for it, utf8 is used.
 *
 * @param string The value to measure.
 * @param encoding Encoding name used when `string` is a string.
 * @returns The byte length.
 */
static byteLength(
  string: string | Buffer | ArrayBufferView | ArrayBuffer | SharedArrayBuffer,
  encoding = "utf8"
): number {
  if (typeof string !== "string") return string.byteLength;

  const normalized = normalizeEncoding(encoding) || "utf8";
  // Guard the table lookup so a normalized name without an entry measures as
  // utf8 instead of throwing on `undefined`.
  const ops = encodingOps[normalized] || encodingOps.utf8;
  return ops.byteLength(string);
}

// std/node/buffer_test.ts
// tests from:
// https://github.com/nodejs/node/blob/56dbe466fdbc598baea3bfce289bf52b97b8b8f7/test/parallel/test-buffer-bytelength.js#L70
Deno.test({
  name: "Byte length is the expected for strings",
  fn() {
    // Each encoding name is checked in its given spelling and in upper case.
    const withUpperCase = (names: string[]): string[] =>
      names.flatMap((name: string) => [name, name.toUpperCase()]);

    // Special case: zero length string
    assertEquals(Buffer.byteLength("", "ascii"), 0);
    assertEquals(Buffer.byteLength("", "HeX"), 0);

    // utf8
    assertEquals(Buffer.byteLength("∑éllö wørl∂!", "utf-8"), 19);
    assertEquals(Buffer.byteLength("κλμνξο", "utf8"), 12);
    assertEquals(Buffer.byteLength("挵挶挷挸挹", "utf-8"), 15);
    assertEquals(Buffer.byteLength("𠝹𠱓𠱸", "UTF8"), 12);
    // Without an encoding, utf8 should be assumed
    assertEquals(Buffer.byteLength("hey there"), 9);
    assertEquals(Buffer.byteLength("𠱸挶νξ#xx :)"), 17);
    assertEquals(Buffer.byteLength("hello world", ""), 11);
    // It should also be assumed with unrecognized encoding
    assertEquals(Buffer.byteLength("hello world", "abc"), 11);
    assertEquals(Buffer.byteLength("ßœ∑≈", "unkn0wn enc0ding"), 10);

    // base64
    assertEquals(Buffer.byteLength("aGVsbG8gd29ybGQ=", "base64"), 11);
    assertEquals(Buffer.byteLength("aGVsbG8gd29ybGQ=", "BASE64"), 11);
    assertEquals(Buffer.byteLength("bm9kZS5qcyByb2NrcyE=", "base64"), 14);
    assertEquals(Buffer.byteLength("aGkk", "base64"), 3);
    assertEquals(
      Buffer.byteLength("bHNrZGZsa3NqZmtsc2xrZmFqc2RsZmtqcw==", "base64"),
      25
    );
    // special padding
    assertEquals(Buffer.byteLength("aaa=", "base64"), 2);
    assertEquals(Buffer.byteLength("aaaa==", "base64"), 3);

    assertEquals(Buffer.byteLength("Il était tué"), 14);
    assertEquals(Buffer.byteLength("Il était tué", "utf8"), 14);

    withUpperCase(["ascii", "latin1", "binary"]).forEach(
      (encoding: string) => {
        assertEquals(Buffer.byteLength("Il était tué", encoding), 12);
      }
    );

    withUpperCase(["ucs2", "ucs-2", "utf16le", "utf-16le"]).forEach(
      (encoding: string) => {
        assertEquals(Buffer.byteLength("Il était tué", encoding), 24);
      }
    );
  },
});