From ac5fcf626a77db7795f7ab2b4f15e4ecb3270171 Mon Sep 17 00:00:00 2001
From: Marcos Casagrande
Date: Mon, 24 Oct 2022 20:27:22 +0200
Subject: [PATCH] perf(ext/web): add op_encode_binary_string (#16352)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new op to use in `reader.readAsBinaryString(blob)`.

**this patch**
```
File API binary string: 400b   35.12 µs/iter  (21.93 µs … 3.27 ms)   31.87 µs  131.95 µs  217.63 µs
File API binary string: 4kb    46.49 µs/iter  (29.36 µs … 4.42 ms)    42.5 µs  122.48 µs   155.1 µs
File API binary string: 2.2mb   4.17 ms/iter   (1.75 ms … 8.54 ms)    5.48 ms    7.39 ms    7.39 ms
```

**main**
```
benchmark                          time (avg)              (min … max)        p75        p99       p995
--------------------------------------------------------------------- -----------------------------
File API binary string: 400b    56.17 µs/iter   (43.09 µs … 784.52 µs)    49.6 µs  177.18 µs  241.23 µs
File API binary string: 4kb     277.2 µs/iter    (240.29 µs … 1.84 ms)  269.87 µs  649.79 µs  774.46 µs
File API binary string: 2.2mb  180.03 ms/iter  (173.32 ms … 194.35 ms)  182.54 ms  194.35 ms  194.35 ms
```

It can also handle bigger files: when encoding a 200mb file, main crashes with OOM:
```
<--- Last few GCs --->

[132677:0x560504676550] 5012 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5038 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5064 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
```
---
 cli/tests/unit/text_encoding_test.ts | 58 ++++++++++++++++++++++++++++
 ext/web/10_filereader.js             | 12 +-----
 ext/web/lib.rs                       |  6 +++
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/cli/tests/unit/text_encoding_test.ts b/cli/tests/unit/text_encoding_test.ts
index 70942d98d7..06ec090481 100644
--- a/cli/tests/unit/text_encoding_test.ts
+++ b/cli/tests/unit/text_encoding_test.ts @@ -247,6 +247,7 @@ Deno.test(function toStringShouldBeWebCompatibility() { const decoder = new TextDecoder(); assertEquals(decoder.toString(), "[object TextDecoder]"); }); + Deno.test(function textEncoderShouldCoerceToString() { const encoder = new TextEncoder(); const fixutreText = "text"; @@ -261,3 +262,60 @@ Deno.test(function textEncoderShouldCoerceToString() { const decoded = decoder.decode(bytes); assertEquals(decoded, fixutreText); }); + +Deno.test(function binaryEncode() { + // @ts-ignore: Deno.core allowed + const ops = Deno.core.ops; + function asBinaryString(bytes: Uint8Array): string { + return Array.from(bytes).map( + (v: number) => String.fromCodePoint(v), + ).join(""); + } + + function decodeBinary(binaryString: string) { + const chars: string[] = Array.from(binaryString); + return chars.map((v: string): number | undefined => v.codePointAt(0)); + } + + // invalid utf-8 code points + const invalid = new Uint8Array([0xC0]); + assertEquals( + ops.op_encode_binary_string(invalid), + asBinaryString(invalid), + ); + + const invalid2 = new Uint8Array([0xC1]); + assertEquals( + ops.op_encode_binary_string(invalid2), + asBinaryString(invalid2), + ); + + for (let i = 0, j = 255; i <= 255; i++, j--) { + const bytes = new Uint8Array([i, j]); + const binaryString = ops.op_encode_binary_string(bytes); + assertEquals( + binaryString, + asBinaryString(bytes), + ); + assertEquals(Array.from(bytes), decodeBinary(binaryString)); + } + + const inputs = [ + "σ😀", + "Кириллица is Cyrillic", + "𝓽𝓮𝔁𝓽", + "lone𝄞\ud888surrogate", + "\udc00\ud800", + "\ud800", + ]; + for (const input of inputs) { + const bytes = new TextEncoder().encode(input); + const binaryString = ops.op_encode_binary_string(bytes); + assertEquals( + binaryString, + asBinaryString(bytes), + ); + + assertEquals(Array.from(bytes), decodeBinary(binaryString)); + } +}); diff --git a/ext/web/10_filereader.js b/ext/web/10_filereader.js index 8a76b2e0f1..49f4babe11 
100644 --- a/ext/web/10_filereader.js +++ b/ext/web/10_filereader.js @@ -13,6 +13,7 @@ "use strict"; ((window) => { + const core = window.Deno.core; const webidl = window.__bootstrap.webidl; const { forgivingBase64Encode } = window.__bootstrap.infra; const { ProgressEvent } = window.__bootstrap.event; @@ -21,8 +22,6 @@ const { parseMimeType } = window.__bootstrap.mimesniff; const { DOMException } = window.__bootstrap.domException; const { - ArrayPrototypeJoin, - ArrayPrototypeMap, ArrayPrototypePush, ArrayPrototypeReduce, FunctionPrototypeCall, @@ -33,7 +32,6 @@ ObjectPrototypeIsPrototypeOf, queueMicrotask, SafeArrayIterator, - StringFromCodePoint, Symbol, TypedArrayPrototypeSet, TypeError, @@ -170,13 +168,7 @@ break; } case "BinaryString": - this[result] = ArrayPrototypeJoin( - ArrayPrototypeMap( - [...new Uint8Array(bytes.buffer)], - (v) => StringFromCodePoint(v), - ), - "", - ); + this[result] = core.ops.op_encode_binary_string(bytes); break; case "Text": { let decoder = undefined; diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 85e32b70ae..8a9d3e18c7 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -94,6 +94,7 @@ pub fn init( op_encoding_new_decoder::decl(), op_encoding_decode::decl(), op_encoding_encode_into::decl(), + op_encode_binary_string::decl(), op_blob_create_part::decl(), op_blob_slice_part::decl(), op_blob_read_part::decl(), @@ -337,6 +338,11 @@ fn op_encoding_encode_into( Ok(()) } +#[op] +fn op_encode_binary_string(s: &[u8]) -> ByteString { + ByteString::from(s) +} + /// Creates a [`CancelHandle`] resource that can be used to cancel invocations of certain ops. #[op(fast)] pub fn op_cancel_handle(state: &mut OpState) -> u32 {