mirror of
https://github.com/denoland/deno.git
synced 2025-03-03 09:31:22 -05:00
perf: Optimize TextEncoder and TextDecoder (#4430)
* add tests for "Deno.core.encode" and "Deno.core.decode" for empty inputs * use "Deno.core.encode" in "TextEncoder" * use "Deno.core.decode" in "TextDecoder" * remove "core_decode" and "core_encode" benchmarks
This commit is contained in:
parent
392d2c1118
commit
87d2ba42bf
7 changed files with 32 additions and 156 deletions
|
@ -1,80 +0,0 @@
|
|||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
// The following code is based off:
|
||||
// https://github.com/samthor/fast-text-encoding
|
||||
//
|
||||
// Copyright 2017 Sam Thorogood. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy of
|
||||
// the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations under
|
||||
// the License.
|
||||
//
|
||||
|
||||
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * Valid surrogate pairs are combined into a single code point and written as
 * a 4-byte sequence. An unpaired surrogate (a high surrogate that is not
 * followed by a low one, or a low surrogate on its own) is replaced by
 * U+FFFD REPLACEMENT CHARACTER, so the output is always well-formed UTF-8.
 *
 * @param input The string to encode.
 * @returns A freshly allocated `Uint8Array` holding exactly the encoded bytes.
 */
export function encodeUtf8(input: string): Uint8Array {
  const len = input.length;
  let pos = 0; // input position (UTF-16 code units)
  let at = 0; // output position (bytes)

  // Start with roughly 1.5x the input length, rounded down to a multiple of 8.
  let tlen = Math.max(32, len + (len >> 1) + 7);
  let target = new Uint8Array(Math.floor(tlen / 8) * 8);

  while (pos < len) {
    let value = input.charCodeAt(pos++);

    if (value >= 0xd800 && value <= 0xdfff) {
      let paired = false;
      // Only a high surrogate immediately followed by a low surrogate forms
      // a valid pair.
      if (value <= 0xdbff && pos < len) {
        const extra = input.charCodeAt(pos);
        if ((extra & 0xfc00) === 0xdc00) {
          ++pos;
          value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
          paired = true;
        }
      }
      if (!paired) {
        // Lone surrogates cannot be represented in UTF-8.
        value = 0xfffd;
      }
    }

    // Grow the buffer whenever a worst-case 4-byte sequence might not fit.
    if (at + 4 > target.length) {
      tlen += 8; // minimum extra
      tlen *= 1.0 + (pos / len) * 2; // scale by how far through the input we are
      // Math.floor instead of shifts: the product is a float and may exceed
      // the 32-bit range that bitwise operators truncate to.
      tlen = Math.floor(tlen / 8) * 8;

      const update = new Uint8Array(tlen);
      update.set(target);
      target = update;
    }

    if (value < 0x80) {
      // 1-byte (ASCII)
      target[at++] = value;
      continue;
    }

    if (value < 0x800) {
      // 2-byte: lead byte only, trailing byte written below
      target[at++] = ((value >> 6) & 0x1f) | 0xc0;
    } else if (value < 0x10000) {
      // 3-byte
      target[at++] = ((value >> 12) & 0x0f) | 0xe0;
      target[at++] = ((value >> 6) & 0x3f) | 0x80;
    } else {
      // 4-byte (value is at most 0x10ffff after surrogate pairing)
      target[at++] = ((value >> 18) & 0x07) | 0xf0;
      target[at++] = ((value >> 12) & 0x3f) | 0x80;
      target[at++] = ((value >> 6) & 0x3f) | 0x80;
    }

    // Final continuation byte shared by all multi-byte forms.
    target[at++] = (value & 0x3f) | 0x80;
  }

  return target.slice(0, at);
}
|
|
@ -26,7 +26,7 @@
|
|||
import * as base64 from "./base64.ts";
|
||||
import { decodeUtf8 } from "./decode_utf8.ts";
|
||||
import * as domTypes from "./dom_types.ts";
|
||||
import { encodeUtf8 } from "./encode_utf8.ts";
|
||||
import { core } from "../core.ts";
|
||||
|
||||
const CONTINUE = null;
|
||||
const END_OF_STREAM = -1;
|
||||
|
@ -352,6 +352,15 @@ export class TextDecoder {
|
|||
bytes = new Uint8Array(0);
|
||||
}
|
||||
|
||||
// For simple utf-8 decoding "Deno.core.decode" can be used for performance
|
||||
if (
|
||||
this._encoding === "utf-8" &&
|
||||
this.fatal === false &&
|
||||
this.ignoreBOM === false
|
||||
) {
|
||||
return core.decode(bytes);
|
||||
}
|
||||
|
||||
// For performance reasons we utilise a highly optimised decoder instead of
|
||||
// the general decoder.
|
||||
if (this._encoding === "utf-8") {
|
||||
|
@ -396,10 +405,9 @@ interface TextEncoderEncodeIntoResult {
|
|||
export class TextEncoder {
|
||||
readonly encoding = "utf-8";
|
||||
encode(input = ""): Uint8Array {
|
||||
// For performance reasons we utilise a highly optimised decoder instead of
|
||||
// the general decoder.
|
||||
// Deno.core.encode() provides very efficient utf-8 encoding
|
||||
if (this.encoding === "utf-8") {
|
||||
return encodeUtf8(input);
|
||||
return core.encode(input);
|
||||
}
|
||||
|
||||
const encoder = new UTF8Encoder();
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
const mixed = new TextEncoder().encode("@Ā๐😀");
|
||||
|
||||
/**
 * Fills a `Uint8Array` of length `bytes` with randomly chosen UTF-8 sequences
 * copied from the file-level `mixed` buffer.
 *
 * Each step picks a sequence width of 1 to 4 bytes, never wider than the
 * space left in the buffer, and copies the matching slice of `mixed`.
 */
function generateRandom(bytes) {
  // Index in `mixed` where the 1-, 2-, 3- and 4-byte sequences start.
  const offsets = [0, 1, 3, 6];
  const result = new Uint8Array(bytes);
  for (let i = 0; i < bytes; ) {
    const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i));
    const width = toAdd + 1;
    const start = offsets[toAdd];
    for (let k = 0; k < width; k++) {
      result[i + k] = mixed[start + k];
    }
    i += width;
  }
  return result;
}
|
||||
|
||||
const randomData = generateRandom(1024);
|
||||
for (let i = 0; i < 10_000; i++) Deno.core.decode(randomData);
|
|
@ -1,32 +0,0 @@
|
|||
const mixed = "@Ā๐😀";
|
||||
|
||||
/**
 * Builds a random string by concatenating characters picked from the
 * file-level `mixed` string until `bytes` UTF-16 code units have been added.
 *
 * Picks 0-2 append a single code unit; pick 3 appends the two code units at
 * indices 3 and 4 together, and is only possible when at least four units of
 * room remain.
 */
function generateRandom(bytes) {
  const chunks = [];
  let i = 0;
  while (i < bytes) {
    const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i));
    // Pick 3 spans two code units; every other pick spans one.
    const width = toAdd === 3 ? 2 : 1;
    chunks.push(mixed.slice(toAdd, toAdd + width));
    i += width;
  }
  return chunks.join("");
}
|
||||
|
||||
const randomData = generateRandom(1024);
|
||||
for (let i = 0; i < 10_000; i++) Deno.core.encode(randomData);
|
|
@ -632,7 +632,20 @@ fn encode(
|
|||
};
|
||||
let text_str = text.to_rust_string_lossy(scope);
|
||||
let text_bytes = text_str.as_bytes().to_vec().into_boxed_slice();
|
||||
let buf = boxed_slice_to_uint8array(scope, text_bytes);
|
||||
|
||||
let buf = if text_bytes.is_empty() {
|
||||
let ab = v8::ArrayBuffer::new(scope, 0);
|
||||
v8::Uint8Array::new(ab, 0, 0).expect("Failed to create UintArray8")
|
||||
} else {
|
||||
let buf_len = text_bytes.len();
|
||||
let backing_store =
|
||||
v8::ArrayBuffer::new_backing_store_from_boxed_slice(text_bytes);
|
||||
let mut backing_store_shared = backing_store.make_shared();
|
||||
let ab =
|
||||
v8::ArrayBuffer::with_backing_store(scope, &mut backing_store_shared);
|
||||
v8::Uint8Array::new(ab, 0, buf_len).expect("Failed to create UintArray8")
|
||||
};
|
||||
|
||||
rv.set(buf.into())
|
||||
}
|
||||
|
||||
|
|
|
@ -27,12 +27,18 @@ function main() {
|
|||
108, 100
|
||||
];
|
||||
|
||||
const empty = Deno.core.encode("");
|
||||
if (empty.length !== 0) throw new Error("assert");
|
||||
|
||||
assertArrayEquals(Array.from(Deno.core.encode("𝓽𝓮𝔁𝓽")), fixture1);
|
||||
assertArrayEquals(
|
||||
Array.from(Deno.core.encode("Hello \udc12\ud834 World")),
|
||||
fixture2
|
||||
);
|
||||
|
||||
const emptyBuf = Deno.core.decode(new Uint8Array(0));
|
||||
if (emptyBuf !== "") throw new Error("assert");
|
||||
|
||||
assert(Deno.core.decode(new Uint8Array(fixture1)) === "𝓽𝓮𝔁𝓽");
|
||||
assert(Deno.core.decode(new Uint8Array(fixture2)) === "Hello \ufffd\ufffd World");
|
||||
}
|
||||
|
|
|
@ -28,9 +28,7 @@ exec_time_benchmarks = [
|
|||
("workers_startup", ["cli/tests/workers_startup_bench.ts"]),
|
||||
("workers_round_robin", ["cli/tests/workers_round_robin_bench.ts"]),
|
||||
("text_decoder", ["cli/tests/text_decoder_perf.js"]),
|
||||
("core_decode", ["cli/tests/core_decode_perf.js"]),
|
||||
("text_encoder", ["cli/tests/text_encoder_perf.js"]),
|
||||
("core_encode", ["cli/tests/core_encode_perf.js"]),
|
||||
]
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue