1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-24 08:00:10 -05:00
denoland-deno/ext/node/polyfills/internal_binding/_utils.ts
Aapo Alasuutari 9d6584c16f
perf(ext/node): Optimise Buffer string operations (#20158)
Extracted from https://github.com/denoland/deno/pull/17815

Optimise Buffer's string operations, most significantly when dealing
with ASCII and UTF-16. Base64 and HEX encodings are affected to much
lesser degrees.

## Performance

### String length 15
With very small strings we're at break-even or sometimes even lose a tad
bit of performance from creating a `DataView` that ends up not paying
for itself.

**This PR:**
```
benchmark                                                     time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                       1.15 µs/iter     871,388.6   (728.78 ns … 1.56 µs)   1.23 µs   1.56 µs   1.56 µs
Buffer.from base64 string                                      1.63 µs/iter     612,790.9     (1.31 µs … 1.96 µs)   1.77 µs   1.96 µs   1.96 µs
Buffer.from utf16 string                                       1.41 µs/iter     707,396.3   (915.24 ns … 1.93 µs)   1.61 µs   1.93 µs   1.93 µs
Buffer.from hex string                                         1.87 µs/iter     535,357.9     (1.56 µs … 2.19 µs)      2 µs   2.19 µs   2.19 µs
Buffer.toString ascii string                                 154.58 ns/iter   6,469,162.8    (149.69 ns … 198 ns) 154.51 ns 182.89 ns 191.91 ns
Buffer.toString base64 string                                161.65 ns/iter   6,186,189.6 (150.91 ns … 181.15 ns) 165.18 ns 171.87 ns 174.94 ns
Buffer.toString utf16 string                                 292.74 ns/iter   3,415,959.8 (285.43 ns … 312.47 ns) 295.25 ns 310.47 ns 312.47 ns
Buffer.toString hex string                                    89.61 ns/iter  11,159,315.6  (81.09 ns … 123.77 ns)  91.09 ns 113.62 ns 119.28 ns
```

**Main:**
```
benchmark                                                     time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                       1.26 µs/iter     794,875.8     (1.07 µs … 1.46 µs)   1.31 µs   1.46 µs   1.46 µs
Buffer.from base64 string                                      1.65 µs/iter     607,853.3     (1.38 µs … 2.01 µs)   1.69 µs   2.01 µs   2.01 µs
Buffer.from utf16 string                                       1.34 µs/iter     744,894.6     (1.09 µs … 1.55 µs)   1.45 µs   1.55 µs   1.55 µs
Buffer.from hex string                                         2.01 µs/iter     496,345.8      (1.54 µs … 2.6 µs)   2.26 µs    2.6 µs    2.6 µs
Buffer.toString ascii string                                 150.16 ns/iter   6,659,630.5 (144.99 ns … 166.68 ns)  152.4 ns 157.26 ns 159.14 ns
Buffer.toString base64 string                                164.73 ns/iter   6,070,692.0 (158.77 ns … 185.63 ns) 168.48 ns 175.74 ns 176.68 ns
Buffer.toString utf16 string                                 150.61 ns/iter   6,639,864.0  (148.2 ns … 168.29 ns) 150.93 ns 157.21 ns 168.15 ns
Buffer.toString hex string                                    94.21 ns/iter  10,614,972.9   (86.21 ns … 98.75 ns)  95.43 ns  97.99 ns  98.21 ns
```

### String length 1500
With moderate lengths we already see great upsides for `Buffer.from()`
with ASCII and UTF-16.

**This PR:**
```
benchmark                                                     time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                       5.79 µs/iter     172,562.6     (4.72 µs … 4.71 ms)   5.04 µs   10.3 µs  11.67 µs
Buffer.from base64 string                                      5.08 µs/iter     196,678.9     (4.97 µs … 5.76 µs)   5.08 µs   5.76 µs   5.76 µs
Buffer.from utf16 string                                       9.68 µs/iter     103,316.5     (7.14 µs … 3.44 ms)  10.32 µs  13.42 µs  15.21 µs
Buffer.from hex string                                         53.7 µs/iter      18,620.2     (49.37 µs … 2.2 ms)  54.74 µs   72.2 µs  81.07 µs
Buffer.toString ascii string                                   6.63 µs/iter     150,761.3     (5.59 µs … 1.11 ms)   6.08 µs  15.68 µs  24.77 µs
Buffer.toString base64 string                                460.57 ns/iter   2,171,224.4 (448.33 ns … 511.73 ns) 465.05 ns 495.54 ns 511.73 ns
Buffer.toString utf16 string                                   6.52 µs/iter     153,287.0     (6.47 µs … 6.66 µs)   6.53 µs   6.66 µs   6.66 µs
Buffer.toString hex string                                     3.68 µs/iter     271,965.4     (3.64 µs … 3.82 µs)   3.68 µs   3.82 µs   3.82 µs
```

**Main:**
```
benchmark                                                     time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                      11.46 µs/iter      87,298.1    (8.53 µs … 834.1 µs)   9.61 µs  83.31 µs   87.3 µs
Buffer.from base64 string                                       5.4 µs/iter     185,027.8     (5.07 µs … 7.49 µs)   5.44 µs   7.49 µs   7.49 µs
Buffer.from utf16 string                                       20.3 µs/iter      49,270.8  (13.55 µs … 649.11 µs)   18.8 µs 113.93 µs 125.17 µs
Buffer.from hex string                                        52.03 µs/iter      19,218.9    (48.74 µs … 2.59 ms)  52.84 µs  67.05 µs  73.56 µs
Buffer.toString ascii string                                   6.46 µs/iter     154,822.5     (6.32 µs … 6.69 µs)   6.52 µs   6.69 µs   6.69 µs
Buffer.toString base64 string                                440.19 ns/iter   2,271,764.6    (427 ns … 490.77 ns) 444.74 ns 484.64 ns 490.77 ns
Buffer.toString utf16 string                                   6.89 µs/iter     145,106.7     (6.81 µs … 7.24 µs)   6.91 µs   7.24 µs   7.24 µs
Buffer.toString hex string                                     3.66 µs/iter     273,456.5      (3.6 µs … 4.02 µs)   3.64 µs   4.02 µs   4.02 µs
```

### String length 2^20
With massive lengths we the difference in ASCII and UTF-16 parsing
performance is enormous.

**This PR:**
```
benchmark                                                           time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                              4.1 ms/iter         243.7     (2.64 ms … 6.74 ms)   4.43 ms   6.26 ms   6.74 ms
Buffer.from base64 string                                            3.74 ms/iter         267.6     (2.91 ms … 4.92 ms)   3.96 ms   4.31 ms   4.92 ms
Buffer.from utf16 string                                             7.72 ms/iter         129.5    (5.91 ms … 11.03 ms)   7.97 ms  11.03 ms  11.03 ms
Buffer.from hex string                                              35.72 ms/iter          28.0   (34.71 ms … 38.42 ms)  35.93 ms  38.42 ms  38.42 ms
Buffer.toString ascii string                                        78.92 ms/iter          12.7   (42.72 ms … 94.13 ms)  91.64 ms  94.13 ms  94.13 ms
Buffer.toString base64 string                                      833.62 µs/iter       1,199.6   (638.05 µs … 5.97 ms) 826.86 µs   2.45 ms   2.48 ms
Buffer.toString utf16 string                                        79.35 ms/iter          12.6    (69.72 ms … 88.9 ms)  86.66 ms   88.9 ms   88.9 ms
Buffer.toString hex string                                          31.04 ms/iter          32.2      (4.3 ms … 46.9 ms)  37.21 ms   46.9 ms   46.9 ms
```

**Main:**
```
benchmark                                                           time (avg)        iter/s             (min … max)       p75       p99      p995
-------------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string                                            18.66 ms/iter          53.6   (15.61 ms … 23.26 ms)  20.62 ms  23.26 ms  23.26 ms
Buffer.from base64 string                                             4.7 ms/iter         212.9     (2.94 ms … 9.07 ms)   4.65 ms   9.06 ms   9.07 ms
Buffer.from utf16 string                                            33.49 ms/iter          29.9   (31.24 ms … 35.67 ms)  34.08 ms  35.67 ms  35.67 ms
Buffer.from hex string                                              39.38 ms/iter          25.4   (38.66 ms … 42.36 ms)  39.58 ms  42.36 ms  42.36 ms
Buffer.toString ascii string                                        77.68 ms/iter          12.9   (67.46 ms … 95.68 ms)  84.71 ms  95.68 ms  95.68 ms
Buffer.toString base64 string                                      825.53 µs/iter       1,211.3   (655.38 µs … 6.69 ms) 816.62 µs   3.07 ms   3.13 ms
Buffer.toString utf16 string                                        76.54 ms/iter          13.1    (66.9 ms … 85.26 ms)  83.63 ms  85.26 ms  85.26 ms
Buffer.toString hex string                                          38.56 ms/iter          25.9   (33.83 ms … 46.56 ms)  45.33 ms  46.56 ms  46.56 ms
```
2023-09-07 14:41:16 -06:00

108 lines
3.5 KiB
TypeScript

// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// TODO(petamoriken): enable prefer-primordials for node polyfills
// deno-lint-ignore-file prefer-primordials
import {
forgivingBase64Decode,
forgivingBase64UrlEncode,
} from "ext:deno_web/00_infra.js";
export function asciiToBytes(str: string) {
const length = str.length;
const byteArray = new Uint8Array(length);
for (let i = 0; i < length; ++i) {
byteArray[i] = str.charCodeAt(i) & 255;
}
return byteArray;
}
export function base64ToBytes(str: string) {
str = base64clean(str);
str = str.replaceAll("-", "+").replaceAll("_", "/");
return forgivingBase64Decode(str);
}
const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
function base64clean(str: string) {
// Node takes equal signs as end of the Base64 encoding
const eqIndex = str.indexOf("=");
str = eqIndex !== -1 ? str.substring(0, eqIndex).trimStart() : str.trim();
// Node strips out invalid characters like \n and \t from the string, std/base64 does not
str = str.replace(INVALID_BASE64_RE, "");
// Node converts strings with length < 2 to ''
const length = str.length;
if (length < 2) return "";
// Node allows for non-padded base64 strings (missing trailing ===), std/base64 does not
switch (length % 4) {
case 0:
return str;
case 1:
return `${str}===`;
case 2:
return `${str}==`;
case 3:
return `${str}=`;
default:
throw new Error("Unexpected NaN value for string length");
}
}
export function base64UrlToBytes(str: string) {
str = base64clean(str);
str = str.replaceAll("+", "-").replaceAll("/", "_");
return forgivingBase64UrlEncode(str);
}
export function hexToBytes(str: string) {
const length = str.length >>> 1;
const byteArray = new Uint8Array(length);
let i: number;
for (i = 0; i < length; i++) {
const a = Number.parseInt(str[i * 2], 16);
const b = Number.parseInt(str[i * 2 + 1], 16);
if (Number.isNaN(a) && Number.isNaN(b)) {
break;
}
byteArray[i] = (a << 4) | b;
}
// Returning a buffer subarray is okay: This API's return value
// is never exposed to users and is only ever used for its length
// and the data within the subarray.
return i === length ? byteArray : byteArray.subarray(0, i);
}
export function utf16leToBytes(str: string, units?: number) {
// If units is defined, round it to even values for 16 byte "steps"
// and use it as an upper bound value for our string byte array's length.
const length = Math.min(str.length * 2, units ? (units >>> 1) * 2 : Infinity);
const byteArray = new Uint8Array(length);
const view = new DataView(byteArray.buffer);
let i: number;
for (i = 0; i * 2 < length; i++) {
view.setUint16(i * 2, str.charCodeAt(i), true);
}
// Returning a buffer subarray is okay: This API's return value
// is never exposed to users and is only ever used for its length
// and the data within the subarray.
return i * 2 === length ? byteArray : byteArray.subarray(0, i * 2);
}
export function bytesToAscii(bytes: Uint8Array) {
let res = "";
const length = bytes.byteLength;
for (let i = 0; i < length; ++i) {
res = `${res}${String.fromCharCode(bytes[i] & 127)}`;
}
return res;
}
export function bytesToUtf16le(bytes: Uint8Array) {
let res = "";
const length = bytes.byteLength;
const view = new DataView(bytes.buffer, bytes.byteOffset, length);
for (let i = 0; i < length - 1; i += 2) {
res = `${res}${String.fromCharCode(view.getUint16(i, true))}`;
}
return res;
}