0
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-03-03 17:34:47 -05:00

perf(ext/url): improve URLPattern perf (#21488)

This significantly optimizes URLPattern in the case where the same
URL is matched against many patterns (like in a router).

It also includes minor speedups for other use cases.
This commit is contained in:
Luca Casonato 2023-12-08 12:02:52 +01:00 committed by GitHub
parent b24356d9b9
commit e15c735ede
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -12,17 +12,20 @@ const ops = core.ops;
import * as webidl from "ext:deno_webidl/00_webidl.js"; import * as webidl from "ext:deno_webidl/00_webidl.js";
import { createFilteredInspectProxy } from "ext:deno_console/01_console.js"; import { createFilteredInspectProxy } from "ext:deno_console/01_console.js";
const { const {
ArrayPrototypePop, ArrayPrototypePush,
MathRandom,
ObjectAssign,
ObjectCreate,
ObjectPrototypeIsPrototypeOf,
RegExpPrototypeExec, RegExpPrototypeExec,
RegExpPrototypeTest, RegExpPrototypeTest,
ObjectPrototypeIsPrototypeOf, SafeMap,
SafeRegExp, SafeRegExp,
Symbol, Symbol,
SymbolFor, SymbolFor,
TypeError, TypeError,
} = primordials; } = primordials;
const EMPTY_MATCH = [""];
const _components = Symbol("components"); const _components = Symbol("components");
/** /**
@ -54,10 +57,88 @@ const COMPONENTS_KEYS = [
* @property {string[]} groupNameList * @property {string[]} groupNameList
*/ */
/**
 * A bounded cache with approximate least-recently-used eviction, driven by
 * sampling. The idea is that we want to keep the most recently used items in
 * the cache, but we don't want to pay the cost of updating the cache on every
 * access. This relies on the fact that the data we're caching is not uniformly
 * distributed, and that the most recently used items are more likely to be
 * used again soon (long tail distribution).
 *
 * The cache is backed by a Map. Because maps iterate in insertion order, the
 * first entry is always the one that was (re-)inserted longest ago:
 *
 *  - On a hit, the entry is moved to the end of the Map, but only for a
 *    random subset of accesses (see `#sampleRate`).
 *  - On a miss, the value is produced by the factory and always returned, but
 *    it is only stored in the Map for a random subset of misses. When it is
 *    stored and the Map is already at capacity, the first (oldest) entry is
 *    deleted first.
 *
 * Independently of the Map, the most recently requested key/value pair is
 * always remembered, so repeated lookups of the same key are served without
 * touching the Map or the random number generator.
 *
 * A factory result of `undefined` is indistinguishable from a miss and is
 * therefore never served from the Map (only from the last-used slot).
 *
 * @template K
 * @template V
 */
class SampledLRUCache {
  /** @type {SafeMap<K, V>} */
  #map = new SafeMap();
  #capacity = 0;
  #sampleRate = 0.1;
  // Whether the last-used slot below holds a real entry. Needed so that a
  // lookup with key `undefined` on a fresh cache is not mistaken for a hit.
  #hasLastUsed = false;
  /** @type {K} */
  #lastUsedKey = undefined;
  /** @type {V} */
  #lastUsedValue = undefined;

  /** @param {number} capacity maximum number of entries kept in the Map */
  constructor(capacity) {
    this.#capacity = capacity;
  }

  /**
   * Returns the cached value for `key`, or calls `factory(key)` to create it.
   *
   * @param {K} key
   * @param {(key: K) => V} factory called with `key` on a cache miss
   * @return {V}
   */
  getOrInsert(key, factory) {
    // Fast path: same key as the previous call.
    if (this.#hasLastUsed && this.#lastUsedKey === key) {
      return this.#lastUsedValue;
    }

    let value = this.#map.get(key);
    if (value !== undefined) {
      if (MathRandom() < this.#sampleRate) {
        // Sampled hit: re-insert so the entry becomes the newest in the Map.
        this.#map.delete(key);
        this.#map.set(key, value);
      }
    } else {
      // value doesn't exist yet, create
      value = factory(key);
      if (MathRandom() < this.#sampleRate) {
        // If the map is at capacity, delete the oldest (first) element so
        // that the insert below never grows it beyond `capacity` entries.
        if (this.#map.size >= this.#capacity) {
          // deno-lint-ignore prefer-primordials
          this.#map.delete(this.#map.keys().next().value);
        }
        // insert the new value
        this.#map.set(key, value);
      }
    }

    this.#hasLastUsed = true;
    this.#lastUsedKey = key;
    this.#lastUsedValue = value;
    return value;
  }
}
// Module-level cache of processed match inputs, created with a capacity of
// 4096. The matching code looks results up here, keyed by `input`, when no
// `baseURL` is given, using `ops.op_urlpattern_process_match_input` as the
// factory; calls with a `baseURL` bypass the cache.
const matchInputCache = new SampledLRUCache(4096);
class URLPattern { class URLPattern {
/** @type {Components} */ /** @type {Components} */
[_components]; [_components];
#reusedResult;
/** /**
* @param {URLPatternInput} input * @param {URLPatternInput} input
* @param {string} [baseURL] * @param {string} [baseURL]
@ -80,9 +161,6 @@ class URLPattern {
components[key].regexpString, components[key].regexpString,
"u", "u",
); );
// used for fast path
components[key].matchOnEmptyInput =
components[key].regexpString === "^$";
} catch (e) { } catch (e) {
throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`); throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`);
} }
@ -144,20 +222,28 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2"); baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
} }
const res = ops.op_urlpattern_process_match_input( const res = baseURL === undefined
? matchInputCache.getOrInsert(
input, input,
baseURL, ops.op_urlpattern_process_match_input,
); )
if (res === null) { : ops.op_urlpattern_process_match_input(input, baseURL);
return false; if (res === null) return false;
}
const values = res[0]; const values = res[0];
for (let i = 0; i < COMPONENTS_KEYS.length; ++i) { for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i]; const key = COMPONENTS_KEYS[i];
if (!RegExpPrototypeTest(this[_components][key].regexp, values[key])) { const component = this[_components][key];
return false; switch (component.regexpString) {
case "^$":
if (values[key] !== "") return false;
break;
case "^(.*)$":
break;
default: {
if (!RegExpPrototypeTest(component.regexp, values[key])) return false;
}
} }
} }
@ -178,48 +264,65 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2"); baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
} }
const res = ops.op_urlpattern_process_match_input( const res = baseURL === undefined
? matchInputCache.getOrInsert(
input, input,
baseURL, ops.op_urlpattern_process_match_input,
); )
: ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) { if (res === null) {
return null; return null;
} }
const { 0: values, 1: inputs } = res; const { 0: values, 1: inputs } = res; /** @type {URLPatternResult} */
if (inputs[1] === null) {
ArrayPrototypePop(inputs);
}
/** @type {URLPatternResult} */ // globalThis.allocAttempt++;
const result = { inputs }; this.#reusedResult ??= { inputs: [undefined] };
const result = this.#reusedResult;
// We don't construct the `inputs` until after the matching is done under
// the assumption that most patterns do not match.
const components = this[_components];
for (let i = 0; i < COMPONENTS_KEYS.length; ++i) { for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i]; const key = COMPONENTS_KEYS[i];
/** @type {Component} */ /** @type {Component} */
const component = this[_components][key]; const component = components[key];
const input = values[key];
const match = component.matchOnEmptyInput && input === "" const res = result[key] ??= {
? EMPTY_MATCH // fast path input: values[key],
: RegExpPrototypeExec(component.regexp, input); groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {},
};
if (match === null) { switch (component.regexpString) {
return null; case "^$":
} if (values[key] !== "") return null;
break;
const groups = {}; case "^(.*)$":
res.groups["0"] = values[key];
break;
default: {
const match = RegExpPrototypeExec(component.regexp, values[key]);
if (match === null) return null;
const groupList = component.groupNameList; const groupList = component.groupNameList;
const groups = res.groups;
for (let i = 0; i < groupList.length; ++i) { for (let i = 0; i < groupList.length; ++i) {
// TODO(lucacasonato): this is vulnerable to override mistake
groups[groupList[i]] = match[i + 1] ?? ""; groups[groupList[i]] = match[i + 1] ?? "";
} }
break;
result[key] = { }
input, }
groups, res.input = values[key];
};
} }
// Now populate result.inputs
result.inputs[0] = typeof inputs[0] === "string"
? inputs[0]
: ObjectAssign(ObjectCreate(null), inputs[0]);
if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]);
this.#reusedResult = undefined;
return result; return result;
} }