diff --git a/ext/url/01_urlpattern.js b/ext/url/01_urlpattern.js index e0f63b7072..2ab39f51f6 100644 --- a/ext/url/01_urlpattern.js +++ b/ext/url/01_urlpattern.js @@ -12,17 +12,20 @@ const ops = core.ops; import * as webidl from "ext:deno_webidl/00_webidl.js"; import { createFilteredInspectProxy } from "ext:deno_console/01_console.js"; const { - ArrayPrototypePop, + ArrayPrototypePush, + MathRandom, + ObjectAssign, + ObjectCreate, + ObjectPrototypeIsPrototypeOf, RegExpPrototypeExec, RegExpPrototypeTest, - ObjectPrototypeIsPrototypeOf, + SafeMap, SafeRegExp, Symbol, SymbolFor, TypeError, } = primordials; -const EMPTY_MATCH = [""]; const _components = Symbol("components"); /** @@ -54,10 +57,88 @@ const COMPONENTS_KEYS = [ * @property {string[]} groupNameList */ +/** + * This implements a least-recently-used cache that has a pseudo-"young + * generation" by using sampling. The idea is that we want to keep the most + * recently used items in the cache, but we don't want to pay the cost of + * updating the cache on every access. This relies on the fact that the data + * we're caching is not uniformly distributed, and that the most recently used + * items are more likely to be used again soon (long tail distribution). + * + * The LRU cache is implemented as a Map, with the key being the cache key and + * the value being the cache value. When an item is accessed, it is moved to the + * end of the Map. When an item is inserted, if the Map is at capacity, the + * first item in the Map is deleted. Because maps iterate using insertion order, + * this means that the oldest item is always the first. + * + * The sampling is implemented by using a random number generator to decide + * whether to update the cache on each access. This means that the cache will + * not be updated on every access, but will be updated on a random subset of + * accesses. + * + * @template K + * @template V + */ +class SampledLRUCache { + /** @type {SafeMap} */ + #map = new SafeMap(); + #capacity = 0; + #sampleRate = 0.1; + + /** @type {K} */ + #lastUsedKey = undefined; + /** @type {V} */ + #lastUsedValue = undefined; + + /** @param {number} capacity */ + constructor(capacity) { + this.#capacity = capacity; + } + + /** + * @param {K} key + * @param {(key: K) => V} factory + * @return {V} + */ + getOrInsert(key, factory) { + if (this.#lastUsedKey === key) return this.#lastUsedValue; + const value = this.#map.get(key); + if (value !== undefined) { + if (MathRandom() < this.#sampleRate) { + // put the item into the map + this.#map.delete(key); + this.#map.set(key, value); + } + this.#lastUsedKey = key; + this.#lastUsedValue = value; + return value; + } else { + // value doesn't exist yet, create + const value = factory(key); + if (MathRandom() < this.#sampleRate) { + // if the map is at capacity, delete the oldest (first) element + if (this.#map.size > this.#capacity) { + // deno-lint-ignore prefer-primordials + this.#map.delete(this.#map.keys().next().value); + } + // insert the new value + this.#map.set(key, value); + } + this.#lastUsedKey = key; + this.#lastUsedValue = value; + return value; + } + } +} + +const matchInputCache = new SampledLRUCache(4096); + class URLPattern { /** @type {Components} */ [_components]; + #reusedResult; + /** * @param {URLPatternInput} input * @param {string} [baseURL] @@ -80,9 +161,6 @@ class URLPattern { components[key].regexpString, "u", ); - // used for fast path - components[key].matchOnEmptyInput = - components[key].regexpString === "^$"; } catch (e) { throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`); } @@ -144,20 +222,28 @@ class URLPattern { baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2"); } - const res = ops.op_urlpattern_process_match_input( - input, - baseURL, - ); - if (res === null) { - return false; - } + const res = baseURL === undefined + ? matchInputCache.getOrInsert( + input, + ops.op_urlpattern_process_match_input, + ) + : ops.op_urlpattern_process_match_input(input, baseURL); + if (res === null) return false; const values = res[0]; for (let i = 0; i < COMPONENTS_KEYS.length; ++i) { const key = COMPONENTS_KEYS[i]; - if (!RegExpPrototypeTest(this[_components][key].regexp, values[key])) { - return false; + const component = this[_components][key]; + switch (component.regexpString) { + case "^$": + if (values[key] !== "") return false; + break; + case "^(.*)$": + break; + default: { + if (!RegExpPrototypeTest(component.regexp, values[key])) return false; + } } } @@ -178,48 +264,65 @@ class URLPattern { baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2"); } - const res = ops.op_urlpattern_process_match_input( - input, - baseURL, - ); + const res = baseURL === undefined + ? matchInputCache.getOrInsert( + input, + ops.op_urlpattern_process_match_input, + ) + : ops.op_urlpattern_process_match_input(input, baseURL); if (res === null) { return null; } - const { 0: values, 1: inputs } = res; - if (inputs[1] === null) { - ArrayPrototypePop(inputs); - } + const { 0: values, 1: inputs } = res; /** @type {URLPatternResult} */ - /** @type {URLPatternResult} */ - const result = { inputs }; + // globalThis.allocAttempt++; + this.#reusedResult ??= { inputs: [undefined] }; + const result = this.#reusedResult; + // We don't construct the `inputs` until after the matching is done under + // the assumption that most patterns do not match. + + const components = this[_components]; for (let i = 0; i < COMPONENTS_KEYS.length; ++i) { const key = COMPONENTS_KEYS[i]; /** @type {Component} */ - const component = this[_components][key]; - const input = values[key]; + const component = components[key]; - const match = component.matchOnEmptyInput && input === "" - ? EMPTY_MATCH // fast path - : RegExpPrototypeExec(component.regexp, input); - - if (match === null) { - return null; - } - - const groups = {}; - const groupList = component.groupNameList; - for (let i = 0; i < groupList.length; ++i) { - groups[groupList[i]] = match[i + 1] ?? ""; - } - - result[key] = { - input, - groups, + const res = result[key] ??= { + input: values[key], + groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {}, }; + + switch (component.regexpString) { + case "^$": + if (values[key] !== "") return null; + break; + case "^(.*)$": + res.groups["0"] = values[key]; + break; + default: { + const match = RegExpPrototypeExec(component.regexp, values[key]); + if (match === null) return null; + const groupList = component.groupNameList; + const groups = res.groups; + for (let i = 0; i < groupList.length; ++i) { + // TODO(lucacasonato): this is vulnerable to override mistake + groups[groupList[i]] = match[i + 1] ?? ""; + } + break; + } + } + res.input = values[key]; } + // Now populate result.inputs + result.inputs[0] = typeof inputs[0] === "string" + ? inputs[0] + : ObjectAssign(ObjectCreate(null), inputs[0]); + if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]); + + this.#reusedResult = undefined; return result; }