mirror of
https://github.com/denoland/deno.git
synced 2025-03-03 09:31:22 -05:00
fix(URL): Implement spec-compliant host parsing (#6689)
This commit is contained in:
parent
39dba12a06
commit
69e0886362
9 changed files with 160 additions and 49 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -393,6 +393,7 @@ dependencies = [
|
|||
"futures 0.3.5",
|
||||
"fwdansi",
|
||||
"http",
|
||||
"idna",
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
|
|
|
@ -33,6 +33,7 @@ dlopen = "0.1.8"
|
|||
dprint-plugin-typescript = "0.19.5"
|
||||
futures = { version = "0.3.5", features = ["compat", "io-compat"] }
|
||||
http = "0.2.1"
|
||||
idna = "0.2.0"
|
||||
indexmap = "1.4.0"
|
||||
lazy_static = "1.4.0"
|
||||
libc = "0.2.71"
|
||||
|
|
12
cli/js/ops/idna.ts
Normal file
12
cli/js/ops/idna.ts
Normal file
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
|
||||
/** https://url.spec.whatwg.org/#idna */
|
||||
|
||||
import { sendSync } from "./dispatch_json.ts";
|
||||
|
||||
/**
 * Converts `domain` to ASCII by dispatching the synchronous
 * `op_domain_to_ascii` op (see https://url.spec.whatwg.org/#idna).
 *
 * @param domain The domain string handed to the op.
 * @param options `beStrict` is forwarded to the op as-is; it defaults to
 *   `false` when omitted.
 * @returns The value produced by the op.
 */
export function domainToAscii(
  domain: string,
  options: { beStrict?: boolean } = {}
): string {
  const { beStrict = false } = options;
  const payload = { domain, beStrict };
  return sendSync("op_domain_to_ascii", payload);
}
|
|
@ -1,11 +1,14 @@
|
|||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
import { build } from "../build.ts";
|
||||
import { getRandomValues } from "../ops/get_random_values.ts";
|
||||
import { domainToAscii } from "../ops/idna.ts";
|
||||
import { customInspect } from "./console.ts";
|
||||
import { TextEncoder } from "./text_encoding.ts";
|
||||
import { urls } from "./url_search_params.ts";
|
||||
|
||||
interface URLParts {
|
||||
protocol: string;
|
||||
slashes: string;
|
||||
username: string;
|
||||
password: string;
|
||||
hostname: string;
|
||||
|
@ -57,7 +60,9 @@ function parse(url: string, isBase = true): URLParts | undefined {
|
|||
if (isBase && parts.protocol == "") {
|
||||
return undefined;
|
||||
}
|
||||
const isSpecial = specialSchemes.includes(parts.protocol);
|
||||
if (parts.protocol == "file") {
|
||||
parts.slashes = "//";
|
||||
parts.username = "";
|
||||
parts.password = "";
|
||||
[parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2}([^/\\?#]*)/);
|
||||
|
@ -68,7 +73,8 @@ function parse(url: string, isBase = true): URLParts | undefined {
|
|||
// equivalent to: `new URL("file://localhost/foo/bar")`.
|
||||
[parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]*)/);
|
||||
}
|
||||
} else if (specialSchemes.includes(parts.protocol)) {
|
||||
} else if (isSpecial) {
|
||||
parts.slashes = "//";
|
||||
let restAuthority;
|
||||
[restAuthority, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]+)/);
|
||||
if (isBase && restAuthority == "") {
|
||||
|
@ -92,17 +98,18 @@ function parse(url: string, isBase = true): URLParts | undefined {
|
|||
return undefined;
|
||||
}
|
||||
} else {
|
||||
[parts.slashes, restUrl] = takePattern(restUrl, /^([/\\]{2})/);
|
||||
parts.username = "";
|
||||
parts.password = "";
|
||||
parts.hostname = "";
|
||||
if (parts.slashes) {
|
||||
[parts.hostname, restUrl] = takePattern(restUrl, /^([^/\\?#]*)/);
|
||||
} else {
|
||||
parts.hostname = "";
|
||||
}
|
||||
parts.port = "";
|
||||
}
|
||||
try {
|
||||
const IPv6re = /^\[[0-9a-fA-F.:]{2,}\]$/;
|
||||
if (!IPv6re.test(parts.hostname)) {
|
||||
parts.hostname = encodeHostname(parts.hostname); // Non-IPv6 URLs
|
||||
}
|
||||
parts.hostname = parts.hostname.toLowerCase();
|
||||
parts.hostname = encodeHostname(parts.hostname, isSpecial);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
|
@ -298,7 +305,8 @@ export class URLImpl implements URL {
|
|||
set hostname(value: string) {
|
||||
value = String(value);
|
||||
try {
|
||||
parts.get(this)!.hostname = encodeHostname(value);
|
||||
const isSpecial = specialSchemes.includes(parts.get(this)!.protocol);
|
||||
parts.get(this)!.hostname = encodeHostname(value, isSpecial);
|
||||
} catch {}
|
||||
}
|
||||
|
||||
|
@ -307,11 +315,9 @@ export class URLImpl implements URL {
|
|||
this.username || this.password
|
||||
? `${this.username}${this.password ? ":" + this.password : ""}@`
|
||||
: "";
|
||||
let slash = "";
|
||||
if (this.host || this.protocol === "file:") {
|
||||
slash = "//";
|
||||
}
|
||||
return `${this.protocol}${slash}${authentication}${this.host}${this.pathname}${this.search}${this.hash}`;
|
||||
return `${this.protocol}${parts.get(this)!.slashes}${authentication}${
|
||||
this.host
|
||||
}${this.pathname}${this.search}${this.hash}`;
|
||||
}
|
||||
|
||||
set href(value: string) {
|
||||
|
@ -427,6 +433,7 @@ export class URLImpl implements URL {
|
|||
} else if (baseParts) {
|
||||
parts.set(this, {
|
||||
protocol: baseParts.protocol,
|
||||
slashes: baseParts.slashes,
|
||||
username: baseParts.username,
|
||||
password: baseParts.password,
|
||||
hostname: baseParts.hostname,
|
||||
|
@ -479,7 +486,7 @@ export class URLImpl implements URL {
|
|||
}
|
||||
|
||||
/**
 * Reports whether `c` must be percent-encoded under the C0 control
 * percent-encode set: U+0000 through U+001F, plus everything above U+007E.
 *
 * The check is a plain string comparison; the visible caller passes one
 * character at a time.
 *
 * @param c The character to classify.
 * @returns `true` when `c` falls in either range.
 */
function charInC0ControlSet(c: string): boolean {
  const isC0Control = c >= "\u0000" && c <= "\u001F";
  // DEL and every non-ASCII code point are part of the set as well.
  return isC0Control || c > "\u007E";
}
|
||||
|
||||
function charInSearchSet(c: string): boolean {
|
||||
|
@ -503,20 +510,72 @@ function charInUserinfoSet(c: string): boolean {
|
|||
return charInPathSet(c) || ["\u0027", "\u002F", "\u003A", "\u003B", "\u003D", "\u0040", "\u005B", "\u005C", "\u005D", "\u005E", "\u007C"].includes(c);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `c` is one of the code points this module refuses to accept
 * inside a host.
 *
 * @param c The character to classify.
 * @returns `true` only when `c` is exactly one of the listed characters.
 */
function charIsForbiddenInHost(c: string): boolean {
  switch (c) {
    case "\u0000": // NULL
    case "\u0009": // TAB
    case "\u000A": // LF
    case "\u000D": // CR
    case "\u0020": // SPACE
    case "\u0023": // #
    case "\u0025": // %
    case "\u002F": // /
    case "\u003A": // :
    case "\u003C": // <
    case "\u003E": // >
    case "\u003F": // ?
    case "\u0040": // @
    case "\u005B": // [
    case "\u005C": // \
    case "\u005D": // ]
    case "\u005E": // ^
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// UTF-8 encoder shared by `encodeChar`.
const encoder = new TextEncoder();

/**
 * Percent-encodes `c`: the string is encoded as UTF-8 and every resulting
 * byte is written as `%XX` with exactly two uppercase hexadecimal digits.
 *
 * @param c The character (or string) to encode.
 * @returns The concatenated `%XX` escapes, e.g. `"ñ"` -> `"%C3%B1"`.
 */
function encodeChar(c: string): string {
  return [...encoder.encode(c)]
    // Bytes below 0x10 need a leading zero, otherwise "\u0001" would become
    // "%1", which is not a valid percent-encoded byte.
    .map((n) => `%${n.toString(16).padStart(2, "0")}`)
    .join("")
    .toUpperCase();
}
|
||||
|
||||
/**
 * Percent-encodes the userinfo part of a URL: every character for which
 * `charInUserinfoSet` returns true is replaced by `encodeChar`'s output, and
 * all other characters are copied through unchanged.
 *
 * @param s The raw username or password.
 * @returns The encoded string.
 */
function encodeUserinfo(s: string): string {
  let encoded = "";
  for (const ch of s) {
    encoded += charInUserinfoSet(ch) ? encodeChar(ch) : ch;
  }
  return encoded;
}
|
||||
|
||||
/**
 * Validates and serializes a URL host.
 *
 * - A bracketed host (`[...]`) must consist only of hex digits, `.` and `:`
 *   and is returned lowercased.
 * - For a non-special scheme (`isSpecial` false) the host is kept opaque:
 *   forbidden host code points other than `%` are rejected and characters in
 *   the C0 control percent-encode set are percent-encoded.
 * - Otherwise the host is percent-decoded as UTF-8, passed through
 *   `domainToAscii`, and checked against the forbidden host code points.
 *
 * @param s The raw host string.
 * @param isSpecial Whether the URL's scheme is a special scheme.
 * @returns The serialized host.
 * @throws TypeError With message "Invalid hostname." when validation fails.
 *   Errors raised by `domainToAscii` propagate unchanged.
 */
function encodeHostname(s: string, isSpecial = true): string {
  // IPv6 parsing.
  if (s.startsWith("[") && s.endsWith("]")) {
    if (!s.match(/^\[[0-9A-Fa-f.:]{2,}\]$/)) {
      throw new TypeError("Invalid hostname.");
    }
    return s.toLowerCase();
  }

  let result = s;

  if (!isSpecial) {
    // Check against forbidden host code points except for "%".
    for (const c of result) {
      if (charIsForbiddenInHost(c) && c != "\u0025") {
        throw new TypeError("Invalid hostname.");
      }
    }

    // Percent-encode C0 control set.
    result = [...result]
      .map((c) => (charInC0ControlSet(c) ? encodeChar(c) : c))
      .join("");

    return result;
  }

  // Percent-decode. A "%" that does not start a two-digit hex escape is
  // rejected up front, so every remaining "%" begins a valid escape.
  if (result.match(/%(?![0-9A-Fa-f]{2})/) != null) {
    throw new TypeError("Invalid hostname.");
  }
  try {
    // The escaped bytes must be decoded together as UTF-8 ("%C3%B1" is "ñ"),
    // not one code point per byte.
    result = decodeURIComponent(result);
  } catch {
    // Escapes that do not form valid UTF-8 cannot yield a valid domain.
    throw new TypeError("Invalid hostname.");
  }

  // IDNA domain to ASCII.
  result = domainToAscii(result);

  // Check against forbidden host code points.
  for (const c of result) {
    if (charIsForbiddenInHost(c)) {
      throw new TypeError("Invalid hostname.");
    }
  }

  // TODO(nayeemrmn): IPv4 parsing.

  return result;
}
|
||||
|
||||
function encodePathname(s: string): string {
|
||||
|
|
43
cli/ops/idna.rs
Normal file
43
cli/ops/idna.rs
Normal file
|
@ -0,0 +1,43 @@
|
|||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
|
||||
//! https://url.spec.whatwg.org/#idna
|
||||
|
||||
use super::dispatch_json::{Deserialize, JsonOp, Value};
|
||||
use crate::op_error::{ErrorKind, OpError};
|
||||
use crate::state::State;
|
||||
use deno_core::CoreIsolate;
|
||||
use deno_core::ZeroCopyBuf;
|
||||
use idna::{domain_to_ascii, domain_to_ascii_strict};
|
||||
|
||||
pub fn init(i: &mut CoreIsolate, s: &State) {
|
||||
i.register_op("op_domain_to_ascii", s.stateful_json_op(op_domain_to_ascii));
|
||||
}
|
||||
|
||||
fn invalid_domain_error() -> OpError {
|
||||
OpError {
|
||||
kind: ErrorKind::TypeError,
|
||||
msg: "Invalid domain.".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct DomainToAscii {
|
||||
domain: String,
|
||||
be_strict: bool,
|
||||
}
|
||||
|
||||
fn op_domain_to_ascii(
|
||||
_state: &State,
|
||||
args: Value,
|
||||
_zero_copy: &mut [ZeroCopyBuf],
|
||||
) -> Result<JsonOp, OpError> {
|
||||
let args: DomainToAscii = serde_json::from_value(args)?;
|
||||
let domain = if args.be_strict {
|
||||
domain_to_ascii_strict(args.domain.as_str())
|
||||
.map_err(|_| invalid_domain_error())?
|
||||
} else {
|
||||
domain_to_ascii(args.domain.as_str()).map_err(|_| invalid_domain_error())?
|
||||
};
|
||||
Ok(JsonOp::Sync(json!(domain)))
|
||||
}
|
|
@ -13,6 +13,7 @@ pub mod errors;
|
|||
pub mod fetch;
|
||||
pub mod fs;
|
||||
pub mod fs_events;
|
||||
pub mod idna;
|
||||
pub mod io;
|
||||
pub mod net;
|
||||
#[cfg(unix)]
|
||||
|
|
|
@ -25,32 +25,25 @@ unitTest(function urlParsing(): void {
|
|||
String(url),
|
||||
"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"
|
||||
);
|
||||
assertEquals(
|
||||
JSON.stringify({ key: url }),
|
||||
`{"key":"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"}`
|
||||
);
|
||||
});
|
||||
|
||||
// IPv6 type hostname.
|
||||
const urlv6 = new URL(
|
||||
"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"
|
||||
);
|
||||
assertEquals(urlv6.origin, "https://[::1]:8000");
|
||||
assertEquals(urlv6.password, "bar");
|
||||
assertEquals(urlv6.pathname, "/qux/quux");
|
||||
assertEquals(urlv6.port, "8000");
|
||||
assertEquals(urlv6.protocol, "https:");
|
||||
assertEquals(urlv6.search, "?foo=bar&baz=12");
|
||||
assertEquals(urlv6.searchParams.getAll("foo"), ["bar"]);
|
||||
assertEquals(urlv6.searchParams.getAll("baz"), ["12"]);
|
||||
assertEquals(urlv6.username, "foo");
|
||||
assertEquals(
|
||||
String(urlv6),
|
||||
"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"
|
||||
);
|
||||
assertEquals(
|
||||
JSON.stringify({ key: urlv6 }),
|
||||
`{"key":"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"}`
|
||||
);
|
||||
// Covers host parsing for special (`https`) and non-special (`abcde`) schemes.
unitTest(function urlHostParsing(): void {
  const hostnameOf = (input: string): string => new URL(input).hostname;

  // IPv6.
  assertEquals(hostnameOf("https://foo:bar@[::1]:8000"), "[::1]");

  // Forbidden host code point.
  for (const input of ["https:// a", "abcde:// a", "https://%"]) {
    assertThrows(() => new URL(input), TypeError, "Invalid URL.");
  }
  assertEquals(hostnameOf("abcde://%"), "%");

  // Percent-decode.
  assertEquals(hostnameOf("https://%21"), "!");
  assertEquals(hostnameOf("abcde://%21"), "%21");

  // TODO(nayeemrmn): IPv4 parsing.
  // assertEquals(new URL("https://260").hostname, "0.0.1.4");
  assertEquals(hostnameOf("abcde://260"), "260");
});
|
||||
|
||||
unitTest(function urlModifications(): void {
|
||||
|
@ -208,6 +201,7 @@ unitTest(function urlUncHostname() {
|
|||
|
||||
// Hosts of special schemes are lowercased; hosts of other schemes keep their case.
unitTest(function urlHostnameUpperCase() {
  const cases: Array<[string, string]> = [
    ["https://EXAMPLE.COM", "https://example.com/"],
    ["abcde://EXAMPLE.COM", "abcde://EXAMPLE.COM/"],
  ];
  for (const [input, expectedHref] of cases) {
    assertEquals(new URL(input).href, expectedHref);
  }
});
|
||||
|
||||
unitTest(function urlTrim() {
|
||||
|
@ -223,11 +217,9 @@ unitTest(function urlEncoding() {
|
|||
new URL("https://:a !$&*()=,;+'\"@example.com").password,
|
||||
"a%20!$&*()%3D,%3B+%27%22"
|
||||
);
|
||||
// FIXME: https://url.spec.whatwg.org/#idna
|
||||
// assertEquals(
|
||||
// new URL("https://a !$&*()=,+'\"").hostname,
|
||||
// "a%20%21%24%26%2A%28%29%3D%2C+%27%22"
|
||||
// );
|
||||
assertEquals(new URL("abcde://mañana/c?d#e").hostname, "ma%C3%B1ana");
|
||||
// https://url.spec.whatwg.org/#idna
|
||||
assertEquals(new URL("https://mañana/c?d#e").hostname, "xn--maana-pta");
|
||||
assertEquals(
|
||||
new URL("https://example.com/a ~!@$&*()=:/,;+'\"\\").pathname,
|
||||
"/a%20~!@$&*()=:/,;+'%22/"
|
||||
|
|
|
@ -121,6 +121,7 @@ impl WebWorker {
|
|||
handle,
|
||||
);
|
||||
ops::worker_host::init(isolate, &state);
|
||||
ops::idna::init(isolate, &state);
|
||||
ops::io::init(isolate, &state);
|
||||
ops::resources::init(isolate, &state);
|
||||
ops::errors::init(isolate, &state);
|
||||
|
|
|
@ -261,6 +261,7 @@ impl MainWorker {
|
|||
ops::fetch::init(isolate, &state);
|
||||
ops::fs::init(isolate, &state);
|
||||
ops::fs_events::init(isolate, &state);
|
||||
ops::idna::init(isolate, &state);
|
||||
ops::io::init(isolate, &state);
|
||||
ops::plugin::init(isolate, &state);
|
||||
ops::net::init(isolate, &state);
|
||||
|
|
Loading…
Add table
Reference in a new issue