From 93c6f927d42f907dcd0d72945a175e171fa91b3a Mon Sep 17 00:00:00 2001 From: Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:49:43 -0700 Subject: [PATCH] fix(ext/node): Match punycode module behavior to node (#22847) Fixes #19214. We were using the `idna` crate to implement our polyfill for `punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is correct, and adheres to the IDNA2003/2008 spec, but it turns out `node`'s implementations don't really follow any spec! Instead, node splits the domain by `'.'` and punycode encodes/decodes each part. This means that node's implementations will happily work on codepoints that are disallowed by the IDNA specs, causing the error in #19214. While fixing this, I went ahead and matched the node behavior on all of the punycode functions and enabled node's punycode test in our `node_compat` suite. --- ext/node/lib.rs | 2 + ext/node/ops/idna.rs | 141 ++++++++- ext/node/polyfills/dns.ts | 12 +- ext/node/polyfills/internal/idna.ts | 19 ++ ext/node/polyfills/punycode.ts | 24 +- ext/node/polyfills/url.ts | 13 +- tests/integration/node_unit_tests.rs | 1 + tests/node_compat/config.jsonc | 1 + .../test/parallel/test-punycode.js | 280 ++++++++++++++++++ tests/unit_node/punycode_test.ts | 16 + tools/node_compat/TODO.md | 3 +- 11 files changed, 491 insertions(+), 21 deletions(-) create mode 100644 tests/node_compat/test/parallel/test-punycode.js create mode 100644 tests/unit_node/punycode_test.ts diff --git a/ext/node/lib.rs b/ext/node/lib.rs index 6d5a21acea..f9553a0383 100644 --- a/ext/node/lib.rs +++ b/ext/node/lib.rs @@ -261,6 +261,8 @@ deno_core::extension!(deno_node, ops::v8::op_vm_run_in_new_context, ops::idna::op_node_idna_domain_to_ascii, ops::idna::op_node_idna_domain_to_unicode, + ops::idna::op_node_idna_punycode_to_ascii, + ops::idna::op_node_idna_punycode_to_unicode, ops::idna::op_node_idna_punycode_decode, ops::idna::op_node_idna_punycode_encode, ops::zlib::op_zlib_new, diff 
--git a/ext/node/ops/idna.rs b/ext/node/ops/idna.rs index 884e812ccd..9c9450c70f 100644 --- a/ext/node/ops/idna.rs +++ b/ext/node/ops/idna.rs @@ -1,16 +1,126 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use deno_core::error::AnyError; +use deno_core::anyhow::Error; +use deno_core::error::range_error; use deno_core::op2; +use std::borrow::Cow; + +// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js +// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js + +const PUNY_PREFIX: &str = "xn--"; + +fn invalid_input_err() -> Error { + range_error("Invalid input") +} + +fn not_basic_err() -> Error { + range_error("Illegal input >= 0x80 (not a basic code point)") +} + +/// map a domain by mapping each label with the given function +fn map_domain( + domain: &str, + f: impl Fn(&str) -> Result, E>, +) -> Result { + let mut result = String::with_capacity(domain.len()); + let mut domain = domain; + + // if it's an email, leave the local part as is + let mut parts = domain.split('@'); + if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) { + result.push_str(local); + result.push('@'); + domain = remaining; + } + + // split into labels and map each one + for (i, label) in domain.split('.').enumerate() { + if i > 0 { + result.push('.'); + } + result.push_str(&f(label)?); + } + Ok(result) +} + +/// Maps a unicode domain to ascii by punycode encoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_ascii(input: &str) -> Result { + if input.is_ascii() { + return Ok(input.into()); + } + + let mut result = String::with_capacity(input.len()); // at least as long as input + + let rest = map_domain(input, |label| { + if label.is_ascii() { + Ok(label.into()) + } else { + idna::punycode::encode_str(label) + .map(|encoded| [PUNY_PREFIX, &encoded].join("").into()) // add the prefix + 
+ .ok_or_else(|| { + Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs + }) + } + })?; + + result.push_str(&rest); + Ok(result) +} + +/// Maps an ascii domain to unicode by punycode decoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_unicode(input: &str) -> Result { + map_domain(input, |s| { + if let Some(puny) = s.strip_prefix(PUNY_PREFIX) { + // it's a punycode encoded label + Ok( + idna::punycode::decode_to_string(&puny.to_lowercase()) + .ok_or_else(invalid_input_err)? + .into(), + ) + } else { + Ok(s.into()) + } + }) +} + +/// Converts a domain to ASCII with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_ascii( + #[string] domain: String, +) -> Result { + to_ascii(&domain) +} + +/// Converts a domain to unicode with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_unicode( + #[string] domain: String, +) -> Result { + to_unicode(&domain) +} + +/// Converts a domain to ASCII as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_ascii( #[string] domain: String, -) -> Result { - Ok(idna::domain_to_ascii(&domain)?) 
+) -> Result { + idna::domain_to_ascii(&domain).map_err(|e| e.into()) } +/// Converts a domain to Unicode as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { @@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { #[op2] #[string] -pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String { - idna::punycode::decode_to_string(&domain).unwrap_or_default() +pub fn op_node_idna_punycode_decode( + #[string] domain: String, +) -> Result { + if domain.is_empty() { + return Ok(domain); + } + + // all code points before the last delimiter must be basic + // see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227 + let last_dash = domain.len() + - 1 + - domain + .bytes() + .rev() + .position(|b| b == b'-') + .unwrap_or(domain.len() - 1); + + if !domain[..last_dash].is_ascii() { + return Err(not_basic_err()); + } + + idna::punycode::decode_to_string(&domain) + .ok_or_else(|| deno_core::error::range_error("Invalid input")) } #[op2] diff --git a/ext/node/polyfills/dns.ts b/ext/node/polyfills/dns.ts index 3b3565cb34..78b934e602 100644 --- a/ext/node/polyfills/dns.ts +++ b/ext/node/polyfills/dns.ts @@ -92,7 +92,7 @@ import { GetAddrInfoReqWrap, QueryReqWrap, } from "ext:deno_node/internal_binding/cares_wrap.ts"; -import { toASCII } from "node:punycode"; +import { domainToASCII } from "ext:deno_node/internal/idna.ts"; import { notImplemented } from "ext:deno_node/_utils.ts"; function onlookup( @@ -264,7 +264,13 @@ export function lookup( req.hostname = hostname; req.oncomplete = all ? 
onlookupall : onlookup; - const err = getaddrinfo(req, toASCII(hostname), family, hints, verbatim); + const err = getaddrinfo( + req, + domainToASCII(hostname), + family, + hints, + verbatim, + ); if (err) { nextTick( @@ -332,7 +338,7 @@ function resolver(bindingName: keyof ChannelWrapQuery) { req.ttl = !!(options && (options as ResolveOptions).ttl); - const err = this._handle[bindingName](req, toASCII(name)); + const err = this._handle[bindingName](req, domainToASCII(name)); if (err) { throw dnsException(err, bindingName, name); diff --git a/ext/node/polyfills/internal/idna.ts b/ext/node/polyfills/internal/idna.ts index 6484fe9512..93ed065cce 100644 --- a/ext/node/polyfills/internal/idna.ts +++ b/ext/node/polyfills/internal/idna.ts @@ -51,6 +51,11 @@ "use strict"; +import { + op_node_idna_domain_to_ascii, + op_node_idna_domain_to_unicode, +} from "ext:core/ops"; + /** * Creates an array containing the numeric code points of each Unicode * character in the string. While JavaScript uses UCS-2 internally, @@ -105,3 +110,17 @@ export const ucs2 = { decode: ucs2decode, encode: ucs2encode, }; + +/** + * Converts a domain to ASCII as per the IDNA spec + */ +export function domainToASCII(domain: string) { + return op_node_idna_domain_to_ascii(domain); +} + +/** + * Converts a domain to Unicode as per the IDNA spec + */ +export function domainToUnicode(domain: string) { + return op_node_idna_domain_to_unicode(domain); +} diff --git a/ext/node/polyfills/punycode.ts b/ext/node/polyfills/punycode.ts index 6f137d31fa..e89be15a22 100644 --- a/ext/node/polyfills/punycode.ts +++ b/ext/node/polyfills/punycode.ts @@ -1,28 +1,40 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
import { - op_node_idna_domain_to_ascii, - op_node_idna_domain_to_unicode, op_node_idna_punycode_decode, op_node_idna_punycode_encode, + op_node_idna_punycode_to_ascii, + op_node_idna_punycode_to_unicode, } from "ext:core/ops"; +import { deprecate } from "node:util"; + import { ucs2 } from "ext:deno_node/internal/idna.ts"; +// deno-lint-ignore no-explicit-any +function punyDeprecated(fn: any) { + return deprecate( + fn, + "The `punycode` module is deprecated. Please use a userland " + + "alternative instead.", + "DEP0040", + ); +} + function toASCII(domain) { - return op_node_idna_domain_to_ascii(domain); + return punyDeprecated(op_node_idna_punycode_to_ascii)(domain); } function toUnicode(domain) { - return op_node_idna_domain_to_unicode(domain); + return punyDeprecated(op_node_idna_punycode_to_unicode)(domain); } function decode(domain) { - return op_node_idna_punycode_decode(domain); + return punyDeprecated(op_node_idna_punycode_decode)(domain); } function encode(domain) { - return op_node_idna_punycode_encode(domain); + return punyDeprecated(op_node_idna_punycode_encode)(domain); } export { decode, encode, toASCII, toUnicode, ucs2 }; diff --git a/ext/node/polyfills/url.ts b/ext/node/polyfills/url.ts index 14195d1465..6633334ba5 100644 --- a/ext/node/polyfills/url.ts +++ b/ext/node/polyfills/url.ts @@ -70,7 +70,10 @@ import { CHAR_ZERO_WIDTH_NOBREAK_SPACE, } from "ext:deno_node/path/_constants.ts"; import * as path from "node:path"; -import { toASCII, toUnicode } from "node:punycode"; +import { + domainToASCII as idnaToASCII, + domainToUnicode as idnaToUnicode, +} from "ext:deno_node/internal/idna.ts"; import { isWindows, osType } from "ext:deno_node/_util/os.ts"; import { encodeStr, hexTable } from "ext:deno_node/internal/querystring.ts"; import querystring from "node:querystring"; @@ -813,7 +816,7 @@ export class Url { // Use lenient mode (`true`) to try to support even non-compliant // URLs. 
- this.hostname = toASCII(this.hostname); + this.hostname = idnaToASCII(this.hostname); // Prevent two potential routes of hostname spoofing. // 1. If this.hostname is empty, it must have become empty due to toASCII @@ -1251,7 +1254,7 @@ export function resolveObject(source: string | Url, relative: string) { * @see https://www.rfc-editor.org/rfc/rfc3490#section-4 */ export function domainToASCII(domain: string) { - return toASCII(domain); + return idnaToASCII(domain); } /** @@ -1261,7 +1264,7 @@ export function domainToASCII(domain: string) { * @see https://www.rfc-editor.org/rfc/rfc3490#section-4 */ export function domainToUnicode(domain: string) { - return toUnicode(domain); + return idnaToUnicode(domain); } /** @@ -1396,7 +1399,7 @@ export function pathToFileURL(filepath: string): URL { ); } - outURL.hostname = domainToASCII(hostname); + outURL.hostname = idnaToASCII(hostname); outURL.pathname = encodePathChars(paths.slice(3).join("/")); } else { let resolved = path.resolve(filepath); diff --git a/tests/integration/node_unit_tests.rs b/tests/integration/node_unit_tests.rs index 3c824b6b23..2fd7e78f66 100644 --- a/tests/integration/node_unit_tests.rs +++ b/tests/integration/node_unit_tests.rs @@ -77,6 +77,7 @@ util::unit_test_factory!( path_test, perf_hooks_test, process_test, + punycode_test, querystring_test, readline_test, repl_test, diff --git a/tests/node_compat/config.jsonc b/tests/node_compat/config.jsonc index c5902f6f91..4d89b1a89a 100644 --- a/tests/node_compat/config.jsonc +++ b/tests/node_compat/config.jsonc @@ -456,6 +456,7 @@ "test-process-uptime.js", "test-promise-unhandled-silent.js", "test-promise-unhandled-throw-handler.js", + "test-punycode.js", "test-querystring-escape.js", "test-querystring-maxKeys-non-finite.js", "test-querystring-multichar-separator.js", diff --git a/tests/node_compat/test/parallel/test-punycode.js b/tests/node_compat/test/parallel/test-punycode.js new file mode 100644 index 0000000000..8c268a003d --- /dev/null +++ 
b/tests/node_compat/test/parallel/test-punycode.js @@ -0,0 +1,280 @@ +// deno-fmt-ignore-file +// deno-lint-ignore-file + +// Copyright Joyent and Node contributors. All rights reserved. MIT license. +// Taken from Node 18.12.1 +// This file is automatically generated by `tools/node_compat/setup.ts`. Do not modify this file manually. + +// Flags: --pending-deprecation + +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +'use strict'; +const common = require('../common'); + +const punycodeWarning = + 'The `punycode` module is deprecated. 
Please use a userland alternative ' + + 'instead.'; +common.expectWarning('DeprecationWarning', punycodeWarning, 'DEP0040'); + +const punycode = require('punycode'); +const assert = require('assert'); + +assert.strictEqual(punycode.encode('ü'), 'tda'); +assert.strictEqual(punycode.encode('Goethe'), 'Goethe-'); +assert.strictEqual(punycode.encode('Bücher'), 'Bcher-kva'); +assert.strictEqual( + punycode.encode( + 'Willst du die Blüthe des frühen, die Früchte des späteren Jahres' + ), + 'Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal' +); +assert.strictEqual(punycode.encode('日本語'), 'wgv71a119e'); +assert.strictEqual(punycode.encode('𩸽'), 'x73l'); + +assert.strictEqual(punycode.decode('tda'), 'ü'); +assert.strictEqual(punycode.decode('Goethe-'), 'Goethe'); +assert.strictEqual(punycode.decode('Bcher-kva'), 'Bücher'); +assert.strictEqual( + punycode.decode( + 'Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal' + ), + 'Willst du die Blüthe des frühen, die Früchte des späteren Jahres' +); +assert.strictEqual(punycode.decode('wgv71a119e'), '日本語'); +assert.strictEqual(punycode.decode('x73l'), '𩸽'); +assert.throws(() => { + punycode.decode(' '); +}, /^RangeError: Invalid input$/); +assert.throws(() => { + punycode.decode('α-'); +}, /^RangeError: Illegal input >= 0x80 \(not a basic code point\)$/); +assert.throws(() => { + punycode.decode('あ'); +}, /^RangeError: Invalid input$/); + +// http://tools.ietf.org/html/rfc3492#section-7.1 +const tests = [ + // (A) Arabic (Egyptian) + { + encoded: 'egbpdaj6bu4bxfgehfvwxn', + decoded: '\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645' + + '\u0648\u0634\u0639\u0631\u0628\u064A\u061F' + }, + + // (B) Chinese (simplified) + { + encoded: 'ihqwcrb4cv8a8dqg056pqjye', + decoded: '\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587' + }, + + // (C) Chinese (traditional) + { + encoded: 'ihqwctvzc91f659drss3x8bo0yb', + decoded: '\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587' + 
}, + + // (D) Czech: Proprostnemluvesky + { + encoded: 'Proprostnemluvesky-uyb24dma41a', + decoded: '\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074\u011B' + + '\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D\u0065\u0073\u006B\u0079' + }, + + // (E) Hebrew + { + encoded: '4dbcagdahymbxekheh6e0a7fei0b', + decoded: '\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC' + + '\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA' + }, + + // (F) Hindi (Devanagari) + { + encoded: 'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd', + decoded: '\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926' + + '\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C' + + '\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902' + }, + + // (G) Japanese (kanji and hiragana) + { + encoded: 'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa', + decoded: '\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71' + + '\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B' + }, + + // (H) Korean (Hangul syllables) + { + encoded: '989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879' + + 'ccm6fea98c', + decoded: '\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C' + + '\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098' + + '\uC88B\uC744\uAE4C' + }, + + // (I) Russian (Cyrillic) + { + encoded: 'b1abfaaepdrnnbgefbadotcwatmq2g4l', + decoded: '\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D' + + '\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F' + + '\u043E\u0440\u0443\u0441\u0441\u043A\u0438' + }, + + // (J) Spanish: PorqunopuedensimplementehablarenEspaol + { + encoded: 'PorqunopuedensimplementehablarenEspaol-fmd56a', + decoded: '\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070\u0075' + + '\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070\u006C\u0065\u006D' + + '\u0065\u006E\u0074\u0065\u0068\u0061\u0062\u006C\u0061\u0072\u0065' + + '\u006E\u0045\u0073\u0070\u0061\u00F1\u006F\u006C' + 
}, + + // (K) Vietnamese: Tisaohkhngth + // chnitingVit + { + encoded: 'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g', + decoded: '\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B\u0068' + + '\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068\u1EC9\u006E\u00F3' + + '\u0069\u0074\u0069\u1EBF\u006E\u0067\u0056\u0069\u1EC7\u0074' + }, + + // (L) 3B + { + encoded: '3B-ww4c5e180e575a65lsy2b', + decoded: '\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F' + }, + + // (M) -with-SUPER-MONKEYS + { + encoded: '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n', + decoded: '\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074\u0068' + + '\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D\u004F\u004E\u004B' + + '\u0045\u0059\u0053' + }, + + // (N) Hello-Another-Way- + { + encoded: 'Hello-Another-Way--fc4qua05auwb3674vfr0b', + decoded: '\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F\u0074' + + '\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D\u305D\u308C\u305E' + + '\u308C\u306E\u5834\u6240' + }, + + // (O) 2 + { + encoded: '2-u9tlzr9756bt3uc0v', + decoded: '\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032' + }, + + // (P) MajiKoi5 + { + encoded: 'MajiKoi5-783gue6qz075azm5e', + decoded: '\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059\u308B' + + '\u0035\u79D2\u524D' + }, + + // (Q) de + { + encoded: 'de-jg4avhby1noc0d', + decoded: '\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0' + }, + + // (R) + { + encoded: 'd9juau41awczczp', + decoded: '\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067' + }, + + // (S) -> $1.00 <- + { + encoded: '-> $1.00 <--', + decoded: '\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020\u003C' + + '\u002D' + }, +]; + +let errors = 0; +const handleError = (error, name) => { + console.error( + `FAIL: ${name} expected ${error.expected}, got ${error.actual}` + ); + errors++; +}; + +const regexNonASCII = /[^\x20-\x7E]/; +const testBattery = { + encode: (test) => assert.strictEqual( + punycode.encode(test.decoded), + test.encoded + ), + decode: 
(test) => assert.strictEqual( + punycode.decode(test.encoded), + test.decoded + ), + toASCII: (test) => assert.strictEqual( + punycode.toASCII(test.decoded), + regexNonASCII.test(test.decoded) ? + `xn--${test.encoded}` : + test.decoded + ), + toUnicode: (test) => assert.strictEqual( + punycode.toUnicode( + regexNonASCII.test(test.decoded) ? + `xn--${test.encoded}` : + test.decoded + ), + regexNonASCII.test(test.decoded) ? + test.decoded.toLowerCase() : + test.decoded + ) +}; + +tests.forEach((testCase) => { + Object.keys(testBattery).forEach((key) => { + try { + testBattery[key](testCase); + } catch (error) { + handleError(error, key); + } + }); +}); + +// BMP code point +assert.strictEqual(punycode.ucs2.encode([0x61]), 'a'); +// Supplementary code point (surrogate pair) +assert.strictEqual(punycode.ucs2.encode([0x1D306]), '\uD834\uDF06'); +// high surrogate +assert.strictEqual(punycode.ucs2.encode([0xD800]), '\uD800'); +// High surrogate followed by non-surrogates +assert.strictEqual(punycode.ucs2.encode([0xD800, 0x61, 0x62]), '\uD800ab'); +// low surrogate +assert.strictEqual(punycode.ucs2.encode([0xDC00]), '\uDC00'); +// Low surrogate followed by non-surrogates +assert.strictEqual(punycode.ucs2.encode([0xDC00, 0x61, 0x62]), '\uDC00ab'); + +assert.strictEqual(errors, 0); + +// test map domain +assert.strictEqual(punycode.toASCII('Bücher@日本語.com'), + 'Bücher@xn--wgv71a119e.com'); +assert.strictEqual(punycode.toUnicode('Bücher@xn--wgv71a119e.com'), + 'Bücher@日本語.com'); diff --git a/tests/unit_node/punycode_test.ts b/tests/unit_node/punycode_test.ts new file mode 100644 index 0000000000..f5f8c4f973 --- /dev/null +++ b/tests/unit_node/punycode_test.ts @@ -0,0 +1,16 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
+ +import * as punycode from "node:punycode"; +import { assertEquals } from "@std/assert/mod.ts"; + +Deno.test("regression #19214", () => { + const input = "个��.hk"; + + assertEquals(punycode.toASCII(input), "xn--ciq6844ba.hk"); + + assertEquals(punycode.toUnicode("xn--ciq6844ba.hk"), input); +}); + +Deno.test("Decode empty input", () => { + assertEquals(punycode.decode(""), ""); +}); diff --git a/tools/node_compat/TODO.md b/tools/node_compat/TODO.md index 17feb37b63..8321ae1df3 100644 --- a/tools/node_compat/TODO.md +++ b/tools/node_compat/TODO.md @@ -3,7 +3,7 @@ NOTE: This file should not be manually edited. Please edit `tests/node_compat/config.json` and run `deno task setup` in `tools/node_compat` dir instead. -Total: 2997 +Total: 2996 - [abort/test-abort-backtrace.js](https://github.com/nodejs/node/tree/v18.12.1/test/abort/test-abort-backtrace.js) - [abort/test-abort-fatal-error.js](https://github.com/nodejs/node/tree/v18.12.1/test/abort/test-abort-fatal-error.js) @@ -1937,7 +1937,6 @@ Total: 2997 - [parallel/test-promises-unhandled-rejections.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-promises-unhandled-rejections.js) - [parallel/test-promises-unhandled-symbol-rejections.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-promises-unhandled-symbol-rejections.js) - [parallel/test-promises-warning-on-unhandled-rejection.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-promises-warning-on-unhandled-rejection.js) -- [parallel/test-punycode.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-punycode.js) - [parallel/test-queue-microtask-uncaught-asynchooks.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-queue-microtask-uncaught-asynchooks.js) - [parallel/test-queue-microtask.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-queue-microtask.js) - 
[parallel/test-readable-from-iterator-closing.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-readable-from-iterator-closing.js)