0
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-03-03 17:34:47 -05:00

fix: Support the stream option to TextDecoder#decode (#10805)

This commit is contained in:
Andreu Botella 2021-06-01 11:24:16 +02:00 committed by GitHub
parent 6dd7a7ecd9
commit e466a6fc9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 75 additions and 37 deletions

View file

@ -222,12 +222,18 @@
return result; return result;
} }
function Big5Decoder(big5, bytes, fatal = false, ignoreBOM = false) { function Big5Decoder(
big5,
bytes,
fatal = false,
ignoreBOM = false,
stream = false,
lead = 0x00,
) {
if (ignoreBOM) { if (ignoreBOM) {
throw new TypeError("Ignoring the BOM is available only with utf-8."); throw new TypeError("Ignoring the BOM is available only with utf-8.");
} }
const res = []; const res = [];
let lead = 0x00;
for (let i = 0; i < bytes.length; i++) { for (let i = 0; i < bytes.length; i++) {
const byte = bytes[i]; const byte = bytes[i];
if (lead !== 0x00) { if (lead !== 0x00) {
@ -276,11 +282,11 @@
res.push(decoderError(fatal)); res.push(decoderError(fatal));
continue; continue;
} }
if (lead !== 0x00) { if (!stream && lead !== 0x00) {
lead = 0x00; lead = 0x00;
res.push(decoderError(fatal)); res.push(decoderError(fatal));
} }
return res; return [res, lead];
} }
function Utf16ByteDecoder( function Utf16ByteDecoder(
@ -288,9 +294,9 @@
be = false, be = false,
fatal = false, fatal = false,
ignoreBOM = false, ignoreBOM = false,
stream = false,
{ leadByte = null, leadSurrogate = null } = {},
) { ) {
let leadByte = null;
let leadSurrogate = null;
const result = []; const result = [];
for (let i = 0; i < bytes.length; i++) { for (let i = 0; i < bytes.length; i++) {
@ -327,10 +333,10 @@
} }
result.push(codeUnit); result.push(codeUnit);
} }
if (!(leadByte === null && leadSurrogate === null)) { if (!stream && !(leadByte === null && leadSurrogate === null)) {
result.push(decoderError(fatal)); result.push(decoderError(fatal));
} }
return result; return [result, { leadByte, leadSurrogate }];
} }
const gb18030Ranges = { const gb18030Ranges = {
@ -587,14 +593,13 @@
bytes, bytes,
fatal = false, fatal = false,
ignoreBOM = false, ignoreBOM = false,
stream = false,
{ first = 0x00, second = 0x00, third = 0x00 } = {},
) { ) {
if (ignoreBOM) { if (ignoreBOM) {
throw new TypeError("Ignoring the BOM is available only with utf-8."); throw new TypeError("Ignoring the BOM is available only with utf-8.");
} }
const result = []; const result = [];
let first = 0x00;
let second = 0x00;
let third = 0x00;
for (let i = 0; i < bytes.length; i++) { for (let i = 0; i < bytes.length; i++) {
const byte = bytes[i]; const byte = bytes[i];
if (third !== 0x00) { if (third !== 0x00) {
@ -667,10 +672,10 @@
} }
result.push(decoderError(fatal)); result.push(decoderError(fatal));
} }
if (!(first === 0x00 && second === 0x00 && third === 0x00)) { if (!stream && !(first === 0x00 && second === 0x00 && third === 0x00)) {
result.push(decoderError(fatal)); result.push(decoderError(fatal));
} }
return result; return [result, { first, second, third }];
} }
class SingleByteDecoder { class SingleByteDecoder {
@ -4153,6 +4158,7 @@
class TextDecoder { class TextDecoder {
#encoding = ""; #encoding = "";
#state;
get encoding() { get encoding() {
return this.#encoding; return this.#encoding;
@ -4186,9 +4192,11 @@
} }
decode(input, options = { stream: false }) { decode(input, options = { stream: false }) {
if (options.stream) { const stream = Boolean(options.stream);
throw new TypeError("Stream not supported.");
} // If we're decoding anything other than the first chunk of a stream,
// we will not ignore a BOM.
const ignoreBOM = this.ignoreBOM && this.#state === undefined;
let bytes; let bytes;
if (input instanceof Uint8Array) { if (input instanceof Uint8Array) {
@ -4216,7 +4224,9 @@
if ( if (
this.#encoding === "utf-8" && this.#encoding === "utf-8" &&
this.fatal === false && this.fatal === false &&
this.ignoreBOM === false ignoreBOM === false &&
stream === false &&
this.#state === undefined
) { ) {
return core.decode(bytes); return core.decode(bytes);
} }
@ -4224,42 +4234,59 @@
// For performance reasons we utilise a highly optimised decoder instead of // For performance reasons we utilise a highly optimised decoder instead of
// the general decoder. // the general decoder.
if (this.#encoding === "utf-8") { if (this.#encoding === "utf-8") {
return decodeUtf8(bytes, this.fatal, this.ignoreBOM); const [result, state] = decodeUtf8(
bytes,
this.fatal,
ignoreBOM,
stream,
this.#state,
);
this.#state = stream ? state : undefined;
return result;
} }
if (this.#encoding === "utf-16le" || this.#encoding === "utf-16be") { if (this.#encoding === "utf-16le" || this.#encoding === "utf-16be") {
const result = Utf16ByteDecoder( const [result, state] = Utf16ByteDecoder(
bytes, bytes,
this.#encoding.endsWith("be"), this.#encoding.endsWith("be"),
this.fatal, this.fatal,
this.ignoreBOM, ignoreBOM,
stream,
this.#state,
); );
this.#state = stream ? state : undefined;
return String.fromCharCode.apply(null, result); return String.fromCharCode.apply(null, result);
} }
if (this.#encoding === "big5") { if (this.#encoding === "big5") {
const result = Big5Decoder( const [result, state] = Big5Decoder(
encodingIndexes.get("big5"), encodingIndexes.get("big5"),
bytes, bytes,
this.fatal, this.fatal,
this.ignoreBOM, ignoreBOM,
stream,
this.#state,
); );
this.#state = stream ? state : undefined;
return String.fromCharCode.apply(null, result); return String.fromCharCode.apply(null, result);
} }
if (this.#encoding === "gbk" || this.#encoding === "gb18030") { if (this.#encoding === "gbk" || this.#encoding === "gb18030") {
const result = gb18030Decoder( const [result, state] = gb18030Decoder(
encodingIndexes.get("gb18030"), encodingIndexes.get("gb18030"),
bytes, bytes,
this.fatal, this.fatal,
this.ignoreBOM, ignoreBOM,
stream,
this.#state,
); );
this.#state = stream ? state : undefined;
return String.fromCodePoint.apply(null, result); return String.fromCodePoint.apply(null, result);
} }
const decoder = decoders.get(this.#encoding)({ const decoder = decoders.get(this.#encoding)({
fatal: this.fatal, fatal: this.fatal,
ignoreBOM: this.ignoreBOM, ignoreBOM,
}); });
const inputStream = new Stream(bytes); const inputStream = new Stream(bytes);
const output = []; const output = [];
@ -4333,17 +4360,27 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE. // SOFTWARE.
function decodeUtf8(input, fatal, ignoreBOM) { function decodeUtf8(
input,
fatal,
ignoreBOM,
stream,
{ state = 0, codepoint = 0 } = {},
) {
let outString = ""; let outString = "";
// Prepare a buffer so that we don't have to do a lot of string concats, which // Prepare a buffer so that we don't have to do a lot of string concats, which
// are very slow. // are very slow.
const outBufferLength = Math.min(1024, input.length); // When decoding non-streaming UTF-8, the maximum output string length is
// input.length, but if state !== 0, there might be one additional code
// point.
const outBufferLength = Math.min(
1024,
input.length + (state === 0 ? 0 : 2),
);
const outBuffer = new Uint16Array(outBufferLength); const outBuffer = new Uint16Array(outBufferLength);
let outIndex = 0; let outIndex = 0;
let state = 0;
let codepoint = 0;
let type; let type;
let i = let i =
@ -4416,9 +4453,10 @@
} }
} }
// Add a replacement character if we ended in the middle of a sequence or // Add a replacement character if we ended in the middle of a sequence and
// encountered an invalid code at the end. // we aren't in streaming mode, or if we encountered an invalid code at the
if (state !== 0) { // end.
if (state === 12 || (!stream && state !== 0)) {
if (fatal) throw new TypeError(`Decoder error. Unexpected end of data.`); if (fatal) throw new TypeError(`Decoder error. Unexpected end of data.`);
outBuffer[outIndex++] = 0xfffd; // Replacement character outBuffer[outIndex++] = 0xfffd; // Replacement character
} }
@ -4429,7 +4467,7 @@
outBuffer.subarray(0, outIndex), outBuffer.subarray(0, outIndex),
); );
return outString; return [outString, { state, codepoint }];
} }
// Following code is forked from https://github.com/beatgammit/base64-js // Following code is forked from https://github.com/beatgammit/base64-js

View file

@ -189,7 +189,7 @@ declare class TextDecoder {
options?: { fatal?: boolean; ignoreBOM?: boolean }, options?: { fatal?: boolean; ignoreBOM?: boolean },
); );
/** Returns the result of running encoding's decoder. */ /** Returns the result of running encoding's decoder. */
decode(input?: BufferSource, options?: { stream?: false }): string; decode(input?: BufferSource, options?: { stream?: boolean }): string;
readonly [Symbol.toStringTag]: string; readonly [Symbol.toStringTag]: string;
} }

View file

@ -91,6 +91,7 @@
"encode-utf8.any.html": false, "encode-utf8.any.html": false,
"readable-writable-properties.any.html": false "readable-writable-properties.any.html": false
}, },
"textdecoder-arguments.any.html": true,
"textdecoder-byte-order-marks.any.html": true, "textdecoder-byte-order-marks.any.html": true,
"textdecoder-copy.any.html": false, "textdecoder-copy.any.html": false,
"textdecoder-fatal-single-byte.any.html?1-1000": true, "textdecoder-fatal-single-byte.any.html?1-1000": true,
@ -132,7 +133,7 @@
"windows-949 => EUC-KR", "windows-949 => EUC-KR",
"x-user-defined => x-user-defined" "x-user-defined => x-user-defined"
], ],
"textdecoder-streaming.any.html": false, "textdecoder-streaming.any.html": true,
"textdecoder-utf16-surrogates.any.html": true, "textdecoder-utf16-surrogates.any.html": true,
"textencoder-constructor-non-utf.any.html": [ "textencoder-constructor-non-utf.any.html": [
"Encoding argument supported for decode: EUC-JP", "Encoding argument supported for decode: EUC-JP",
@ -142,8 +143,7 @@
"Encoding argument supported for decode: x-user-defined" "Encoding argument supported for decode: x-user-defined"
], ],
"textencoder-utf16-surrogates.any.html": true, "textencoder-utf16-surrogates.any.html": true,
"unsupported-encodings.any.html": false, "unsupported-encodings.any.html": false
"textdecoder-arguments.any.html": false
}, },
"hr-time": { "hr-time": {
"monotonic-clock.any.html": true, "monotonic-clock.any.html": true,