From 7b0375fae7578c0c7e4f4229e59ad5046ecf75ab Mon Sep 17 00:00:00 2001 From: Andreu Botella Date: Fri, 2 Jul 2021 12:11:20 +0200 Subject: [PATCH] perf: speed up TextEncoder.prototype.encodeInto() (#11219) The current implementation of op_encoding_encode_into UTF-8 encodes each individual code point in the input string into the output buffer. But after the ops binding, the input is a Rust String, so the UTF-8 bytes can simply be copied to the output. This should improve this API's performance. --- extensions/web/lib.rs | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/extensions/web/lib.rs b/extensions/web/lib.rs index 6e35524762..67022c7eab 100644 --- a/extensions/web/lib.rs +++ b/extensions/web/lib.rs @@ -298,23 +298,34 @@ fn op_encoding_encode_into( input: String, mut buffer: ZeroCopyBuf, ) -> Result { - let dst: &mut [u8] = &mut buffer; - let mut read = 0; - let mut written = 0; - for char in input.chars() { - let len = char.len_utf8(); - if dst.len() < written + len { - break; + // Since `input` is already UTF-8, we can simply find the last UTF-8 code + // point boundary from input that fits in `buffer`, and copy the bytes up to + // that point. + let boundary = if buffer.len() >= input.len() { + input.len() + } else { + let mut boundary = buffer.len(); + + // The maximum length of a UTF-8 code point is 4 bytes. + for _ in 0..4 { + if input.is_char_boundary(boundary) { + break; + } + debug_assert!(boundary > 0); + boundary -= 1; } - char.encode_utf8(&mut dst[written..]); - written += len; - if char > '\u{FFFF}' { - read += 2 - } else { - read += 1 - }; - } - Ok(EncodeIntoResult { read, written }) + + debug_assert!(input.is_char_boundary(boundary)); + boundary + }; + + buffer[..boundary].copy_from_slice(input[..boundary].as_bytes()); + + Ok(EncodeIntoResult { + // The `read` output parameter is measured in UTF-16 code units. + read: input[..boundary].encode_utf16().count(), + written: boundary, + }) } pub fn get_declaration() -> PathBuf {