From 7b0375fae7578c0c7e4f4229e59ad5046ecf75ab Mon Sep 17 00:00:00 2001
From: Andreu Botella <abb@randomunok.com>
Date: Fri, 2 Jul 2021 12:11:20 +0200
Subject: [PATCH] perf: speed up TextEncoder.prototype.encodeInto() (#11219)

The current implementation of op_encoding_encode_into UTF-8 encodes each
individual code point in the input string into the output buffer. But after the
ops binding, the input is a Rust String, so the UTF-8 bytes can simply be copied
to the output. This should improve this API's performance.
---
 extensions/web/lib.rs | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/extensions/web/lib.rs b/extensions/web/lib.rs
index 6e35524762..67022c7eab 100644
--- a/extensions/web/lib.rs
+++ b/extensions/web/lib.rs
@@ -298,23 +298,34 @@ fn op_encoding_encode_into(
   input: String,
   mut buffer: ZeroCopyBuf,
 ) -> Result<EncodeIntoResult, AnyError> {
-  let dst: &mut [u8] = &mut buffer;
-  let mut read = 0;
-  let mut written = 0;
-  for char in input.chars() {
-    let len = char.len_utf8();
-    if dst.len() < written + len {
-      break;
+  // Since `input` is already UTF-8, we can simply find the last UTF-8 code
+  // point boundary from input that fits in `buffer`, and copy the bytes up to
+  // that point.
+  let boundary = if buffer.len() >= input.len() {
+    input.len()
+  } else {
+    let mut boundary = buffer.len();
+
+    // The maximum length of a UTF-8 code point is 4 bytes.
+    for _ in 0..4 {
+      if input.is_char_boundary(boundary) {
+        break;
+      }
+      debug_assert!(boundary > 0);
+      boundary -= 1;
     }
-    char.encode_utf8(&mut dst[written..]);
-    written += len;
-    if char > '\u{FFFF}' {
-      read += 2
-    } else {
-      read += 1
-    };
-  }
-  Ok(EncodeIntoResult { read, written })
+
+    debug_assert!(input.is_char_boundary(boundary));
+    boundary
+  };
+
+  buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
+
+  Ok(EncodeIntoResult {
+    // The `read` output parameter is measured in UTF-16 code units.
+    read: input[..boundary].encode_utf16().count(),
+    written: boundary,
+  })
 }
 
 pub fn get_declaration() -> PathBuf {