0
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-03-03 09:31:22 -05:00

perf(web): Optimize TextDecoder by adding a new U16String type (#13923)

This commit is contained in:
Andreu Botella 2022-03-16 00:22:00 +01:00 committed by GitHub
parent bb53135ed8
commit 672f66dde1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 147 additions and 16 deletions

View file

@ -27,6 +27,7 @@ pub use serde_v8;
pub use serde_v8::Buffer as ZeroCopyBuf;
pub use serde_v8::ByteString;
pub use serde_v8::StringOrBuffer;
pub use serde_v8::U16String;
pub use url;
pub use v8;

View file

@ -16,6 +16,7 @@ use deno_core::Extension;
use deno_core::OpState;
use deno_core::Resource;
use deno_core::ResourceId;
use deno_core::U16String;
use deno_core::ZeroCopyBuf;
use encoding_rs::CoderResult;
use encoding_rs::Decoder;
@ -268,7 +269,7 @@ fn op_encoding_decode(
state: &mut OpState,
data: ZeroCopyBuf,
options: DecodeOptions,
) -> Result<String, AnyError> {
) -> Result<U16String, AnyError> {
let DecodeOptions { rid, stream } = options;
let resource = state.resource_table.get::<TextDecoderResource>(rid)?;
@ -276,23 +277,20 @@ fn op_encoding_decode(
let mut decoder = resource.decoder.borrow_mut();
let fatal = resource.fatal;
let max_buffer_length = if fatal {
decoder
.max_utf8_buffer_length_without_replacement(data.len())
.ok_or_else(|| range_error("Value too large to decode."))?
} else {
decoder
.max_utf8_buffer_length(data.len())
.ok_or_else(|| range_error("Value too large to decode."))?
};
let max_buffer_length = decoder
.max_utf16_buffer_length(data.len())
.ok_or_else(|| range_error("Value too large to decode."))?;
let mut output = String::with_capacity(max_buffer_length);
let mut output = U16String::with_zeroes(max_buffer_length);
if fatal {
let (result, _) =
decoder.decode_to_string_without_replacement(&data, &mut output, !stream);
let (result, _, written) =
decoder.decode_to_utf16_without_replacement(&data, &mut output, !stream);
match result {
DecoderResult::InputEmpty => Ok(output),
DecoderResult::InputEmpty => {
output.truncate(written);
Ok(output)
}
DecoderResult::OutputFull => {
Err(range_error("Provided buffer too small."))
}
@ -301,9 +299,13 @@ fn op_encoding_decode(
}
}
} else {
let (result, _, _) = decoder.decode_to_string(&data, &mut output, !stream);
let (result, _, written, _) =
decoder.decode_to_utf16(&data, &mut output, !stream);
match result {
CoderResult::InputEmpty => Ok(output),
CoderResult::InputEmpty => {
output.truncate(written);
Ok(output)
}
CoderResult::OutputFull => Err(range_error("Provided buffer too small.")),
}
}

View file

@ -14,6 +14,7 @@ pub use keys::KeyCache;
pub use magic::buffer::MagicBuffer as Buffer;
pub use magic::bytestring::ByteString;
pub use magic::string_or_buffer::StringOrBuffer;
pub use magic::u16string::U16String;
pub use magic::Value;
pub use ser::{to_v8, Serializer};
pub use serializable::{Serializable, SerializablePkg};

View file

@ -3,6 +3,7 @@ pub mod buffer;
pub mod bytestring;
mod field;
pub mod string_or_buffer;
pub mod u16string;
mod value;
pub mod zero_copy_buf;

View file

@ -0,0 +1,62 @@
use std::ops::{Deref, DerefMut};
use serde::Serialize;
/// Struct name that `U16String`'s `Serialize` impl passes to
/// `serialize_struct`; it marks the value as a magic UTF-16 string.
pub const NAME: &str = "$__v8_magic_u16string";
/// Field key under which the address of the `u16` buffer is serialized.
pub const FIELD_PTR: &str = "$__v8_magic_u16string_ptr";
/// Field key under which the buffer length (in `u16` elements) is serialized.
pub const FIELD_LEN: &str = "$__v8_magic_u16string_len";
/// An owned, growable buffer of 16-bit code units.
///
/// This is a thin newtype over `Vec<u16>`; the inner vector is public so
/// callers can build or inspect it directly.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct U16String(pub Vec<u16>);

impl U16String {
  /// Creates a buffer holding `length` code units, all initialised to zero.
  ///
  /// Useful as a pre-sized output buffer that is later cut down with
  /// [`U16String::truncate`].
  pub fn with_zeroes(length: usize) -> U16String {
    U16String(vec![0u16; length])
  }

  /// Shortens the buffer to at most `new_length` code units and releases any
  /// spare capacity.
  ///
  /// If `new_length` is greater than or equal to the current length the
  /// contents are left untouched.
  pub fn truncate(&mut self, new_length: usize) {
    self.0.truncate(new_length);
    self.0.shrink_to_fit()
  }
}
/// Read-only access to the code units as a plain `[u16]` slice.
impl Deref for U16String {
  type Target = [u16];

  fn deref(&self) -> &Self::Target {
    self.0.as_slice()
  }
}
/// Mutable access to the code units as a plain `[u16]` slice.
impl DerefMut for U16String {
  fn deref_mut(&mut self) -> &mut Self::Target {
    self.0.as_mut_slice()
  }
}
/// Cheap borrowed view of the code units for APIs that take `AsRef<[u16]>`.
impl AsRef<[u16]> for U16String {
  fn as_ref(&self) -> &[u16] {
    self.0.as_slice()
  }
}
/// Cheap mutable view of the code units for APIs that take `AsMut<[u16]>`.
impl AsMut<[u16]> for U16String {
  fn as_mut(&mut self) -> &mut [u16] {
    self.0.as_mut_slice()
  }
}
/// Serializes the string as a "magic" struct named [`NAME`].
///
/// Instead of the contents, the address and length of the underlying buffer
/// are written under [`FIELD_PTR`] and [`FIELD_LEN`]. A serializer that
/// recognises [`NAME`] can then read the code units straight out of the
/// buffer; the pointer is only meaningful while `self` is still borrowed by
/// this call.
impl Serialize for U16String {
  fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
  where
    S: serde::Serializer,
  {
    use serde::ser::SerializeStruct;

    // Exactly two fields are emitted below (pointer and length), so the
    // declared field count must be 2.
    let mut s = serializer.serialize_struct(NAME, 2)?;
    s.serialize_field(FIELD_PTR, &(self.0.as_ptr() as usize))?;
    s.serialize_field(FIELD_LEN, &self.0.len())?;
    s.end()
  }
}
// TODO: Deserialize

View file

@ -339,11 +339,64 @@ impl<'a, 'b, 'c> ser::SerializeStruct
}
}
/// Struct serializer for the magic `U16String` representation.
///
/// It records the pointer and length fields as they are serialized and
/// builds a V8 two-byte string from them in `end`.
pub struct MagicU16StringSerializer<'a, 'b, 'c> {
  // Scope handle borrowed mutably in `end` to allocate the V8 string.
  scope: ScopePtr<'a, 'b, 'c>,
  // Start of the `u16` buffer. `None` until the pointer field has been
  // serialized, or if the serialized address was null.
  ptr: Option<std::ptr::NonNull<u16>>,
  // Number of `u16` elements in the buffer. `None` until the length field
  // has been serialized.
  len: Option<usize>,
}
impl<'a, 'b, 'c> MagicU16StringSerializer<'a, 'b, 'c> {
pub fn new(scope: ScopePtr<'a, 'b, 'c>) -> Self {
Self {
scope,
ptr: None,
len: None,
}
}
}
impl<'a, 'b, 'c> ser::SerializeStruct for MagicU16StringSerializer<'a, 'b, 'c> {
type Ok = JsValue<'a>;
type Error = Error;
fn serialize_field<T: ?Sized + Serialize>(
&mut self,
key: &'static str,
value: &T,
) -> Result<()> {
// Get u64 chunk
let transmuted = value.serialize(magic::FieldSerializer {})?;
match key {
magic::u16string::FIELD_PTR => {
self.ptr = std::ptr::NonNull::new(transmuted as *mut u16)
}
magic::u16string::FIELD_LEN => self.len = Some(transmuted as usize),
_ => unreachable!(),
}
Ok(())
}
fn end(self) -> JsResult<'a> {
// SAFETY: This function is only called from U16String::serialize(), which
// guarantees the Vec is still alive.
let slice = unsafe {
std::slice::from_raw_parts(self.ptr.unwrap().as_ptr(), self.len.unwrap())
};
let scope = &mut *self.scope.borrow_mut();
let v8_value =
v8::String::new_from_two_byte(scope, slice, v8::NewStringType::Normal)
.unwrap();
Ok(v8_value.into())
}
}
// Dispatches between magic and regular struct serializers.
//
// Each variant wraps one concrete struct serializer; the `SerializeStruct`
// impl for this enum forwards `serialize_field` and `end` to whichever
// variant is active.
pub enum StructSerializers<'a, 'b, 'c> {
  // NOTE(review): presumably the generic magic value passthrough; the
  // selection logic is not visible here — confirm.
  Magic(MagicSerializer<'a>),
  // NOTE(review): presumably selected for the magic buffer struct name —
  // confirm against `Serializer::serialize_struct`.
  MagicBuffer(MagicBufferSerializer<'a, 'b, 'c>),
  // Serializer for the magic byte-string representation.
  MagicByteString(MagicByteStringSerializer<'a, 'b, 'c>),
  // Selected when the struct name is `magic::u16string::NAME`.
  MagicU16String(MagicU16StringSerializer<'a, 'b, 'c>),
  // Fallback for every other struct: serializes into a regular object.
  Regular(ObjectSerializer<'a, 'b, 'c>),
}
@ -360,6 +413,7 @@ impl<'a, 'b, 'c> ser::SerializeStruct for StructSerializers<'a, 'b, 'c> {
StructSerializers::Magic(s) => s.serialize_field(key, value),
StructSerializers::MagicBuffer(s) => s.serialize_field(key, value),
StructSerializers::MagicByteString(s) => s.serialize_field(key, value),
StructSerializers::MagicU16String(s) => s.serialize_field(key, value),
StructSerializers::Regular(s) => s.serialize_field(key, value),
}
}
@ -369,6 +423,7 @@ impl<'a, 'b, 'c> ser::SerializeStruct for StructSerializers<'a, 'b, 'c> {
StructSerializers::Magic(s) => s.end(),
StructSerializers::MagicBuffer(s) => s.end(),
StructSerializers::MagicByteString(s) => s.end(),
StructSerializers::MagicU16String(s) => s.end(),
StructSerializers::Regular(s) => s.end(),
}
}
@ -607,6 +662,10 @@ impl<'a, 'b, 'c> ser::Serializer for Serializer<'a, 'b, 'c> {
let m = MagicByteStringSerializer::new(self.scope);
Ok(StructSerializers::MagicByteString(m))
}
magic::u16string::NAME => {
let m = MagicU16StringSerializer::new(self.scope);
Ok(StructSerializers::MagicU16String(m))
}
_ => {
let o = ObjectSerializer::new(self.scope, len);
Ok(StructSerializers::Regular(o))

View file

@ -4,6 +4,7 @@ use std::mem::transmute_copy;
use crate::Buffer;
use crate::ByteString;
use crate::U16String;
/// Serializable exists to allow boxing values as "objects" to be serialized later,
/// this is particularly useful for async op-responses. This trait is a more efficient
@ -63,6 +64,7 @@ pub enum Primitive {
String(String),
Buffer(Buffer),
ByteString(ByteString),
U16String(U16String),
}
impl serde::Serialize for Primitive {
@ -86,6 +88,7 @@ impl serde::Serialize for Primitive {
Self::String(x) => x.serialize(s),
Self::Buffer(x) => x.serialize(s),
Self::ByteString(x) => x.serialize(s),
Self::U16String(x) => x.serialize(s),
}
}
}
@ -130,6 +133,8 @@ impl<T: serde::Serialize + 'static> From<T> for SerializablePkg {
Self::Primitive(Primitive::Buffer(tc(x)))
} else if tid == TypeId::of::<ByteString>() {
Self::Primitive(Primitive::ByteString(tc(x)))
} else if tid == TypeId::of::<U16String>() {
Self::Primitive(Primitive::U16String(tc(x)))
} else {
Self::Serializable(Box::new(x))
}