From 5d263c932faf210f5b7a1e448a8eb26a2ac3ba68 Mon Sep 17 00:00:00 2001 From: Arthur Silva Date: Thu, 28 Jul 2022 12:46:10 +0200 Subject: [PATCH] serde_v8: improvements to avoid hitting unimplemented codepaths (#13915) --- Cargo.lock | 1 + core/Cargo.toml | 4 +- serde_v8/Cargo.toml | 1 + serde_v8/de.rs | 182 ++++++++++++++++++++++++++---------------- serde_v8/error.rs | 1 + serde_v8/ser.rs | 40 ++++++++-- serde_v8/tests/de.rs | 66 +++++++++++---- serde_v8/tests/ser.rs | 6 +- 8 files changed, 203 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcbac8b35d..2a9854d346 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3859,6 +3859,7 @@ dependencies = [ "bytes", "derive_more", "serde", + "serde_bytes", "serde_json", "smallvec", "v8", diff --git a/core/Cargo.toml b/core/Cargo.toml index a0549fcda7..3de4d63a3a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -20,7 +20,9 @@ v8_use_custom_libcxx = ["v8/use_custom_libcxx"] anyhow = "1.0.57" deno_ops = { path = "../ops", version = "0.22.0" } futures = "0.3.21" -indexmap = "1.8.1" +# Stay on 1.6 to avoid a dependency cycle in ahash https://github.com/tkaitchuck/aHash/issues/95 +# Projects not depending on ahash are unafected as cargo will pull any 1.X that is >= 1.6. +indexmap = "1.6" libc = "0.2.126" log = "0.4.16" once_cell = "1.10.0" diff --git a/serde_v8/Cargo.toml b/serde_v8/Cargo.toml index 3307bc0bba..3bdf897069 100644 --- a/serde_v8/Cargo.toml +++ b/serde_v8/Cargo.toml @@ -16,6 +16,7 @@ path = "lib.rs" bytes = "1" derive_more = "0.99.17" serde = { version = "1.0.136", features = ["derive"] } +serde_bytes = "0.11" smallvec = { version = "1.8", features = ["union"] } v8 = { version = "0.47.1", default-features = false } diff --git a/serde_v8/de.rs b/serde_v8/de.rs index 08b28c0f5c..c09796d4b2 100644 --- a/serde_v8/de.rs +++ b/serde_v8/de.rs @@ -58,19 +58,6 @@ where Ok(t) } -macro_rules! wip { - ($method:ident) => { - fn $method(self, _v: V) -> Result - where - V: Visitor<'de>, - { - unimplemented!() - } - }; -} - -// TODO: maybe check for BigInt truncation ? -// (i.e: values larger than i64/u64 can hold) macro_rules! deserialize_signed { ($dmethod:ident, $vmethod:ident, $t:tt) => { fn $dmethod(self, visitor: V) -> Result @@ -196,7 +183,12 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de> ) } - wip!(deserialize_char); + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } fn deserialize_str(self, visitor: V) -> Result where @@ -218,9 +210,6 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de> } } - wip!(deserialize_bytes); - wip!(deserialize_byte_buf); - fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, @@ -286,8 +275,14 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de> where V: Visitor<'de>, { - // TODO: error on length mismatch let obj = v8::Local::::try_from(self.input).unwrap(); + if obj.is_array() { + // If the obj is an array fail if it's length differs from the tuple length + let array = v8::Local::::try_from(self.input).unwrap(); + if array.length() as usize != len { + return Err(Error::LengthMismatch); + } + } let seq = SeqAccess { pos: 0, len: len as u32, @@ -317,26 +312,33 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de> // Assume object, then get_own_property_names let obj = v8::Local::::try_from(self.input) .map_err(|_| Error::ExpectedObject)?; - let prop_names = obj.get_own_property_names(self.scope); - let mut keys: Vec = match prop_names { - Some(names) => from_v8(self.scope, names.into()).unwrap(), - None => vec![], - }; - let keys: Vec> = keys - .drain(..) - .map(|x| x.into()) - // Filter keys to drop keys whose value is undefined - // TODO: optimize, since this doubles our get calls - .filter(|key| !obj.get(self.scope, *key).unwrap().is_undefined()) - .collect(); - let map = MapAccess { - obj, - keys, - pos: 0, - scope: self.scope, - }; - visitor.visit_map(map) + if v8::Local::::try_from(self.input).is_ok() { + let pairs_array = v8::Local::::try_from(self.input) + .unwrap() + .as_array(self.scope); + let map = MapPairsAccess { + pos: 0, + len: pairs_array.length(), + obj: pairs_array, + scope: self.scope, + }; + visitor.visit_map(map) + } else { + let prop_names = obj.get_own_property_names(self.scope); + let keys: Vec = match prop_names { + Some(names) => from_v8(self.scope, names.into()).unwrap(), + None => vec![], + }; + + let map = MapObjectAccess { + obj, + keys: keys.into_iter(), + next_value: None, + scope: self.scope, + }; + visitor.visit_map(map) + } } fn deserialize_struct( @@ -445,64 +447,104 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de> { visitor.visit_none() } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + magic::buffer::ZeroCopyBuf::from_v8(self.scope, self.input) + .and_then(|zb| visitor.visit_bytes(&*zb)) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + magic::buffer::ZeroCopyBuf::from_v8(self.scope, self.input) + .and_then(|zb| visitor.visit_byte_buf(Vec::from(&*zb))) + } } -struct MapAccess<'a, 'b, 's> { +struct MapObjectAccess<'a, 's> { obj: v8::Local<'a, v8::Object>, - scope: &'b mut v8::HandleScope<'s>, - keys: Vec>, - pos: usize, + scope: &'a mut v8::HandleScope<'s>, + keys: std::vec::IntoIter>, + next_value: Option>, } -impl<'de> de::MapAccess<'de> for MapAccess<'_, '_, '_> { +impl<'de> de::MapAccess<'de> for MapObjectAccess<'_, '_> { type Error = Error; fn next_key_seed>( &mut self, seed: K, ) -> Result> { - Ok(match self.keys.get(self.pos) { - Some(key) => { - let mut deserializer = Deserializer::new(self.scope, *key, None); - Some(seed.deserialize(&mut deserializer)?) + for key in self.keys.by_ref() { + let v8_val = self.obj.get(self.scope, key.v8_value).unwrap(); + if v8_val.is_undefined() { + // Historically keys/value pairs with undefined values are not added to the output + continue; } - None => None, - }) + self.next_value = Some(v8_val); + let mut deserializer = Deserializer::new(self.scope, key.v8_value, None); + let k = seed.deserialize(&mut deserializer)?; + return Ok(Some(k)); + } + Ok(None) } fn next_value_seed>( &mut self, seed: V, ) -> Result { - if self.pos >= self.keys.len() { - return Err(Error::LengthMismatch); - } - let key = self.keys[self.pos]; - self.pos += 1; - let v8_val = self.obj.get(self.scope, key).unwrap(); + let v8_val = self.next_value.take().unwrap(); let mut deserializer = Deserializer::new(self.scope, v8_val, None); seed.deserialize(&mut deserializer) } - fn next_entry_seed< - K: de::DeserializeSeed<'de>, - V: de::DeserializeSeed<'de>, - >( + fn size_hint(&self) -> Option { + Some(self.keys.len()) + } +} + +struct MapPairsAccess<'a, 's> { + obj: v8::Local<'a, v8::Array>, + pos: u32, + len: u32, + scope: &'a mut v8::HandleScope<'s>, +} + +impl<'de> de::MapAccess<'de> for MapPairsAccess<'_, '_> { + type Error = Error; + + fn next_key_seed>( &mut self, - kseed: K, - vseed: V, - ) -> Result> { - if self.pos >= self.keys.len() { - return Ok(None); + seed: K, + ) -> Result> { + if self.pos < self.len { + let v8_key = self.obj.get_index(self.scope, self.pos).unwrap(); + self.pos += 1; + let mut deserializer = Deserializer::new(self.scope, v8_key, None); + let k = seed.deserialize(&mut deserializer)?; + Ok(Some(k)) + } else { + Ok(None) } - let v8_key = self.keys[self.pos]; + } + + fn next_value_seed>( + &mut self, + seed: V, + ) -> Result { + debug_assert!(self.pos < self.len); + let v8_val = self.obj.get_index(self.scope, self.pos).unwrap(); self.pos += 1; - let mut kdeserializer = Deserializer::new(self.scope, v8_key, None); - Ok(Some((kseed.deserialize(&mut kdeserializer)?, { - let v8_val = self.obj.get(self.scope, v8_key).unwrap(); - let mut deserializer = Deserializer::new(self.scope, v8_val, None); - vseed.deserialize(&mut deserializer)? - }))) + let mut deserializer = Deserializer::new(self.scope, v8_val, None); + seed.deserialize(&mut deserializer) + } + + fn size_hint(&self) -> Option { + Some((self.len - self.pos) as usize / 2) } } diff --git a/serde_v8/error.rs b/serde_v8/error.rs index eb6131b306..f6afc69dfa 100644 --- a/serde_v8/error.rs +++ b/serde_v8/error.rs @@ -6,6 +6,7 @@ use serde::{de, ser}; pub type Result = std::result::Result; #[derive(Clone, Debug, PartialEq)] +#[non_exhaustive] pub enum Error { Message(String), diff --git a/serde_v8/ser.rs b/serde_v8/ser.rs index 22235ddd97..3bd71a02dd 100644 --- a/serde_v8/ser.rs +++ b/serde_v8/ser.rs @@ -419,19 +419,20 @@ impl<'a, 'b, 'c> ser::Serializer for Serializer<'a, 'b, 'c> { Ok(v8::Boolean::new(&mut self.scope.borrow_mut(), v).into()) } - fn serialize_char(self, _v: char) -> JsResult<'a> { - unimplemented!(); + fn serialize_char(self, v: char) -> JsResult<'a> { + self.serialize_str(&v.to_string()) } fn serialize_str(self, v: &str) -> JsResult<'a> { - v8::String::new(&mut self.scope.borrow_mut(), v) - .map(|v| v.into()) - .ok_or(Error::ExpectedString) + Ok( + v8::String::new(&mut self.scope.borrow_mut(), v) + .unwrap() + .into(), + ) } - fn serialize_bytes(self, _v: &[u8]) -> JsResult<'a> { - // TODO: investigate using Uint8Arrays - unimplemented!() + fn serialize_bytes(self, v: &[u8]) -> JsResult<'a> { + Ok(slice_to_uint8array(&mut self.scope.borrow_mut(), v).into()) } fn serialize_none(self) -> JsResult<'a> { @@ -570,3 +571,26 @@ impl<'a, 'b, 'c> ser::Serializer for Serializer<'a, 'b, 'c> { Ok(VariantSerializer::new(scope, variant, x)) } } + +pub fn slice_to_uint8array<'a>( + scope: &mut v8::HandleScope<'a>, + buf: &[u8], +) -> v8::Local<'a, v8::Uint8Array> { + let buffer = if buf.is_empty() { + v8::ArrayBuffer::new(scope, 0) + } else { + let store: v8::UniqueRef<_> = + v8::ArrayBuffer::new_backing_store(scope, buf.len()); + // SAFETY: raw memory copy into the v8 ArrayBuffer allocated above + unsafe { + std::ptr::copy_nonoverlapping( + buf.as_ptr(), + store.data().unwrap().as_ptr() as *mut u8, + buf.len(), + ) + } + v8::ArrayBuffer::with_backing_store(scope, &store.make_shared()) + }; + v8::Uint8Array::new(scope, buffer, 0, buf.len()) + .expect("Failed to create UintArray8") +} diff --git a/serde_v8/tests/de.rs b/serde_v8/tests/de.rs index d08f98d67c..1d4dae51ae 100644 --- a/serde_v8/tests/de.rs +++ b/serde_v8/tests/de.rs @@ -90,6 +90,7 @@ detest!(de_option_undefined, Option, "undefined", None); detest!(de_unit_null, (), "null", ()); detest!(de_unit_undefined, (), "undefined", ()); detest!(de_bool, bool, "true", true); +detest!(de_char, char, "'é'", 'é'); detest!(de_u64, u64, "32", 32); detest!(de_string, String, "'Hello'", "Hello".to_owned()); detest!(de_vec_u64, Vec, "[1,2,3,4,5]", vec![1, 2, 3, 4, 5]); @@ -105,6 +106,12 @@ detest!( "[123, true, null]", (123, true, ()) ); +defail!( + de_tuple_wrong_len, + (u64, bool, ()), + "[123, true, null, 'extra']", + |e| e == Err(Error::LengthMismatch) +); detest!( de_mathop, MathOp, @@ -281,24 +288,30 @@ detest!( de_json_object, serde_json::Value, "({a: 1, b: 'hello', c: true})", - serde_json::Value::Object( - vec![ - ( - "a".to_string(), - serde_json::Value::Number(serde_json::Number::from(1)), - ), - ( - "b".to_string(), - serde_json::Value::String("hello".to_string()), - ), - ("c".to_string(), serde_json::Value::Bool(true),), - ] - .drain(..) - .collect() - ) + serde_json::json!({ + "a": 1, + "b": "hello", + "c": true, + }) +); +detest!( + de_json_object_from_map, + serde_json::Value, + "(new Map([['a', 1], ['b', 'hello'], ['c', true]]))", + serde_json::json!({ + "a": 1, + "b": "hello", + "c": true, + }) +); +// TODO: this is not optimal, ideally we'd get an array of [1,2,3] instead. +// Fixing that will require exposing Set::AsArray in the v8 bindings. +detest!( + de_json_object_from_set, + serde_json::Value, + "(new Set([1, 2, 3]))", + serde_json::json!({}) ); -detest!(de_bigint_u64, u64, "BigInt(2**59)", 1 << 59); -detest!(de_bigint_i64, i64, "BigInt(-(2**59))", -(1 << 59)); defail!(defail_struct, MathOp, "123", |e| e == Err(Error::ExpectedObject)); @@ -323,6 +336,25 @@ detest!(de_bstr, ByteString, "'hello'", "hello".into()); defail!(defail_bstr, ByteString, "'👋bye'", |e| e == Err(Error::ExpectedLatin1)); +#[derive(PartialEq, Debug, Deserialize)] +pub struct StructWithBytes { + #[serde(with = "serde_bytes")] + a: Vec, + #[serde(with = "serde_bytes")] + b: Vec, + #[serde(with = "serde_bytes")] + c: Vec, +} +detest!( + de_struct_with_bytes, + StructWithBytes, + "({ a: new Uint8Array([1, 2]), b: (new Uint8Array([3 , 4])).buffer, c: (new Uint32Array([0])).buffer})", + StructWithBytes { + a: vec![1, 2], + b: vec![3, 4], + c: vec![0, 0, 0, 0], + } +); detest!( de_u16str, U16String, diff --git a/serde_v8/tests/ser.rs b/serde_v8/tests/ser.rs index e292e242da..76a43bdcca 100644 --- a/serde_v8/tests/ser.rs +++ b/serde_v8/tests/ser.rs @@ -104,17 +104,19 @@ macro_rules! sertest_polluted { }; } +sertest!(ser_char, 'é', "x === 'é'"); sertest!(ser_option_some, Some(true), "x === true"); sertest!(ser_option_null, None as Option, "x === null"); sertest!(ser_unit_null, (), "x === null"); sertest!(ser_bool, true, "x === true"); sertest!(ser_u64, 32, "x === 32"); sertest!(ser_f64, 12345.0, "x === 12345.0"); -sertest!(ser_string, "Hello".to_owned(), "x === 'Hello'"); +sertest!(ser_string, "Hello", "x === 'Hello'"); +sertest!(ser_bytes, b"\x01\x02\x03", "arrEqual(x, [1, 2, 3])"); sertest!(ser_vec_u64, vec![1, 2, 3, 4, 5], "arrEqual(x, [1,2,3,4,5])"); sertest!( ser_vec_string, - vec!["hello".to_owned(), "world".to_owned(),], + vec!["hello", "world"], "arrEqual(x, ['hello', 'world'])" ); sertest!(ser_tuple, (123, true, ()), "arrEqual(x, [123, true, null])");