From dfd58429134246a7b32023045b256b7d96332468 Mon Sep 17 00:00:00 2001 From: Andreu Botella Date: Tue, 22 Jun 2021 14:11:38 +0200 Subject: [PATCH] Add missing methods for one- and two-byte strings (#704) Fixes #703. --- src/binding.cc | 41 ++++++++++++++ src/string.rs | 132 ++++++++++++++++++++++++++++++++++++++++++++-- tests/test_api.rs | 44 ++++++++++++++++ 3 files changed, 213 insertions(+), 4 deletions(-) diff --git a/src/binding.cc b/src/binding.cc index 12fc7783..176c95a9 100644 --- a/src/binding.cc +++ b/src/binding.cc @@ -852,12 +852,32 @@ const v8::String* v8__String__NewFromOneByte(v8::Isolate* isolate, v8::String::NewFromOneByte(isolate, data, new_type, length)); } +const v8::String* v8__String__NewFromTwoByte(v8::Isolate* isolate, + const uint16_t* data, + v8::NewStringType new_type, + int length) { + return maybe_local_to_ptr( + v8::String::NewFromTwoByte(isolate, data, new_type, length)); +} + int v8__String__Length(const v8::String& self) { return self.Length(); } int v8__String__Utf8Length(const v8::String& self, v8::Isolate* isolate) { return self.Utf8Length(isolate); } +int v8__String__Write(const v8::String& self, v8::Isolate* isolate, + uint16_t* buffer, int start, int length, + int options) { + return self.Write(isolate, buffer, start, length, options); +} + +int v8__String__WriteOneByte(const v8::String& self, v8::Isolate* isolate, + uint8_t* buffer, int start, int length, + int options) { + return self.WriteOneByte(isolate, buffer, start, length, options); +} + int v8__String__WriteUtf8(const v8::String& self, v8::Isolate* isolate, char* buffer, int length, int* nchars_ref, int options) { @@ -884,6 +904,26 @@ const v8::String* v8__String__NewExternalOneByteStatic(v8::Isolate* isolate, isolate, new ExternalStaticOneByteStringResource(data, length))); } +class ExternalStaticStringResource + : public v8::String::ExternalStringResource { + public: + ExternalStaticStringResource(const uint16_t* data, int length) + : _data(data), _length(length) {} 
+ const uint16_t* data() const override { return _data; } + size_t length() const override { return _length; } + + private: + const uint16_t* _data; + const int _length; +}; + +const v8::String* v8__String__NewExternalTwoByteStatic(v8::Isolate* isolate, + const uint16_t* data, + int length) { + return maybe_local_to_ptr(v8::String::NewExternalTwoByte( + isolate, new ExternalStaticStringResource(data, length))); +} + bool v8__String__IsExternal(const v8::String& self) { return self.IsExternal(); } @@ -894,6 +934,7 @@ bool v8__String__IsExternalTwoByte(const v8::String& self) { return self.IsExternalTwoByte(); } bool v8__String__IsOneByte(const v8::String& self) { return self.IsOneByte(); } +bool v8__String__ContainsOnlyOneByte(const v8::String& self) { return self.ContainsOnlyOneByte(); } const v8::Symbol* v8__Symbol__New(v8::Isolate* isolate, const v8::String& description) { diff --git a/src/string.rs b/src/string.rs index 8ea7d96a..7c9ce45c 100644 --- a/src/string.rs +++ b/src/string.rs @@ -27,10 +27,35 @@ extern "C" { length: int, ) -> *const String; + fn v8__String__NewFromTwoByte( + isolate: *mut Isolate, + data: *const u16, + new_type: NewStringType, + length: int, + ) -> *const String; + fn v8__String__Length(this: *const String) -> int; fn v8__String__Utf8Length(this: *const String, isolate: *mut Isolate) -> int; + fn v8__String__Write( + this: *const String, + isolate: *mut Isolate, + buffer: *mut u16, + start: int, + length: int, + options: WriteOptions, + ) -> int; + + fn v8__String__WriteOneByte( + this: *const String, + isolate: *mut Isolate, + buffer: *mut u8, + start: int, + length: int, + options: WriteOptions, + ) -> int; + fn v8__String__WriteUtf8( this: *const String, isolate: *mut Isolate, @@ -46,10 +71,17 @@ extern "C" { length: int, ) -> *const String; + fn v8__String__NewExternalTwoByteStatic( + isolate: *mut Isolate, + buffer: *const u16, + length: int, + ) -> *const String; + fn v8__String__IsExternal(this: *const String) -> bool; fn 
v8__String__IsExternalOneByte(this: *const String) -> bool; fn v8__String__IsExternalTwoByte(this: *const String) -> bool; fn v8__String__IsOneByte(this: *const String) -> bool; + fn v8__String__ContainsOnlyOneByte(this: *const String) -> bool; } #[repr(C)] @@ -118,13 +150,34 @@ impl String { buffer: &[u8], new_type: NewStringType, ) -> Option<Local<'s, String>> { + let buffer_len = buffer.len().try_into().ok()?; unsafe { scope.cast_local(|sd| { v8__String__NewFromOneByte( sd.get_isolate_ptr(), buffer.as_ptr(), new_type, - buffer.len() as int, + buffer_len, ) }) } } + + /// Allocates a new string from UTF-16 data. Only returns an empty value when + /// length > kMaxLength. + pub fn new_from_two_byte<'s>( + scope: &mut HandleScope<'s, ()>, + buffer: &[u16], + new_type: NewStringType, + ) -> Option<Local<'s, String>> { + let buffer_len = buffer.len().try_into().ok()?; + unsafe { + scope.cast_local(|sd| { + v8__String__NewFromTwoByte( + sd.get_isolate_ptr(), + buffer.as_ptr(), + new_type, + buffer_len, ) }) } @@ -141,6 +194,49 @@ impl String { unsafe { v8__String__Utf8Length(self, scope) as usize } } + /// Writes the contents of the string to an external buffer, as 16-bit + /// (UTF-16) character codes. + pub fn write( + &self, + scope: &mut Isolate, + buffer: &mut [u16], + start: usize, + options: WriteOptions, + ) -> usize { + unsafe { + v8__String__Write( + self, + scope, + buffer.as_mut_ptr(), + start.try_into().unwrap_or(int::max_value()), + buffer.len().try_into().unwrap_or(int::max_value()), + options, + ) as usize + } + } + + /// Writes the contents of the string to an external buffer, as one-byte + /// (Latin-1) characters. 
+ pub fn write_one_byte( + &self, + scope: &mut Isolate, + buffer: &mut [u8], + start: usize, + options: WriteOptions, + ) -> usize { + unsafe { + v8__String__WriteOneByte( + self, + scope, + buffer.as_mut_ptr(), + start.try_into().unwrap_or(int::max_value()), + buffer.len().try_into().unwrap_or(int::max_value()), + options, + ) as usize + } + } + + /// Writes the contents of the string to an external buffer, as UTF-8. pub fn write_utf8( &self, scope: &mut Isolate, @@ -191,6 +287,23 @@ impl String { } } + /// Creates a v8::String from a `&'static [u16]`. + pub fn new_external_twobyte_static<'s>( + scope: &mut HandleScope<'s, ()>, + buffer: &'static [u16], + ) -> Option<Local<'s, String>> { + let buffer_len = buffer.len().try_into().ok()?; + unsafe { + scope.cast_local(|sd| { + v8__String__NewExternalTwoByteStatic( + sd.get_isolate_ptr(), + buffer.as_ptr(), + buffer_len, + ) + }) + } + } + /// True if string is external pub fn is_external(&self) -> bool { // TODO: re-enable on next v8-release @@ -209,17 +322,28 @@ impl String { } /// True if string is external & two-byte - /// NOTE: can't yet be created via rusty_v8 + /// (e.g: created with new_external_twobyte_static) pub fn is_external_twobyte(&self) -> bool { unsafe { v8__String__IsExternalTwoByte(self) } } - /// True if string is known to contain only one-byte data - /// doesn't read the string so can return false positives + /// True if string is known to contain only one-byte data. + /// Doesn't read the string so can return false negatives. + /// + /// For a method that will not return false negatives at the cost of + /// potentially reading the entire string, use [`contains_only_onebyte()`]. + /// + /// [`contains_only_onebyte()`]: String::contains_only_onebyte pub fn is_onebyte(&self) -> bool { - unsafe { v8__String__IsExternalOneByte(self) } + unsafe { v8__String__IsOneByte(self) } } + /// True if the string contains only one-byte data. + /// Will read the entire string in some cases. 
+ pub fn contains_only_onebyte(&self) -> bool { + unsafe { v8__String__ContainsOnlyOneByte(self) } + } + /// Convenience function not present in the original V8 API. pub fn to_rust_string_lossy( &self, diff --git a/tests/test_api.rs b/tests/test_api.rs index fca62c7d..515dcd48 100644 --- a/tests/test_api.rs +++ b/tests/test_api.rs @@ -161,6 +161,12 @@ fn test_string() { local.write_utf8(scope, &mut vec, Some(&mut nchars), options) ); assert_eq!(15, nchars); + let mut u16_buffer = [0u16; 16]; + assert_eq!(15, local.write(scope, &mut u16_buffer, 0, options)); + assert_eq!( + String::from(reference), + String::from_utf16(&u16_buffer[..15]).unwrap() + ); } { let scope = &mut v8::HandleScope::new(isolate); @@ -184,8 +190,24 @@ fn test_string() { .unwrap(); assert_eq!(3, local.length()); assert_eq!(3, local.utf8_length(scope)); + let options = v8::WriteOptions::NO_NULL_TERMINATION; + let mut buffer = [0u8; 3]; + assert_eq!(3, local.write_one_byte(scope, &mut buffer, 0, options)); + assert_eq!(b"foo", &buffer); assert_eq!("foo", local.to_rust_string_lossy(scope)); } + { + let scope = &mut v8::HandleScope::new(isolate); + let local = v8::String::new_from_two_byte( + scope, + &[0xD83E, 0xDD95], + v8::NewStringType::Normal, + ) + .unwrap(); + assert_eq!(2, local.length()); + assert_eq!(4, local.utf8_length(scope)); + assert_eq!("🦕", local.to_rust_string_lossy(scope)); + } } #[test] @@ -5141,7 +5163,9 @@ fn external_strings() { // Externality checks assert!(json_external.is_external()); assert!(json_external.is_external_onebyte()); + assert!(!json_external.is_external_twobyte()); assert!(json_external.is_onebyte()); + assert!(json_external.contains_only_onebyte()); // In & out let hello = @@ -5151,11 +5175,31 @@ fn external_strings() { // Externality checks assert!(hello.is_external()); assert!(hello.is_external_onebyte()); + assert!(!hello.is_external_twobyte()); assert!(hello.is_onebyte()); + assert!(hello.contains_only_onebyte()); + + // Two-byte static + let two_byte = 
v8::String::new_external_twobyte_static( + scope, + &[0xDD95, 0x0020, 0xD83E, 0xDD95], + ) + .unwrap(); + let rust_str = two_byte.to_rust_string_lossy(scope); + assert_eq!(rust_str, "\u{FFFD} 🦕"); + assert!(two_byte.length() == 4); + // Externality checks + assert!(two_byte.is_external()); + assert!(!two_byte.is_external_onebyte()); + assert!(two_byte.is_external_twobyte()); + assert!(!two_byte.is_onebyte()); + assert!(!two_byte.contains_only_onebyte()); // two-byte "internal" test let gradients = v8::String::new(scope, "∇gradients").unwrap(); assert!(!gradients.is_external()); assert!(!gradients.is_external_onebyte()); + assert!(!gradients.is_external_twobyte()); assert!(!gradients.is_onebyte()); + assert!(!gradients.contains_only_onebyte()); }