0
0
Fork 0
mirror of https://github.com/denoland/rusty_v8.git synced 2025-01-21 21:50:20 -05:00

feat: Fix is_onebyte and add an uninit write for onebyte (#1255)

Some fixes around one-byte strings:

 - `is_onebyte` was calling the wrong v8 API.
 - We didn't have a way to write one-byte strings with uninitialized buffers.
 - (bonus) The `test_string` method was quite slow, making testing a bit of a pain.
This commit is contained in:
Matt Mastracci 2023-06-26 09:30:16 -06:00 committed by GitHub
parent b97dd1b1d8
commit 672254e113
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 23 deletions

View file

@ -258,6 +258,28 @@ impl String {
} }
} }
/// Writes the contents of the string to an external [`MaybeUninit`] buffer, as one-byte
/// (Latin-1) characters.
#[inline(always)]
pub fn write_one_byte_uninit(
&self,
scope: &mut Isolate,
buffer: &mut [MaybeUninit<u8>],
start: usize,
options: WriteOptions,
) -> usize {
unsafe {
v8__String__WriteOneByte(
self,
scope,
buffer.as_mut_ptr() as *mut u8,
start.try_into().unwrap_or(int::max_value()),
buffer.len().try_into().unwrap_or(int::max_value()),
options,
) as usize
}
}
/// Writes the contents of the string to an external buffer, as UTF-8. /// Writes the contents of the string to an external buffer, as UTF-8.
#[inline(always)] #[inline(always)]
pub fn write_utf8( pub fn write_utf8(
@ -281,7 +303,7 @@ impl String {
} }
} }
/// Writes the contents of the string to an external buffer, as UTF-8. /// Writes the contents of the string to an external [`MaybeUninit`] buffer, as UTF-8.
pub fn write_utf8_uninit( pub fn write_utf8_uninit(
&self, &self,
scope: &mut Isolate, scope: &mut Isolate,
@ -378,16 +400,17 @@ impl String {
unsafe { v8__String__IsExternalTwoByte(self) } unsafe { v8__String__IsExternalTwoByte(self) }
} }
/// True if string is known to contain only one-byte data. /// Will return true if and only if string is known for certain to contain only one-byte data,
/// Doesn't read the string so can return false positives. /// ie: Latin-1, a.k.a. ISO-8859-1 code points. Doesn't read the string so can return false
/// negatives, and a return value of false does not mean this string is not one-byte data.
/// ///
/// For a method that will not return false positives at the cost of /// For a method that will not return false negatives at the cost of
/// potentially reading the entire string, use [`contains_only_onebyte()`]. /// potentially reading the entire string, use [`contains_only_onebyte()`].
/// ///
/// [`contains_only_onebyte()`]: String::contains_only_onebyte /// [`contains_only_onebyte()`]: String::contains_only_onebyte
#[inline(always)] #[inline(always)]
pub fn is_onebyte(&self) -> bool { pub fn is_onebyte(&self) -> bool {
unsafe { v8__String__IsExternalOneByte(self) } unsafe { v8__String__IsOneByte(self) }
} }
/// True if the string contains only one-byte data. /// True if the string contains only one-byte data.

View file

@ -305,27 +305,35 @@ fn test_string() {
} }
{ {
let scope = &mut v8::HandleScope::new(isolate); let scope = &mut v8::HandleScope::new(isolate);
let buffer = (0..v8::String::max_length() / 4) let mut buffer = Vec::with_capacity(v8::String::max_length());
.map(|_| '\u{10348}') // UTF8: 0xF0 0x90 0x8D 0x88 for _ in 0..buffer.capacity() / 4 {
.collect::<String>(); // U+10348 in UTF-8
let local = v8::String::new_from_utf8( buffer.push(0xF0_u8);
scope, buffer.push(0x90_u8);
buffer.as_bytes(), buffer.push(0x8D_u8);
v8::NewStringType::Normal, buffer.push(0x88_u8);
) }
.unwrap(); let local =
v8::String::new_from_utf8(scope, &buffer, v8::NewStringType::Normal)
.unwrap();
// U+10348 is 2 UTF-16 code units, which is the unit of v8::String.length(). // U+10348 is 2 UTF-16 code units, which is the unit of v8::String.length().
assert_eq!(v8::String::max_length() / 2, local.length()); assert_eq!(v8::String::max_length() / 2, local.length());
assert_eq!(buffer, local.to_rust_string_lossy(scope)); assert_eq!(
buffer.as_slice(),
let too_long = (0..(v8::String::max_length() / 4) + 1) local.to_rust_string_lossy(scope).as_bytes()
.map(|_| '\u{10348}') // UTF8: 0xF0 0x90 0x8D 0x88
.collect::<String>();
let none = v8::String::new_from_utf8(
scope,
too_long.as_bytes(),
v8::NewStringType::Normal,
); );
let mut too_long = Vec::with_capacity(v8::String::max_length() + 4);
for _ in 0..too_long.capacity() / 4 {
// U+10348 in UTF-8
too_long.push(0xF0_u8);
too_long.push(0x90_u8);
too_long.push(0x8D_u8);
too_long.push(0x88_u8);
}
let none =
v8::String::new_from_utf8(scope, &too_long, v8::NewStringType::Normal);
assert!(none.is_none()); assert!(none.is_none());
} }
{ {
@ -8316,6 +8324,14 @@ fn external_strings() {
assert!(!gradients.is_external_twobyte()); assert!(!gradients.is_external_twobyte());
assert!(!gradients.is_onebyte()); assert!(!gradients.is_onebyte());
assert!(!gradients.contains_only_onebyte()); assert!(!gradients.contains_only_onebyte());
// one-byte "internal" test
let latin1 = v8::String::new(scope, "latin-1").unwrap();
assert!(!latin1.is_external());
assert!(!latin1.is_external_onebyte());
assert!(!latin1.is_external_twobyte());
assert!(latin1.is_onebyte());
assert!(latin1.contains_only_onebyte());
} }
#[test] #[test]