1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-21 13:00:36 -05:00

perf(compile): read embedded files as static references when UTF-8 and reading as strings (#27033)

This commit is contained in:
David Sherret 2024-11-27 21:28:41 -05:00 committed by GitHub
parent 76daa03aa9
commit f161adf19e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 158 additions and 61 deletions

View file

@ -22,6 +22,8 @@ impl<'a> deno_config::fs::DenoConfigFs for DenoConfigFsAdapter<'a> {
self
.0
.read_text_file_lossy_sync(path, None)
// todo(https://github.com/denoland/deno_config/pull/140): avoid clone
.map(|s| s.into_owned())
.map_err(|err| err.into_io_error())
}

2
cli/cache/mod.rs vendored
View file

@ -116,6 +116,8 @@ impl<'a> deno_cache_dir::DenoCacheEnv for DenoCacheEnvFsAdapter<'a> {
self
.0
.read_file_sync(path, None)
// todo(https://github.com/denoland/deno_cache_dir/pull/66): avoid clone
.map(|bytes| bytes.into_owned())
.map_err(|err| err.into_io_error())
}

View file

@ -1060,7 +1060,10 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
self.npm_resolver.ensure_read_permission(permissions, path)
}
fn load_text_file_lossy(&self, path: &Path) -> Result<String, AnyError> {
fn load_text_file_lossy(
&self,
path: &Path,
) -> Result<Cow<'static, str>, AnyError> {
// todo(dsherret): use the preloaded module from the graph if available?
let media_type = MediaType::from_path(path);
let text = self.fs.read_text_file_lossy_sync(path, None)?;
@ -1075,15 +1078,18 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
.into(),
);
}
self.emitter.emit_parsed_source_sync(
&specifier,
media_type,
// this is probably not super accurate due to require esm, but probably ok.
// If we find this causes a lot of churn in the emit cache then we should
// investigate how we can make this better
ModuleKind::Cjs,
&text.into(),
)
self
.emitter
.emit_parsed_source_sync(
&specifier,
media_type,
// this is probably not super accurate due to require esm, but probably ok.
// If we find this causes a lot of churn in the emit cache then we should
// investigate how we can make this better
ModuleKind::Cjs,
&text.into(),
)
.map(Cow::Owned)
} else {
Ok(text)
}

View file

@ -160,7 +160,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
if let Ok(source_from_file) =
self.fs.read_text_file_lossy_async(path, None).await
{
Cow::Owned(source_from_file)
source_from_file
} else {
return Ok(ExtNodeCjsAnalysis::Cjs(CjsAnalysisExports {
exports: vec![],

View file

@ -37,7 +37,7 @@ use crate::node::CliNodeCodeTranslator;
use crate::npm::CliNpmResolver;
use crate::npm::InnerCliNpmResolverRef;
use crate::util::sync::AtomicFlag;
use crate::util::text_encoding::from_utf8_lossy_owned;
use crate::util::text_encoding::from_utf8_lossy_cow;
pub type CjsTracker = deno_resolver::cjs::CjsTracker<DenoFsNodeResolverEnv>;
pub type IsCjsResolver =
@ -62,7 +62,10 @@ pub struct ModuleCodeStringSource {
pub struct CliDenoResolverFs(pub Arc<dyn FileSystem>);
impl deno_resolver::fs::DenoResolverFs for CliDenoResolverFs {
fn read_to_string_lossy(&self, path: &Path) -> std::io::Result<String> {
fn read_to_string_lossy(
&self,
path: &Path,
) -> std::io::Result<Cow<'static, str>> {
self
.0
.read_text_file_lossy_sync(path, None)
@ -182,18 +185,21 @@ impl NpmModuleLoader {
let code = if self.cjs_tracker.is_maybe_cjs(specifier, media_type)? {
// translate cjs to esm if it's cjs and inject node globals
let code = from_utf8_lossy_owned(code);
let code = from_utf8_lossy_cow(code);
ModuleSourceCode::String(
self
.node_code_translator
.translate_cjs_to_esm(specifier, Some(Cow::Owned(code)))
.translate_cjs_to_esm(specifier, Some(code))
.await?
.into_owned()
.into(),
)
} else {
// esm and json code is untouched
ModuleSourceCode::Bytes(code.into_boxed_slice().into())
ModuleSourceCode::Bytes(match code {
Cow::Owned(bytes) => bytes.into_boxed_slice().into(),
Cow::Borrowed(bytes) => bytes.into(),
})
};
Ok(ModuleCodeStringSource {

View file

@ -282,14 +282,13 @@ impl StandaloneModules {
.vfs
.read_file_all(entry, VfsFileSubDataKind::ModuleGraph)?,
Err(err) if err.kind() == ErrorKind::NotFound => {
let bytes = match RealFs.read_file_sync(&path, None) {
match RealFs.read_file_sync(&path, None) {
Ok(bytes) => bytes,
Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => {
return Ok(None)
}
Err(err) => return Err(err.into()),
};
Cow::Owned(bytes)
}
}
Err(err) => return Err(err.into()),
};
@ -694,7 +693,7 @@ impl<'a> DenoCompileBinaryWriter<'a> {
&file_path,
match maybe_source {
Some(source) => source,
None => RealFs.read_file_sync(&file_path, None)?,
None => RealFs.read_file_sync(&file_path, None)?.into_owned(),
},
VfsFileSubDataKind::ModuleGraph,
)

View file

@ -91,6 +91,7 @@ use crate::resolver::CliNpmReqResolver;
use crate::resolver::NpmModuleLoader;
use crate::util::progress_bar::ProgressBar;
use crate::util::progress_bar::ProgressBarStyle;
use crate::util::text_encoding::from_utf8_lossy_cow;
use crate::util::v8::construct_v8_flags;
use crate::worker::CliCodeCache;
use crate::worker::CliMainWorkerFactory;
@ -516,13 +517,13 @@ impl NodeRequireLoader for EmbeddedModuleLoader {
fn load_text_file_lossy(
&self,
path: &std::path::Path,
) -> Result<String, AnyError> {
) -> Result<Cow<'static, str>, AnyError> {
let file_entry = self.shared.vfs.file_entry(path)?;
let file_bytes = self
.shared
.vfs
.read_file_all(file_entry, VfsFileSubDataKind::ModuleGraph)?;
Ok(String::from_utf8(file_bytes.into_owned())?)
Ok(from_utf8_lossy_cow(file_bytes))
}
fn is_maybe_cjs(

View file

@ -743,15 +743,12 @@ impl deno_io::fs::File for FileBackedVfsFile {
Err(FsError::NotSupported)
}
fn read_all_sync(self: Rc<Self>) -> FsResult<Vec<u8>> {
self.read_to_end().map(|bytes| bytes.into_owned())
fn read_all_sync(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
self.read_to_end()
}
async fn read_all_async(self: Rc<Self>) -> FsResult<Vec<u8>> {
async fn read_all_async(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
let inner = (*self).clone();
tokio::task::spawn_blocking(move || {
inner.read_to_end().map(|bytes| bytes.into_owned())
})
.await?
tokio::task::spawn_blocking(move || inner.read_to_end()).await?
}
fn chmod_sync(self: Rc<Self>, _pathmode: u32) -> FsResult<()> {

View file

@ -11,6 +11,15 @@ use deno_core::ModuleSourceCode;
static SOURCE_MAP_PREFIX: &[u8] =
b"//# sourceMappingURL=data:application/json;base64,";
#[inline(always)]
pub fn from_utf8_lossy_cow(bytes: Cow<[u8]>) -> Cow<str> {
match bytes {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => Cow::Owned(from_utf8_lossy_owned(bytes)),
}
}
#[inline(always)]
pub fn from_utf8_lossy_owned(bytes: Vec<u8>) -> String {
match String::from_utf8_lossy(&bytes) {
Cow::Owned(code) => code,

View file

@ -3,6 +3,7 @@
// Allow using Arc for this module.
#![allow(clippy::disallowed_types)]
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::io::Error;
@ -457,11 +458,11 @@ impl FileSystem for InMemoryFs {
&self,
path: &Path,
_access_check: Option<AccessCheckCb>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let entry = self.get_entry(path);
match entry {
Some(entry) => match &*entry {
PathEntry::File(data) => Ok(data.clone()),
PathEntry::File(data) => Ok(Cow::Owned(data.clone())),
PathEntry::Dir => Err(FsError::Io(Error::new(
ErrorKind::InvalidInput,
"Is a directory",
@ -474,7 +475,7 @@ impl FileSystem for InMemoryFs {
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
self.read_file_sync(&path, access_check)
}
}

View file

@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use core::str;
use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
@ -288,7 +289,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let options = OpenOptions::read();
let file = self.open_sync(path, options, access_check)?;
let buf = file.read_all_sync()?;
@ -298,7 +299,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let options = OpenOptions::read();
let file = self.open_async(path, options, access_check).await?;
let buf = file.read_all_async().await?;
@ -327,17 +328,25 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<String> {
) -> FsResult<Cow<'static, str>> {
let buf = self.read_file_sync(path, access_check)?;
Ok(string_from_utf8_lossy(buf))
Ok(string_from_cow_utf8_lossy(buf))
}
async fn read_text_file_lossy_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<String> {
) -> FsResult<Cow<'static, str>> {
let buf = self.read_file_async(path, access_check).await?;
Ok(string_from_utf8_lossy(buf))
Ok(string_from_cow_utf8_lossy(buf))
}
}
#[inline(always)]
fn string_from_cow_utf8_lossy(buf: Cow<'static, [u8]>) -> Cow<'static, str> {
match buf {
Cow::Owned(buf) => Cow::Owned(string_from_utf8_lossy(buf)),
Cow::Borrowed(buf) => String::from_utf8_lossy(buf),
}
}

View file

@ -17,6 +17,7 @@ pub use crate::interface::OpenOptions;
pub use crate::ops::FsOpsError;
pub use crate::ops::FsOpsErrorKind;
pub use crate::ops::OperationError;
pub use crate::ops::V8MaybeStaticStr;
pub use crate::std_fs::RealFs;
pub use crate::sync::MaybeSend;
pub use crate::sync::MaybeSync;

View file

@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::borrow::Cow;
use std::cell::RefCell;
use std::error::Error;
use std::fmt::Formatter;
@ -18,12 +19,15 @@ use crate::FsPermissions;
use crate::OpenOptions;
use boxed_error::Boxed;
use deno_core::op2;
use deno_core::v8;
use deno_core::CancelFuture;
use deno_core::CancelHandle;
use deno_core::FastString;
use deno_core::JsBuffer;
use deno_core::OpState;
use deno_core::ResourceId;
use deno_core::ToJsBuffer;
use deno_core::ToV8;
use deno_io::fs::FileResource;
use deno_io::fs::FsError;
use deno_io::fs::FsStat;
@ -1333,7 +1337,8 @@ where
.read_file_sync(&path, Some(&mut access_check))
.map_err(|error| map_permission_error("readfile", error, &path))?;
Ok(buf.into())
// todo(https://github.com/denoland/deno/issues/27107): do not clone here
Ok(buf.into_owned().into_boxed_slice().into())
}
#[op2(async, stack_trace)]
@ -1375,15 +1380,61 @@ where
.map_err(|error| map_permission_error("readfile", error, &path))?
};
Ok(buf.into())
// todo(https://github.com/denoland/deno/issues/27107): do not clone here
Ok(buf.into_owned().into_boxed_slice().into())
}
// todo(https://github.com/denoland/deno_core/pull/986): remove
// when upgrading deno_core
#[derive(Debug)]
pub struct FastStringV8AllocationError;
impl std::error::Error for FastStringV8AllocationError {}
impl std::fmt::Display for FastStringV8AllocationError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"failed to allocate string; buffer exceeds maximum length"
)
}
}
/// Maintains a static reference to the string if possible.
pub struct V8MaybeStaticStr(pub Cow<'static, str>);
impl<'s> ToV8<'s> for V8MaybeStaticStr {
type Error = FastStringV8AllocationError;
#[inline]
fn to_v8(
self,
scope: &mut v8::HandleScope<'s>,
) -> Result<v8::Local<'s, v8::Value>, Self::Error> {
// todo(https://github.com/denoland/deno_core/pull/986): remove this check
// when upgrading deno_core
const MAX_V8_STRING_LENGTH: usize = 536870888;
if self.0.len() > MAX_V8_STRING_LENGTH {
return Err(FastStringV8AllocationError);
}
Ok(
match self.0 {
Cow::Borrowed(text) => FastString::from_static(text),
Cow::Owned(value) => value.into(),
}
.v8_string(scope)
.into(),
)
}
}
#[op2(stack_trace)]
#[string]
#[to_v8]
pub fn op_fs_read_file_text_sync<P>(
state: &mut OpState,
#[string] path: String,
) -> Result<String, FsOpsError>
) -> Result<V8MaybeStaticStr, FsOpsError>
where
P: FsPermissions + 'static,
{
@ -1395,17 +1446,16 @@ where
let str = fs
.read_text_file_lossy_sync(&path, Some(&mut access_check))
.map_err(|error| map_permission_error("readfile", error, &path))?;
Ok(str)
Ok(V8MaybeStaticStr(str))
}
#[op2(async, stack_trace)]
#[string]
#[to_v8]
pub async fn op_fs_read_file_text_async<P>(
state: Rc<RefCell<OpState>>,
#[string] path: String,
#[smi] cancel_rid: Option<ResourceId>,
) -> Result<String, FsOpsError>
) -> Result<V8MaybeStaticStr, FsOpsError>
where
P: FsPermissions + 'static,
{
@ -1439,7 +1489,7 @@ where
.map_err(|error| map_permission_error("readfile", error, &path))?
};
Ok(str)
Ok(V8MaybeStaticStr(str))
}
fn to_seek_from(offset: i64, whence: i32) -> Result<SeekFrom, FsOpsError> {

View file

@ -2,6 +2,7 @@
#![allow(clippy::disallowed_methods)]
use std::borrow::Cow;
use std::env::current_dir;
use std::fs;
use std::io;
@ -371,7 +372,7 @@ impl FileSystem for RealFs {
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let mut file = open_with_access_check(
OpenOptions {
read: true,
@ -382,13 +383,13 @@ impl FileSystem for RealFs {
)?;
let mut buf = Vec::new();
file.read_to_end(&mut buf)?;
Ok(buf)
Ok(Cow::Owned(buf))
}
async fn read_file_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<Vec<u8>> {
) -> FsResult<Cow<'static, [u8]>> {
let mut file = open_with_access_check(
OpenOptions {
read: true,
@ -400,7 +401,7 @@ impl FileSystem for RealFs {
spawn_blocking(move || {
let mut buf = Vec::new();
file.read_to_end(&mut buf)?;
Ok::<_, FsError>(buf)
Ok::<_, FsError>(Cow::Owned(buf))
})
.await?
.map_err(Into::into)

View file

@ -215,8 +215,8 @@ pub trait File {
fn write_all_sync(self: Rc<Self>, buf: &[u8]) -> FsResult<()>;
async fn write_all(self: Rc<Self>, buf: BufView) -> FsResult<()>;
fn read_all_sync(self: Rc<Self>) -> FsResult<Vec<u8>>;
async fn read_all_async(self: Rc<Self>) -> FsResult<Vec<u8>>;
fn read_all_sync(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>>;
async fn read_all_async(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>>;
fn chmod_sync(self: Rc<Self>, pathmode: u32) -> FsResult<()>;
async fn chmod_async(self: Rc<Self>, mode: u32) -> FsResult<()>;

View file

@ -789,26 +789,26 @@ impl crate::fs::File for StdFileResourceInner {
}
}
fn read_all_sync(self: Rc<Self>) -> FsResult<Vec<u8>> {
fn read_all_sync(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
match self.kind {
StdFileResourceKind::File | StdFileResourceKind::Stdin(_) => {
let mut buf = Vec::new();
self.with_sync(|file| Ok(file.read_to_end(&mut buf)?))?;
Ok(buf)
Ok(Cow::Owned(buf))
}
StdFileResourceKind::Stdout | StdFileResourceKind::Stderr => {
Err(FsError::NotSupported)
}
}
}
async fn read_all_async(self: Rc<Self>) -> FsResult<Vec<u8>> {
async fn read_all_async(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
match self.kind {
StdFileResourceKind::File | StdFileResourceKind::Stdin(_) => {
self
.with_inner_blocking_task(|file| {
let mut buf = Vec::new();
file.read_to_end(&mut buf)?;
Ok(buf)
Ok(Cow::Owned(buf))
})
.await
}

View file

@ -157,7 +157,10 @@ pub trait NodeRequireLoader {
path: &'a Path,
) -> Result<Cow<'a, Path>, AnyError>;
fn load_text_file_lossy(&self, path: &Path) -> Result<String, AnyError>;
fn load_text_file_lossy(
&self,
path: &Path,
) -> Result<Cow<'static, str>, AnyError>;
/// Get if the module kind is maybe CJS and loading should determine
/// if its CJS or ESM.
@ -873,6 +876,8 @@ impl deno_package_json::fs::DenoPkgJsonFs for DenoFsNodeResolverEnv {
self
.fs
.read_text_file_lossy_sync(path, None)
// todo(https://github.com/denoland/deno_package_json/pull/9): don't clone
.map(|text| text.into_owned())
.map_err(|err| err.into_io_error())
}
}
@ -887,6 +892,8 @@ impl<'a> deno_package_json::fs::DenoPkgJsonFs for DenoPkgJsonFsAdapter<'a> {
self
.0
.read_text_file_lossy_sync(path, None)
// todo(https://github.com/denoland/deno_package_json/pull/9): don't clone
.map(|text| text.into_owned())
.map_err(|err| err.into_io_error())
}
}

View file

@ -8,6 +8,7 @@ use deno_core::v8;
use deno_core::JsRuntimeInspector;
use deno_core::OpState;
use deno_fs::FileSystemRc;
use deno_fs::V8MaybeStaticStr;
use deno_package_json::PackageJsonRc;
use deno_path_util::normalize_path;
use deno_path_util::url_from_file_path;
@ -477,11 +478,11 @@ where
}
#[op2(stack_trace)]
#[string]
#[to_v8]
pub fn op_require_read_file<P>(
state: &mut OpState,
#[string] file_path: String,
) -> Result<String, RequireError>
) -> Result<V8MaybeStaticStr, RequireError>
where
P: NodePermissions + 'static,
{
@ -492,6 +493,7 @@ where
let loader = state.borrow::<NodeRequireLoaderRc>();
loader
.load_text_file_lossy(&file_path)
.map(V8MaybeStaticStr)
.map_err(|e| RequireErrorKind::ReadModule(e).into_box())
}

View file

@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
@ -10,7 +11,10 @@ pub struct DirEntry {
}
pub trait DenoResolverFs {
fn read_to_string_lossy(&self, path: &Path) -> std::io::Result<String>;
fn read_to_string_lossy(
&self,
path: &Path,
) -> std::io::Result<Cow<'static, str>>;
fn realpath_sync(&self, path: &Path) -> std::io::Result<PathBuf>;
fn exists_sync(&self, path: &Path) -> bool;
fn is_dir_sync(&self, path: &Path) -> bool;