From 9aa02769c8bab25102afa07366434b59e9eec715 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Sun, 19 Jan 2025 14:23:07 -0500 Subject: [PATCH] perf(compile): remove swc from denort (#27721) This is achieved by storing CJS export analysis ahead of time in the executable, which should also improve the performance of `denort` by this never being done anymore (I'm too lazy atm to bench this, but it will be significant for some programs). --- Cargo.lock | 12 +- Cargo.toml | 2 +- cli/factory.rs | 24 +- cli/lib/standalone/binary.rs | 308 +++++++++++++++++++++- cli/lib/standalone/virtual_fs.rs | 368 +++++++++++++++++++++----- cli/rt/Cargo.toml | 9 +- cli/rt/binary.rs | 435 +++++++++++++++---------------- cli/rt/file_system.rs | 79 +++--- cli/rt/node.rs | 94 ++++--- cli/rt/run.rs | 47 ++-- cli/snapshot/Cargo.toml | 2 +- cli/standalone/binary.rs | 425 +++++++++++++++++++++++------- cli/standalone/mod.rs | 1 - cli/standalone/serialization.rs | 237 ----------------- cli/standalone/virtual_fs.rs | 9 +- resolvers/deno/cjs.rs | 22 +- resolvers/node/Cargo.toml | 1 + resolvers/node/analyze.rs | 4 +- runtime/Cargo.toml | 36 +-- runtime/lib.rs | 1 + runtime/shared.rs | 76 ------ runtime/snapshot.rs | 75 +++++- runtime/web_worker.rs | 5 +- runtime/worker.rs | 5 +- 24 files changed, 1368 insertions(+), 909 deletions(-) delete mode 100644 cli/standalone/serialization.rs diff --git a/Cargo.lock b/Cargo.lock index 5bd6870c18..7a6108b2e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1960,11 +1960,12 @@ dependencies = [ [[package]] name = "deno_media_type" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a417f8bd3f1074185c4c8ccb6ea6261ae173781596cc358e68ad07aaac11009d" +checksum = "577fe2bbe04f3e9b1b7c6fac6a75101a9fbd611c50a6b68789e69f4d63dcb2b4" dependencies = [ "data-url", + "encoding_rs", "serde", "url", ] @@ -2341,7 +2342,6 @@ dependencies = [ "dlopen2", "encoding_rs", "fastwebsockets", - "flate2", "http 1.1.0", 
"http-body-util", "hyper 0.14.28", @@ -2682,12 +2682,11 @@ name = "denort" version = "2.1.5" dependencies = [ "async-trait", - "deno_ast", + "bincode", "deno_cache_dir", "deno_config", "deno_core", "deno_error", - "deno_graph", "deno_lib", "deno_media_type", "deno_npm", @@ -2705,8 +2704,10 @@ dependencies = [ "node_resolver", "pretty_assertions", "serde", + "serde_json", "sys_traits", "test_server", + "thiserror 2.0.3", "tokio", "tokio-util", "twox-hash", @@ -5246,6 +5247,7 @@ dependencies = [ "once_cell", "path-clean", "regex", + "serde", "serde_json", "sys_traits", "thiserror 2.0.3", diff --git a/Cargo.toml b/Cargo.toml index d1e96fe0ad..4ee2abe993 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,7 @@ deno_core = { version = "0.331.0" } deno_bench_util = { version = "0.180.0", path = "./bench_util" } deno_config = { version = "=0.45.0", features = ["workspace", "sync"] } deno_lockfile = "=0.24.0" -deno_media_type = { version = "0.2.3", features = ["module_specifier"] } +deno_media_type = { version = "0.2.4", features = ["module_specifier"] } deno_npm = "=0.27.2" deno_path_util = "=0.3.0" deno_permissions = { version = "0.45.0", path = "./runtime/permissions" } diff --git a/cli/factory.rs b/cli/factory.rs index 3f74d71a50..91c5d07b75 100644 --- a/cli/factory.rs +++ b/cli/factory.rs @@ -813,19 +813,11 @@ impl CliFactory { .services .node_code_translator .get_or_try_init_async(async { - let caches = self.caches()?; - let node_analysis_cache = - NodeAnalysisCache::new(caches.node_analysis_db()); let node_resolver = self.node_resolver().await?.clone(); - let cjs_esm_analyzer = CliCjsCodeAnalyzer::new( - node_analysis_cache, - self.cjs_tracker()?.clone(), - self.fs().clone(), - Some(self.parsed_source_cache().clone()), - ); + let cjs_code_analyzer = self.create_cjs_code_analyzer()?; Ok(Arc::new(NodeCodeTranslator::new( - cjs_esm_analyzer, + cjs_code_analyzer, self.in_npm_pkg_checker()?.clone(), node_resolver, self.npm_resolver().await?.clone(), @@ -836,6 +828,17 
@@ impl CliFactory { .await } + fn create_cjs_code_analyzer(&self) -> Result { + let caches = self.caches()?; + let node_analysis_cache = NodeAnalysisCache::new(caches.node_analysis_db()); + Ok(CliCjsCodeAnalyzer::new( + node_analysis_cache, + self.cjs_tracker()?.clone(), + self.fs().clone(), + Some(self.parsed_source_cache().clone()), + )) + } + pub async fn npm_req_resolver( &self, ) -> Result<&Arc, AnyError> { @@ -1025,6 +1028,7 @@ impl CliFactory { ) -> Result { let cli_options = self.cli_options()?; Ok(DenoCompileBinaryWriter::new( + self.create_cjs_code_analyzer()?, self.cjs_tracker()?, self.cli_options()?, self.deno_dir()?, diff --git a/cli/lib/standalone/binary.rs b/cli/lib/standalone/binary.rs index eb158d414e..ae02197bf4 100644 --- a/cli/lib/standalone/binary.rs +++ b/cli/lib/standalone/binary.rs @@ -4,10 +4,12 @@ use std::borrow::Cow; use std::collections::BTreeMap; use deno_config::workspace::PackageJsonDepResolution; +use deno_media_type::MediaType; use deno_runtime::deno_permissions::PermissionsOptions; use deno_runtime::deno_telemetry::OtelConfig; use deno_semver::Version; use indexmap::IndexMap; +use node_resolver::analyze::CjsAnalysisExports; use serde::Deserialize; use serde::Serialize; use url::Url; @@ -17,6 +19,24 @@ use crate::args::UnstableConfig; pub const MAGIC_BYTES: &[u8; 8] = b"d3n0l4nd"; +pub trait DenoRtDeserializable<'a>: Sized { + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)>; +} + +impl<'a> DenoRtDeserializable<'a> for Cow<'a, [u8]> { + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)> { + let (input, data) = read_bytes_with_u32_len(input)?; + Ok((input, Cow::Borrowed(data))) + } +} + +pub trait DenoRtSerializable<'a> { + fn serialize( + &'a self, + builder: &mut capacity_builder::BytesBuilder<'a, Vec>, + ); +} + #[derive(Deserialize, Serialize)] pub enum NodeModules { Managed { @@ -73,19 +93,208 @@ pub struct Metadata { pub vfs_case_sensitivity: FileSystemCaseSensitivity, } -pub struct 
SourceMapStore { - data: IndexMap, Cow<'static, [u8]>>, +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct SpecifierId(u32); + +impl SpecifierId { + pub fn new(id: u32) -> Self { + Self(id) + } } -impl SourceMapStore { +impl<'a> capacity_builder::BytesAppendable<'a> for SpecifierId { + fn append_to_builder( + self, + builder: &mut capacity_builder::BytesBuilder<'a, TBytes>, + ) { + builder.append_le(self.0); + } +} + +impl<'a> DenoRtSerializable<'a> for SpecifierId { + fn serialize( + &'a self, + builder: &mut capacity_builder::BytesBuilder<'a, Vec>, + ) { + builder.append_le(self.0); + } +} + +impl<'a> DenoRtDeserializable<'a> for SpecifierId { + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)> { + let (input, id) = read_u32(input)?; + Ok((input, Self(id))) + } +} + +#[derive(Deserialize, Serialize)] +pub enum CjsExportAnalysisEntry { + Esm, + Cjs(CjsAnalysisExports), +} + +const HAS_TRANSPILED_FLAG: u8 = 1 << 0; +const HAS_SOURCE_MAP_FLAG: u8 = 1 << 1; +const HAS_CJS_EXPORT_ANALYSIS_FLAG: u8 = 1 << 2; + +pub struct RemoteModuleEntry<'a> { + pub media_type: MediaType, + pub data: Cow<'a, [u8]>, + pub maybe_transpiled: Option>, + pub maybe_source_map: Option>, + pub maybe_cjs_export_analysis: Option>, +} + +impl<'a> DenoRtSerializable<'a> for RemoteModuleEntry<'a> { + fn serialize( + &'a self, + builder: &mut capacity_builder::BytesBuilder<'a, Vec>, + ) { + fn append_maybe_data<'a>( + builder: &mut capacity_builder::BytesBuilder<'a, Vec>, + maybe_data: Option<&'a [u8]>, + ) { + if let Some(data) = maybe_data { + builder.append_le(data.len() as u32); + builder.append(data); + } + } + + let mut has_data_flags = 0; + if self.maybe_transpiled.is_some() { + has_data_flags |= HAS_TRANSPILED_FLAG; + } + if self.maybe_source_map.is_some() { + has_data_flags |= HAS_SOURCE_MAP_FLAG; + } + if self.maybe_cjs_export_analysis.is_some() { + has_data_flags |= HAS_CJS_EXPORT_ANALYSIS_FLAG; + } + builder.append(serialize_media_type(self.media_type)); 
+ builder.append_le(self.data.len() as u32); + builder.append(self.data.as_ref()); + builder.append(has_data_flags); + append_maybe_data(builder, self.maybe_transpiled.as_deref()); + append_maybe_data(builder, self.maybe_source_map.as_deref()); + append_maybe_data(builder, self.maybe_cjs_export_analysis.as_deref()); + } +} + +impl<'a> DenoRtDeserializable<'a> for RemoteModuleEntry<'a> { + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)> { + #[allow(clippy::type_complexity)] + fn deserialize_data_if_has_flag( + input: &[u8], + has_data_flags: u8, + flag: u8, + ) -> std::io::Result<(&[u8], Option>)> { + if has_data_flags & flag != 0 { + let (input, bytes) = read_bytes_with_u32_len(input)?; + Ok((input, Some(Cow::Borrowed(bytes)))) + } else { + Ok((input, None)) + } + } + + let (input, media_type) = MediaType::deserialize(input)?; + let (input, data) = read_bytes_with_u32_len(input)?; + let (input, has_data_flags) = read_u8(input)?; + let (input, maybe_transpiled) = + deserialize_data_if_has_flag(input, has_data_flags, HAS_TRANSPILED_FLAG)?; + let (input, maybe_source_map) = + deserialize_data_if_has_flag(input, has_data_flags, HAS_SOURCE_MAP_FLAG)?; + let (input, maybe_cjs_export_analysis) = deserialize_data_if_has_flag( + input, + has_data_flags, + HAS_CJS_EXPORT_ANALYSIS_FLAG, + )?; + Ok(( + input, + Self { + media_type, + data: Cow::Borrowed(data), + maybe_transpiled, + maybe_source_map, + maybe_cjs_export_analysis, + }, + )) + } +} + +fn serialize_media_type(media_type: MediaType) -> u8 { + match media_type { + MediaType::JavaScript => 0, + MediaType::Jsx => 1, + MediaType::Mjs => 2, + MediaType::Cjs => 3, + MediaType::TypeScript => 4, + MediaType::Mts => 5, + MediaType::Cts => 6, + MediaType::Dts => 7, + MediaType::Dmts => 8, + MediaType::Dcts => 9, + MediaType::Tsx => 10, + MediaType::Json => 11, + MediaType::Wasm => 12, + MediaType::Css => 13, + MediaType::SourceMap => 14, + MediaType::Unknown => 15, + } +} + +impl<'a> 
DenoRtDeserializable<'a> for MediaType { + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)> { + let (input, value) = read_u8(input)?; + let value = match value { + 0 => MediaType::JavaScript, + 1 => MediaType::Jsx, + 2 => MediaType::Mjs, + 3 => MediaType::Cjs, + 4 => MediaType::TypeScript, + 5 => MediaType::Mts, + 6 => MediaType::Cts, + 7 => MediaType::Dts, + 8 => MediaType::Dmts, + 9 => MediaType::Dcts, + 10 => MediaType::Tsx, + 11 => MediaType::Json, + 12 => MediaType::Wasm, + 13 => MediaType::Css, + 14 => MediaType::SourceMap, + 15 => MediaType::Unknown, + value => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unknown media type value: {value}"), + )) + } + }; + Ok((input, value)) + } +} + +/// Data stored keyed by specifier. +pub struct SpecifierDataStore { + data: IndexMap, +} + +impl Default for SpecifierDataStore { + fn default() -> Self { + Self { + data: IndexMap::new(), + } + } +} + +impl SpecifierDataStore { pub fn with_capacity(capacity: usize) -> Self { Self { data: IndexMap::with_capacity(capacity), } } - pub fn iter(&self) -> impl Iterator { - self.data.iter().map(|(k, v)| (k.as_ref(), v.as_ref())) + pub fn iter(&self) -> impl Iterator { + self.data.iter().map(|(k, v)| (*k, v)) } #[allow(clippy::len_without_is_empty)] @@ -93,15 +302,88 @@ impl SourceMapStore { self.data.len() } - pub fn add( - &mut self, - specifier: Cow<'static, str>, - source_map: Cow<'static, [u8]>, - ) { - self.data.insert(specifier, source_map); + pub fn contains(&self, specifier: SpecifierId) -> bool { + self.data.contains_key(&specifier) } - pub fn get(&self, specifier: &str) -> Option<&[u8]> { - self.data.get(specifier).map(|v| v.as_ref()) + pub fn add(&mut self, specifier: SpecifierId, value: TData) { + self.data.insert(specifier, value); + } + + pub fn get(&self, specifier: SpecifierId) -> Option<&TData> { + self.data.get(&specifier) + } +} + +impl<'a, TData> SpecifierDataStore +where + TData: DenoRtSerializable<'a> + 
'a, +{ + pub fn serialize( + &'a self, + builder: &mut capacity_builder::BytesBuilder<'a, Vec>, + ) { + builder.append_le(self.len() as u32); + for (specifier, value) in self.iter() { + builder.append(specifier); + value.serialize(builder); + } + } +} + +impl<'a, TData> DenoRtDeserializable<'a> for SpecifierDataStore +where + TData: DenoRtDeserializable<'a>, +{ + fn deserialize(input: &'a [u8]) -> std::io::Result<(&'a [u8], Self)> { + let (input, len) = read_u32_as_usize(input)?; + let mut data = IndexMap::with_capacity(len); + let mut input = input; + for _ in 0..len { + let (new_input, specifier) = SpecifierId::deserialize(input)?; + let (new_input, value) = TData::deserialize(new_input)?; + data.insert(specifier, value); + input = new_input; + } + Ok((input, Self { data })) + } +} + +fn read_bytes_with_u32_len(input: &[u8]) -> std::io::Result<(&[u8], &[u8])> { + let (input, len) = read_u32_as_usize(input)?; + let (input, data) = read_bytes(input, len)?; + Ok((input, data)) +} + +fn read_u32_as_usize(input: &[u8]) -> std::io::Result<(&[u8], usize)> { + read_u32(input).map(|(input, len)| (input, len as usize)) +} + +fn read_u32(input: &[u8]) -> std::io::Result<(&[u8], u32)> { + let (input, len_bytes) = read_bytes(input, 4)?; + let len = u32::from_le_bytes(len_bytes.try_into().unwrap()); + Ok((input, len)) +} + +fn read_u8(input: &[u8]) -> std::io::Result<(&[u8], u8)> { + check_has_len(input, 1)?; + Ok((&input[1..], input[0])) +} + +fn read_bytes(input: &[u8], len: usize) -> std::io::Result<(&[u8], &[u8])> { + check_has_len(input, len)?; + let (len_bytes, input) = input.split_at(len); + Ok((input, len_bytes)) +} + +#[inline(always)] +fn check_has_len(input: &[u8], len: usize) -> std::io::Result<()> { + if input.len() < len { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Unexpected end of data", + )) + } else { + Ok(()) } } diff --git a/cli/lib/standalone/virtual_fs.rs b/cli/lib/standalone/virtual_fs.rs index 5e11491349..124c2a0002 100644 --- 
a/cli/lib/standalone/virtual_fs.rs +++ b/cli/lib/standalone/virtual_fs.rs @@ -1,7 +1,10 @@ // Copyright 2018-2025 the Deno authors. MIT license. use std::cmp::Ordering; +use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::collections::VecDeque; +use std::fmt; use std::path::Path; use std::path::PathBuf; @@ -12,17 +15,13 @@ use deno_runtime::deno_core::anyhow::bail; use deno_runtime::deno_core::anyhow::Context; use deno_runtime::deno_core::error::AnyError; use indexmap::IndexSet; +use serde::de; +use serde::de::SeqAccess; +use serde::de::Visitor; use serde::Deserialize; +use serde::Deserializer; use serde::Serialize; - -#[derive(Debug, Copy, Clone)] -pub enum VfsFileSubDataKind { - /// Raw bytes of the file. - Raw, - /// Bytes to use for module loading. For example, for TypeScript - /// files this will be the transpiled JavaScript source. - ModuleGraph, -} +use serde::Serializer; #[derive(Debug, PartialEq, Eq)] pub enum WindowsSystemRootablePath { @@ -32,6 +31,14 @@ pub enum WindowsSystemRootablePath { } impl WindowsSystemRootablePath { + pub fn root_for_current_os() -> Self { + if cfg!(windows) { + WindowsSystemRootablePath::WindowSystemRoot + } else { + WindowsSystemRootablePath::Path(PathBuf::from("/")) + } + } + pub fn join(&self, name_component: &str) -> PathBuf { // this method doesn't handle multiple components debug_assert!( @@ -118,6 +125,10 @@ impl VirtualDirectoryEntries { self.0.get_mut(index) } + pub fn get_by_index(&self, index: usize) -> Option<&VfsEntry> { + self.0.get(index) + } + pub fn binary_search( &self, name: &str, @@ -188,27 +199,67 @@ pub struct VirtualDirectory { pub entries: VirtualDirectoryEntries, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy)] pub struct OffsetWithLength { - #[serde(rename = "o")] pub offset: u64, - #[serde(rename = "l")] pub len: u64, } +// serialize as an array in order to save space +impl Serialize for OffsetWithLength { + fn serialize(&self, 
serializer: S) -> Result + where + S: Serializer, + { + let array = [self.offset, self.len]; + array.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for OffsetWithLength { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct OffsetWithLengthVisitor; + + impl<'de> Visitor<'de> for OffsetWithLengthVisitor { + type Value = OffsetWithLength; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("an array with two elements: [offset, len]") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let offset = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(0, &self))?; + let len = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(1, &self))?; + Ok(OffsetWithLength { offset, len }) + } + } + + deserializer.deserialize_seq(OffsetWithLengthVisitor) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VirtualFile { #[serde(rename = "n")] pub name: String, #[serde(rename = "o")] pub offset: OffsetWithLength, - /// Offset file to use for module loading when it differs from the - /// raw file. Often this will be the same offset as above for data - /// such as JavaScript files, but for TypeScript files the `offset` - /// will be the original raw bytes when included as an asset and this - /// offset will be to the transpiled JavaScript source. 
- #[serde(rename = "m")] - pub module_graph_offset: OffsetWithLength, + #[serde(rename = "m", skip_serializing_if = "Option::is_none")] + pub transpiled_offset: Option, + #[serde(rename = "c", skip_serializing_if = "Option::is_none")] + pub cjs_export_analysis_offset: Option, + #[serde(rename = "s", skip_serializing_if = "Option::is_none")] + pub source_map_offset: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -314,12 +365,82 @@ pub struct BuiltVfs { pub files: Vec>, } +#[derive(Debug, Default)] +struct FilesData { + files: Vec>, + current_offset: u64, + file_offsets: HashMap<(String, usize), OffsetWithLength>, +} + +impl FilesData { + pub fn file_bytes(&self, offset: OffsetWithLength) -> Option<&[u8]> { + if offset.len == 0 { + return Some(&[]); + } + + // the debug assertions in this method should never happen + // because it would indicate providing an offset not in the vfs + let mut count: u64 = 0; + for file in &self.files { + // clippy wanted a match + match count.cmp(&offset.offset) { + Ordering::Equal => { + debug_assert_eq!(offset.len, file.len() as u64); + if offset.len == file.len() as u64 { + return Some(file); + } else { + return None; + } + } + Ordering::Less => { + count += file.len() as u64; + } + Ordering::Greater => { + debug_assert!(false); + return None; + } + } + } + debug_assert!(false); + None + } + + pub fn add_data(&mut self, data: Vec) -> OffsetWithLength { + if data.is_empty() { + return OffsetWithLength { offset: 0, len: 0 }; + } + let checksum = crate::util::checksum::gen(&[&data]); + match self.file_offsets.entry((checksum, data.len())) { + Entry::Occupied(occupied_entry) => { + let offset_and_len = *occupied_entry.get(); + debug_assert_eq!(data.len() as u64, offset_and_len.len); + offset_and_len + } + Entry::Vacant(vacant_entry) => { + let offset_and_len = OffsetWithLength { + offset: self.current_offset, + len: data.len() as u64, + }; + vacant_entry.insert(offset_and_len); + self.current_offset += offset_and_len.len; + 
self.files.push(data); + offset_and_len + } + } + } +} + +pub struct AddFileDataOptions { + pub data: Vec, + pub maybe_transpiled: Option>, + pub maybe_source_map: Option>, + pub maybe_cjs_export_analysis: Option>, +} + #[derive(Debug)] pub struct VfsBuilder { executable_root: VirtualDirectory, - files: Vec>, - current_offset: u64, - file_offsets: HashMap, + files: FilesData, /// The minimum root directory that should be included in the VFS. min_root_dir: Option, case_sensitivity: FileSystemCaseSensitivity, @@ -338,9 +459,7 @@ impl VfsBuilder { name: "/".to_string(), entries: Default::default(), }, - files: Vec::new(), - current_offset: 0, - file_offsets: Default::default(), + files: Default::default(), min_root_dir: Default::default(), // This is not exactly correct because file systems on these OSes // may be case-sensitive or not based on the directory, but this @@ -360,7 +479,11 @@ impl VfsBuilder { } pub fn files_len(&self) -> usize { - self.files.len() + self.files.files.len() + } + + pub fn file_bytes(&self, offset: OffsetWithLength) -> Option<&[u8]> { + self.files.file_bytes(offset) } /// Add a directory that might be the minimum root directory @@ -387,13 +510,8 @@ impl VfsBuilder { common_components.push(a); } if common_components.is_empty() { - if cfg!(windows) { - self.min_root_dir = - Some(WindowsSystemRootablePath::WindowSystemRoot); - } else { - self.min_root_dir = - Some(WindowsSystemRootablePath::Path(PathBuf::from("/"))); - } + self.min_root_dir = + Some(WindowsSystemRootablePath::root_for_current_os()); } else { self.min_root_dir = Some(WindowsSystemRootablePath::Path( common_components.iter().collect(), @@ -513,7 +631,7 @@ impl VfsBuilder { VfsEntry::Dir(dir) => { current_dir = dir; } - _ => unreachable!(), + _ => unreachable!("{}", path.display()), }; } @@ -525,7 +643,15 @@ impl VfsBuilder { #[allow(clippy::disallowed_methods)] let file_bytes = std::fs::read(path) .with_context(|| format!("Reading {}", path.display()))?; - 
self.add_file_with_data(path, file_bytes, VfsFileSubDataKind::Raw) + self.add_file_with_data( + path, + AddFileDataOptions { + data: file_bytes, + maybe_cjs_export_analysis: None, + maybe_transpiled: None, + maybe_source_map: None, + }, + ) } fn add_file_at_path_not_symlink( @@ -536,14 +662,13 @@ impl VfsBuilder { #[allow(clippy::disallowed_methods)] let file_bytes = std::fs::read(path) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file_with_data_raw(path, file_bytes, VfsFileSubDataKind::Raw) + self.add_file_with_data_raw(path, file_bytes) } pub fn add_file_with_data( &mut self, path: &Path, - data: Vec, - sub_data_kind: VfsFileSubDataKind, + options: AddFileDataOptions, ) -> Result<(), AnyError> { // ok, fs implementation #[allow(clippy::disallowed_methods)] @@ -552,9 +677,9 @@ impl VfsBuilder { })?; if metadata.is_symlink() { let target = self.add_symlink(path)?.into_path_buf(); - self.add_file_with_data_raw(&target, data, sub_data_kind) + self.add_file_with_data_raw_options(&target, options) } else { - self.add_file_with_data_raw(path, data, sub_data_kind) + self.add_file_with_data_raw_options(path, options) } } @@ -562,25 +687,39 @@ impl VfsBuilder { &mut self, path: &Path, data: Vec, - sub_data_kind: VfsFileSubDataKind, + ) -> Result<(), AnyError> { + self.add_file_with_data_raw_options( + path, + AddFileDataOptions { + data, + maybe_transpiled: None, + maybe_cjs_export_analysis: None, + maybe_source_map: None, + }, + ) + } + + fn add_file_with_data_raw_options( + &mut self, + path: &Path, + options: AddFileDataOptions, ) -> Result<(), AnyError> { log::debug!("Adding file '{}'", path.display()); - let checksum = crate::util::checksum::gen(&[&data]); let case_sensitivity = self.case_sensitivity; - let offset = if let Some(offset) = self.file_offsets.get(&checksum) { - // duplicate file, reuse an old offset - *offset - } else { - self.file_offsets.insert(checksum, self.current_offset); - self.current_offset - }; + let offset_and_len = 
self.files.add_data(options.data); + let transpiled_offset = options + .maybe_transpiled + .map(|data| self.files.add_data(data)); + let source_map_offset = options + .maybe_source_map + .map(|data| self.files.add_data(data)); + let cjs_export_analysis_offset = options + .maybe_cjs_export_analysis + .map(|data| self.files.add_data(data)); let dir = self.add_dir_raw(path.parent().unwrap()); let name = path.file_name().unwrap().to_string_lossy(); - let offset_and_len = OffsetWithLength { - offset, - len: data.len() as u64, - }; + dir.entries.insert_or_modify( &name, case_sensitivity, @@ -588,28 +727,30 @@ impl VfsBuilder { VfsEntry::File(VirtualFile { name: name.to_string(), offset: offset_and_len, - module_graph_offset: offset_and_len, + transpiled_offset, + cjs_export_analysis_offset, + source_map_offset, }) }, |entry| match entry { - VfsEntry::File(virtual_file) => match sub_data_kind { - VfsFileSubDataKind::Raw => { - virtual_file.offset = offset_and_len; + VfsEntry::File(virtual_file) => { + virtual_file.offset = offset_and_len; + // doesn't overwrite to None + if transpiled_offset.is_some() { + virtual_file.transpiled_offset = transpiled_offset; } - VfsFileSubDataKind::ModuleGraph => { - virtual_file.module_graph_offset = offset_and_len; + if source_map_offset.is_some() { + virtual_file.source_map_offset = source_map_offset; } - }, + if cjs_export_analysis_offset.is_some() { + virtual_file.cjs_export_analysis_offset = + cjs_export_analysis_offset; + } + } VfsEntry::Dir(_) | VfsEntry::Symlink(_) => unreachable!(), }, ); - // new file, update the list of files - if self.current_offset == offset { - self.files.push(data); - self.current_offset += offset_and_len.len; - } - Ok(()) } @@ -689,6 +830,53 @@ impl VfsBuilder { } } + /// Adds the CJS export analysis to the provided file. + /// + /// Warning: This will panic if the file wasn't properly + /// setup before calling this. 
+ pub fn add_cjs_export_analysis(&mut self, path: &Path, data: Vec) { + self.add_data_for_file_or_panic(path, data, |file, offset_with_length| { + file.cjs_export_analysis_offset = Some(offset_with_length); + }) + } + + fn add_data_for_file_or_panic( + &mut self, + path: &Path, + data: Vec, + update_file: impl FnOnce(&mut VirtualFile, OffsetWithLength), + ) { + let offset_with_length = self.files.add_data(data); + let case_sensitivity = self.case_sensitivity; + let dir = self.get_dir_mut(path.parent().unwrap()).unwrap(); + let name = path.file_name().unwrap().to_string_lossy(); + let file = dir + .entries + .get_mut_by_name(&name, case_sensitivity) + .unwrap(); + match file { + VfsEntry::File(virtual_file) => { + update_file(virtual_file, offset_with_length); + } + VfsEntry::Dir(_) | VfsEntry::Symlink(_) => { + unreachable!() + } + } + } + + /// Iterates through all the files in the virtual file system. + pub fn iter_files( + &self, + ) -> impl Iterator + '_ { + FileIterator { + pending_dirs: VecDeque::from([( + WindowsSystemRootablePath::root_for_current_os(), + &self.executable_root, + )]), + current_dir_index: 0, + } + } + pub fn build(self) -> BuiltVfs { fn strip_prefix_from_symlinks( dir: &mut VirtualDirectory, @@ -714,11 +902,7 @@ impl VfsBuilder { } let mut current_dir = self.executable_root; - let mut current_path = if cfg!(windows) { - WindowsSystemRootablePath::WindowSystemRoot - } else { - WindowsSystemRootablePath::Path(PathBuf::from("/")) - }; + let mut current_path = WindowsSystemRootablePath::root_for_current_os(); loop { if current_dir.entries.len() != 1 { break; @@ -754,11 +938,51 @@ impl VfsBuilder { root_path: current_path, case_sensitivity: self.case_sensitivity, entries: current_dir.entries, - files: self.files, + files: self.files.files, } } } +struct FileIterator<'a> { + pending_dirs: VecDeque<(WindowsSystemRootablePath, &'a VirtualDirectory)>, + current_dir_index: usize, +} + +impl<'a> Iterator for FileIterator<'a> { + type Item = (PathBuf, 
&'a VirtualFile); + + fn next(&mut self) -> Option { + while !self.pending_dirs.is_empty() { + let (dir_path, current_dir) = self.pending_dirs.front()?; + if let Some(entry) = + current_dir.entries.get_by_index(self.current_dir_index) + { + self.current_dir_index += 1; + match entry { + VfsEntry::Dir(virtual_directory) => { + self.pending_dirs.push_back(( + WindowsSystemRootablePath::Path( + dir_path.join(&virtual_directory.name), + ), + virtual_directory, + )); + } + VfsEntry::File(virtual_file) => { + return Some((dir_path.join(&virtual_file.name), virtual_file)); + } + VfsEntry::Symlink(_) => { + // ignore + } + } + } else { + self.pending_dirs.pop_front(); + self.current_dir_index = 0; + } + } + None + } +} + #[derive(Debug)] pub enum SymlinkTarget { File(PathBuf), diff --git a/cli/rt/Cargo.toml b/cli/rt/Cargo.toml index 63eaba29c4..c10a1640c4 100644 --- a/cli/rt/Cargo.toml +++ b/cli/rt/Cargo.toml @@ -26,16 +26,12 @@ deno_runtime = { workspace = true, features = ["include_js_files_for_snapshottin deno_core = { workspace = true, features = ["include_js_files_for_snapshotting"] } [dependencies] -deno_ast = { workspace = true, features = ["bundler", "cjs", "codegen", "proposal", "react", "sourcemap", "transforms", "typescript", "view", "visit"] } -# todo(dsherret): remove deno_cache_dir deno_cache_dir.workspace = true deno_config.workspace = true deno_core = { workspace = true, features = ["include_js_files_for_snapshotting"] } deno_error.workspace = true -# todo(dsherret): remove deno_graph -deno_graph = { version = "=0.87.0" } deno_lib.workspace = true -deno_media_type.workspace = true +deno_media_type = { workspace = true, features = ["data_url", "decoding"] } deno_npm.workspace = true deno_package_json.workspace = true deno_path_util.workspace = true @@ -48,11 +44,14 @@ libsui = "0.5.0" node_resolver.workspace = true async-trait.workspace = true +bincode = "=1.3.3" import_map = { version = "=0.21.0", features = ["ext"] } indexmap.workspace = true log = { 
workspace = true, features = ["serde"] } serde.workspace = true +serde_json.workspace = true sys_traits = { workspace = true, features = ["getrandom", "filetime", "libc", "real", "strip_unc", "winapi"] } +thiserror.workspace = true tokio.workspace = true tokio-util.workspace = true twox-hash.workspace = true diff --git a/cli/rt/binary.rs b/cli/rt/binary.rs index 0c77892296..19aad257ca 100644 --- a/cli/rt/binary.rs +++ b/cli/rt/binary.rs @@ -16,11 +16,14 @@ use deno_core::url::Url; use deno_core::FastString; use deno_core::ModuleSourceCode; use deno_core::ModuleType; +use deno_error::JsError; use deno_error::JsErrorBox; +use deno_lib::standalone::binary::DenoRtDeserializable; use deno_lib::standalone::binary::Metadata; -use deno_lib::standalone::binary::SourceMapStore; +use deno_lib::standalone::binary::RemoteModuleEntry; +use deno_lib::standalone::binary::SpecifierDataStore; +use deno_lib::standalone::binary::SpecifierId; use deno_lib::standalone::binary::MAGIC_BYTES; -use deno_lib::standalone::virtual_fs::VfsFileSubDataKind; use deno_lib::standalone::virtual_fs::VirtualDirectory; use deno_lib::standalone::virtual_fs::VirtualDirectoryEntries; use deno_media_type::MediaType; @@ -33,16 +36,17 @@ use deno_runtime::deno_fs::RealFs; use deno_runtime::deno_io::fs::FsError; use deno_semver::package::PackageReq; use deno_semver::StackString; +use indexmap::IndexMap; +use thiserror::Error; use crate::file_system::FileBackedVfs; use crate::file_system::VfsRoot; pub struct StandaloneData { pub metadata: Metadata, - pub modules: StandaloneModules, + pub modules: Arc, pub npm_snapshot: Option, pub root_path: PathBuf, - pub source_maps: SourceMapStore, pub vfs: Arc, } @@ -58,18 +62,6 @@ pub fn extract_standalone( return Ok(None); }; - let DeserializedDataSection { - mut metadata, - npm_snapshot, - remote_modules, - source_maps, - vfs_root_entries, - vfs_files_data, - } = match deserialize_binary_data_section(data)? 
{ - Some(data_section) => data_section, - None => return Ok(None), - }; - let root_path = { let maybe_current_exe = std::env::current_exe().ok(); let current_exe_name = maybe_current_exe @@ -80,6 +72,19 @@ pub fn extract_standalone( .unwrap_or_else(|| Cow::Borrowed("binary")); std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)) }; + let root_url = deno_path_util::url_from_directory_path(&root_path)?; + + let DeserializedDataSection { + mut metadata, + npm_snapshot, + modules_store: remote_modules, + vfs_root_entries, + vfs_files_data, + } = match deserialize_binary_data_section(&root_url, data)? { + Some(data_section) => data_section, + None => return Ok(None), + }; + let cli_args = cli_args.into_owned(); metadata.argv.reserve(cli_args.len() - 1); for arg in cli_args.into_iter().skip(1) { @@ -103,13 +108,12 @@ pub fn extract_standalone( }; Ok(Some(StandaloneData { metadata, - modules: StandaloneModules { - remote_modules, + modules: Arc::new(StandaloneModules { + modules: remote_modules, vfs: vfs.clone(), - }, + }), npm_snapshot, root_path, - source_maps, vfs, })) } @@ -117,13 +121,13 @@ pub fn extract_standalone( pub struct DeserializedDataSection { pub metadata: Metadata, pub npm_snapshot: Option, - pub remote_modules: RemoteModulesStore, - pub source_maps: SourceMapStore, + pub modules_store: RemoteModulesStore, pub vfs_root_entries: VirtualDirectoryEntries, pub vfs_files_data: &'static [u8], } pub fn deserialize_binary_data_section( + root_dir_url: &Url, data: &'static [u8], ) -> Result, AnyError> { fn read_magic_bytes(input: &[u8]) -> Result<(&[u8], bool), AnyError> { @@ -137,15 +141,6 @@ pub fn deserialize_binary_data_section( Ok((input, true)) } - #[allow(clippy::type_complexity)] - fn read_source_map_entry( - input: &[u8], - ) -> Result<(&[u8], (Cow, &[u8])), AnyError> { - let (input, specifier) = read_string_lossy(input)?; - let (input, source_map) = read_bytes_with_u32_len(input)?; - Ok((input, (specifier, source_map))) - } - let 
(input, found) = read_magic_bytes(data)?; if !found { return Ok(None); @@ -164,24 +159,24 @@ pub fn deserialize_binary_data_section( } else { Some(deserialize_npm_snapshot(data).context("deserializing npm snapshot")?) }; - // 3. Remote modules - let (input, remote_modules) = - RemoteModulesStore::build(input).context("deserializing remote modules")?; - // 4. VFS + // 3. Specifiers + let (input, specifiers_store) = + SpecifierStore::deserialize(root_dir_url, input) + .context("deserializing specifiers")?; + // 4. Redirects + let (input, redirects_store) = + SpecifierDataStore::::deserialize(input) + .context("deserializing redirects")?; + // 5. Remote modules + let (input, remote_modules_store) = + SpecifierDataStore::>::deserialize(input) + .context("deserializing remote modules")?; + // 6. VFS let (input, data) = read_bytes_with_u64_len(input).context("vfs")?; let vfs_root_entries: VirtualDirectoryEntries = serde_json::from_slice(data).context("deserializing vfs data")?; let (input, vfs_files_data) = read_bytes_with_u64_len(input).context("reading vfs files data")?; - // 5. 
Source maps - let (mut input, source_map_data_len) = read_u32_as_usize(input)?; - let mut source_maps = SourceMapStore::with_capacity(source_map_data_len); - for _ in 0..source_map_data_len { - let (current_input, (specifier, source_map)) = - read_source_map_entry(input)?; - input = current_input; - source_maps.add(specifier, Cow::Borrowed(source_map)); - } // finally ensure we read the magic bytes at the end let (_input, found) = read_magic_bytes(input)?; @@ -189,18 +184,69 @@ pub fn deserialize_binary_data_section( bail!("Could not find magic bytes at the end of the data."); } + let modules_store = RemoteModulesStore::new( + specifiers_store, + redirects_store, + remote_modules_store, + ); + Ok(Some(DeserializedDataSection { metadata, npm_snapshot, - remote_modules, - source_maps, + modules_store, vfs_root_entries, vfs_files_data, })) } +struct SpecifierStore { + data: IndexMap, SpecifierId>, + reverse: IndexMap>, +} + +impl SpecifierStore { + pub fn deserialize<'a>( + root_dir_url: &Url, + input: &'a [u8], + ) -> std::io::Result<(&'a [u8], Self)> { + let (input, len) = read_u32_as_usize(input)?; + let mut data = IndexMap::with_capacity(len); + let mut reverse = IndexMap::with_capacity(len); + let mut input = input; + for _ in 0..len { + let (new_input, specifier_str) = read_string_lossy(input)?; + let specifier = match Url::parse(&specifier_str) { + Ok(url) => url, + Err(err) => match root_dir_url.join(&specifier_str) { + Ok(url) => url, + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + err, + )); + } + }, + }; + let (new_input, id) = SpecifierId::deserialize(new_input)?; + let specifier = Arc::new(specifier); + data.insert(specifier.clone(), id); + reverse.insert(id, specifier); + input = new_input; + } + Ok((input, Self { data, reverse })) + } + + pub fn get_id(&self, specifier: &Url) -> Option { + self.data.get(specifier).cloned() + } + + pub fn get_specifier(&self, specifier_id: SpecifierId) -> Option<&Url> { + 
self.reverse.get(&specifier_id).map(|url| url.as_ref()) + } +} + pub struct StandaloneModules { - remote_modules: RemoteModulesStore, + modules: RemoteModulesStore, vfs: Arc, } @@ -208,11 +254,11 @@ impl StandaloneModules { pub fn resolve_specifier<'a>( &'a self, specifier: &'a Url, - ) -> Result, JsErrorBox> { + ) -> Result, TooManyRedirectsError> { if specifier.scheme() == "file" { Ok(Some(specifier)) } else { - self.remote_modules.resolve_specifier(specifier) + self.modules.resolve_specifier(specifier) } } @@ -223,41 +269,51 @@ impl StandaloneModules { pub fn read<'a>( &'a self, specifier: &'a Url, - kind: VfsFileSubDataKind, - ) -> Result>, AnyError> { + ) -> Result>, JsErrorBox> { if specifier.scheme() == "file" { - let path = deno_path_util::url_to_file_path(specifier)?; + let path = deno_path_util::url_to_file_path(specifier) + .map_err(JsErrorBox::from_err)?; + let mut transpiled = None; + let mut source_map = None; + let mut cjs_export_analysis = None; let bytes = match self.vfs.file_entry(&path) { - Ok(entry) => self.vfs.read_file_all(entry, kind)?, + Ok(entry) => { + let bytes = self + .vfs + .read_file_all(entry) + .map_err(JsErrorBox::from_err)?; + transpiled = entry + .transpiled_offset + .and_then(|t| self.vfs.read_file_offset_with_len(t).ok()); + source_map = entry + .source_map_offset + .and_then(|t| self.vfs.read_file_offset_with_len(t).ok()); + cjs_export_analysis = entry + .cjs_export_analysis_offset + .and_then(|t| self.vfs.read_file_offset_with_len(t).ok()); + bytes + } Err(err) if err.kind() == ErrorKind::NotFound => { match RealFs.read_file_sync(&path, None) { Ok(bytes) => bytes, Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => { return Ok(None) } - Err(err) => return Err(err.into()), + Err(err) => return Err(JsErrorBox::from_err(err)), } } - Err(err) => return Err(err.into()), + Err(err) => return Err(JsErrorBox::from_err(err)), }; Ok(Some(DenoCompileModuleData { media_type: MediaType::from_specifier(specifier), specifier, 
data: bytes, + transpiled, + source_map, + cjs_export_analysis, })) } else { - self.remote_modules.read(specifier).map(|maybe_entry| { - maybe_entry.map(|entry| DenoCompileModuleData { - media_type: entry.media_type, - specifier: entry.specifier, - data: match kind { - VfsFileSubDataKind::Raw => entry.data, - VfsFileSubDataKind::ModuleGraph => { - entry.transpiled_data.unwrap_or(entry.data) - } - }, - }) - }) + self.modules.read(specifier).map_err(JsErrorBox::from_err) } } } @@ -266,6 +322,9 @@ pub struct DenoCompileModuleData<'a> { pub specifier: &'a Url, pub media_type: MediaType, pub data: Cow<'static, [u8]>, + pub transpiled: Option>, + pub source_map: Option>, + pub cjs_export_analysis: Option>, } impl<'a> DenoCompileModuleData<'a> { @@ -280,6 +339,7 @@ impl<'a> DenoCompileModuleData<'a> { } } + let data = self.transpiled.unwrap_or(self.data); let (media_type, source) = match self.media_type { MediaType::JavaScript | MediaType::Jsx @@ -291,18 +351,15 @@ impl<'a> DenoCompileModuleData<'a> { | MediaType::Dts | MediaType::Dmts | MediaType::Dcts - | MediaType::Tsx => { - (ModuleType::JavaScript, into_string_unsafe(self.data)) - } - MediaType::Json => (ModuleType::Json, into_string_unsafe(self.data)), + | MediaType::Tsx => (ModuleType::JavaScript, into_string_unsafe(data)), + MediaType::Json => (ModuleType::Json, into_string_unsafe(data)), MediaType::Wasm => { - (ModuleType::Wasm, DenoCompileModuleSource::Bytes(self.data)) + (ModuleType::Wasm, DenoCompileModuleSource::Bytes(data)) } // just assume javascript if we made it here - MediaType::Css | MediaType::SourceMap | MediaType::Unknown => ( - ModuleType::JavaScript, - DenoCompileModuleSource::Bytes(self.data), - ), + MediaType::Css | MediaType::SourceMap | MediaType::Unknown => { + (ModuleType::JavaScript, DenoCompileModuleSource::Bytes(data)) + } }; (self.specifier, media_type, source) } @@ -332,102 +389,53 @@ impl DenoCompileModuleSource { } } -pub struct RemoteModuleEntry<'a> { - pub specifier: &'a Url, - pub 
media_type: MediaType, - pub data: Cow<'static, [u8]>, - pub transpiled_data: Option>, -} - -enum RemoteModulesStoreSpecifierValue { - Data(usize), - Redirect(Url), -} +#[derive(Debug, Error, JsError)] +#[class(generic)] +#[error("Too many redirects resolving: {0}")] +pub struct TooManyRedirectsError(Url); pub struct RemoteModulesStore { - specifiers: HashMap, - files_data: &'static [u8], + specifiers: SpecifierStore, + redirects: SpecifierDataStore, + remote_modules: SpecifierDataStore>, } impl RemoteModulesStore { - fn build(input: &'static [u8]) -> Result<(&'static [u8], Self), AnyError> { - fn read_specifier(input: &[u8]) -> Result<(&[u8], (Url, u64)), AnyError> { - let (input, specifier) = read_string_lossy(input)?; - let specifier = Url::parse(&specifier)?; - let (input, offset) = read_u64(input)?; - Ok((input, (specifier, offset))) + fn new( + specifiers: SpecifierStore, + redirects: SpecifierDataStore, + remote_modules: SpecifierDataStore>, + ) -> Self { + Self { + specifiers, + redirects, + remote_modules, } - - fn read_redirect(input: &[u8]) -> Result<(&[u8], (Url, Url)), AnyError> { - let (input, from) = read_string_lossy(input)?; - let from = Url::parse(&from)?; - let (input, to) = read_string_lossy(input)?; - let to = Url::parse(&to)?; - Ok((input, (from, to))) - } - - fn read_headers( - input: &[u8], - ) -> Result<(&[u8], HashMap), AnyError> - { - let (input, specifiers_len) = read_u32_as_usize(input)?; - let (mut input, redirects_len) = read_u32_as_usize(input)?; - let mut specifiers = - HashMap::with_capacity(specifiers_len + redirects_len); - for _ in 0..specifiers_len { - let (current_input, (specifier, offset)) = - read_specifier(input).context("reading specifier")?; - input = current_input; - specifiers.insert( - specifier, - RemoteModulesStoreSpecifierValue::Data(offset as usize), - ); - } - - for _ in 0..redirects_len { - let (current_input, (from, to)) = read_redirect(input)?; - input = current_input; - specifiers.insert(from, 
RemoteModulesStoreSpecifierValue::Redirect(to)); - } - - Ok((input, specifiers)) - } - - let (input, specifiers) = read_headers(input)?; - let (input, files_data) = read_bytes_with_u64_len(input)?; - - Ok(( - input, - Self { - specifiers, - files_data, - }, - )) } pub fn resolve_specifier<'a>( &'a self, specifier: &'a Url, - ) -> Result, JsErrorBox> { + ) -> Result, TooManyRedirectsError> { + let Some(mut current) = self.specifiers.get_id(specifier) else { + return Ok(None); + }; let mut count = 0; - let mut current = specifier; loop { if count > 10 { - return Err(JsErrorBox::generic(format!( - "Too many redirects resolving '{}'", - specifier - ))); + return Err(TooManyRedirectsError(specifier.clone())); } - match self.specifiers.get(current) { - Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { - current = to; + match self.redirects.get(current) { + Some(to) => { + current = *to; count += 1; } - Some(RemoteModulesStoreSpecifierValue::Data(_)) => { - return Ok(Some(current)); - } None => { - return Ok(None); + if count == 0 { + return Ok(Some(specifier)); + } else { + return Ok(self.specifiers.get_specifier(current)); + } } } } @@ -436,45 +444,52 @@ impl RemoteModulesStore { pub fn read<'a>( &'a self, original_specifier: &'a Url, - ) -> Result>, AnyError> { + ) -> Result>, TooManyRedirectsError> { + #[allow(clippy::ptr_arg)] + fn handle_cow_ref(data: &Cow<'static, [u8]>) -> Cow<'static, [u8]> { + match data { + Cow::Borrowed(data) => Cow::Borrowed(data), + Cow::Owned(data) => { + // this variant should never happen because the data + // should always be borrowed static in denort + debug_assert!(false); + Cow::Owned(data.clone()) + } + } + } + let mut count = 0; - let mut specifier = original_specifier; + let Some(mut specifier) = self.specifiers.get_id(original_specifier) else { + return Ok(None); + }; loop { if count > 10 { - bail!("Too many redirects resolving '{}'", original_specifier); + return Err(TooManyRedirectsError(original_specifier.clone())); } - 
match self.specifiers.get(specifier) { - Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { - specifier = to; + match self.redirects.get(specifier) { + Some(to) => { + specifier = *to; count += 1; } - Some(RemoteModulesStoreSpecifierValue::Data(offset)) => { - let input = &self.files_data[*offset..]; - let (input, media_type_byte) = read_bytes(input, 1)?; - let media_type = deserialize_media_type(media_type_byte[0])?; - let (input, data) = read_bytes_with_u32_len(input)?; - check_has_len(input, 1)?; - let (input, has_transpiled) = (&input[1..], input[0]); - let (_, transpiled_data) = match has_transpiled { - 0 => (input, None), - 1 => { - let (input, data) = read_bytes_with_u32_len(input)?; - (input, Some(data)) - } - value => bail!( - "Invalid transpiled data flag: {}. Compiled data is corrupt.", - value - ), - }; - return Ok(Some(RemoteModuleEntry { - specifier, - media_type, - data: Cow::Borrowed(data), - transpiled_data: transpiled_data.map(Cow::Borrowed), - })); - } None => { - return Ok(None); + let Some(entry) = self.remote_modules.get(specifier) else { + return Ok(None); + }; + return Ok(Some(DenoCompileModuleData { + specifier: if count == 0 { + original_specifier + } else { + self.specifiers.get_specifier(specifier).unwrap() + }, + media_type: entry.media_type, + data: handle_cow_ref(&entry.data), + transpiled: entry.maybe_transpiled.as_ref().map(handle_cow_ref), + source_map: entry.maybe_source_map.as_ref().map(handle_cow_ref), + cjs_export_analysis: entry + .maybe_cjs_export_analysis + .as_ref() + .map(handle_cow_ref), + })); } } } @@ -581,28 +596,6 @@ fn deserialize_npm_snapshot( ) } -fn deserialize_media_type(value: u8) -> Result { - match value { - 0 => Ok(MediaType::JavaScript), - 1 => Ok(MediaType::Jsx), - 2 => Ok(MediaType::Mjs), - 3 => Ok(MediaType::Cjs), - 4 => Ok(MediaType::TypeScript), - 5 => Ok(MediaType::Mts), - 6 => Ok(MediaType::Cts), - 7 => Ok(MediaType::Dts), - 8 => Ok(MediaType::Dmts), - 9 => Ok(MediaType::Dcts), - 10 => 
Ok(MediaType::Tsx), - 11 => Ok(MediaType::Json), - 12 => Ok(MediaType::Wasm), - 13 => Ok(MediaType::Css), - 14 => Ok(MediaType::SourceMap), - 15 => Ok(MediaType::Unknown), - _ => bail!("Unknown media type value: {}", value), - } -} - fn parse_hashmap_n_times( mut input: &[u8], times: usize, @@ -641,45 +634,49 @@ fn parse_vec_n_times_with_index( Ok((input, results)) } -fn read_bytes_with_u64_len(input: &[u8]) -> Result<(&[u8], &[u8]), AnyError> { +fn read_bytes_with_u64_len(input: &[u8]) -> std::io::Result<(&[u8], &[u8])> { let (input, len) = read_u64(input)?; let (input, data) = read_bytes(input, len as usize)?; Ok((input, data)) } -fn read_bytes_with_u32_len(input: &[u8]) -> Result<(&[u8], &[u8]), AnyError> { +fn read_bytes_with_u32_len(input: &[u8]) -> std::io::Result<(&[u8], &[u8])> { let (input, len) = read_u32_as_usize(input)?; let (input, data) = read_bytes(input, len)?; Ok((input, data)) } -fn read_bytes(input: &[u8], len: usize) -> Result<(&[u8], &[u8]), AnyError> { +fn read_bytes(input: &[u8], len: usize) -> std::io::Result<(&[u8], &[u8])> { check_has_len(input, len)?; let (len_bytes, input) = input.split_at(len); Ok((input, len_bytes)) } #[inline(always)] -fn check_has_len(input: &[u8], len: usize) -> Result<(), AnyError> { +fn check_has_len(input: &[u8], len: usize) -> std::io::Result<()> { if input.len() < len { - bail!("Unexpected end of data."); + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Unexpected end of data", + )) + } else { + Ok(()) } - Ok(()) } -fn read_string_lossy(input: &[u8]) -> Result<(&[u8], Cow), AnyError> { +fn read_string_lossy(input: &[u8]) -> std::io::Result<(&[u8], Cow)> { let (input, data_bytes) = read_bytes_with_u32_len(input)?; Ok((input, String::from_utf8_lossy(data_bytes))) } -fn read_u32_as_usize(input: &[u8]) -> Result<(&[u8], usize), AnyError> { +fn read_u32_as_usize(input: &[u8]) -> std::io::Result<(&[u8], usize)> { let (input, len_bytes) = read_bytes(input, 4)?; - let len = 
u32::from_le_bytes(len_bytes.try_into()?); + let len = u32::from_le_bytes(len_bytes.try_into().unwrap()); Ok((input, len as usize)) } -fn read_u64(input: &[u8]) -> Result<(&[u8], u64), AnyError> { +fn read_u64(input: &[u8]) -> std::io::Result<(&[u8], u64)> { let (input, len_bytes) = read_bytes(input, 8)?; - let len = u64::from_le_bytes(len_bytes.try_into()?); + let len = u64::from_le_bytes(len_bytes.try_into().unwrap()); Ok((input, len)) } diff --git a/cli/rt/file_system.rs b/cli/rt/file_system.rs index 8cbe5300a1..097674f015 100644 --- a/cli/rt/file_system.rs +++ b/cli/rt/file_system.rs @@ -17,9 +17,9 @@ use deno_core::BufMutView; use deno_core::BufView; use deno_core::ResourceHandleFd; use deno_lib::standalone::virtual_fs::FileSystemCaseSensitivity; +use deno_lib::standalone::virtual_fs::OffsetWithLength; use deno_lib::standalone::virtual_fs::VfsEntry; use deno_lib::standalone::virtual_fs::VfsEntryRef; -use deno_lib::standalone::virtual_fs::VfsFileSubDataKind; use deno_lib::standalone::virtual_fs::VirtualDirectory; use deno_lib::standalone::virtual_fs::VirtualFile; use deno_lib::sys::DenoLibSys; @@ -40,6 +40,7 @@ use sys_traits::boxed::BoxedFsMetadataValue; use sys_traits::boxed::FsMetadataBoxed; use sys_traits::boxed::FsReadDirBoxed; use sys_traits::FsCopy; +use url::Url; #[derive(Debug, Clone)] pub struct DenoRtSys(Arc); @@ -49,6 +50,16 @@ impl DenoRtSys { Self(vfs) } + pub fn is_specifier_in_vfs(&self, specifier: &Url) -> bool { + deno_path_util::url_to_file_path(specifier) + .map(|p| self.is_in_vfs(&p)) + .unwrap_or(false) + } + + pub fn is_in_vfs(&self, path: &Path) -> bool { + self.0.is_path_within(path) + } + fn error_if_in_vfs(&self, path: &Path) -> FsResult<()> { if self.0.is_path_within(path) { Err(FsError::NotSupported) @@ -63,8 +74,7 @@ impl DenoRtSys { newpath: &Path, ) -> std::io::Result { let old_file = self.0.file_entry(oldpath)?; - let old_file_bytes = - self.0.read_file_all(old_file, VfsFileSubDataKind::Raw)?; + let old_file_bytes = 
self.0.read_file_all(old_file)?; let len = old_file_bytes.len() as u64; RealFs .write_file_sync( @@ -1079,11 +1089,7 @@ impl FileBackedVfsFile { return Ok(Cow::Borrowed(&[])); } if read_pos == 0 { - Ok( - self - .vfs - .read_file_all(&self.file, VfsFileSubDataKind::Raw)?, - ) + Ok(self.vfs.read_file_all(&self.file)?) } else { let size = (self.file.offset.len - read_pos) as usize; let mut buf = vec![0; size]; @@ -1378,13 +1384,16 @@ impl FileBackedVfs { pub fn read_file_all( &self, file: &VirtualFile, - sub_data_kind: VfsFileSubDataKind, ) -> std::io::Result> { - let read_len = match sub_data_kind { - VfsFileSubDataKind::Raw => file.offset.len, - VfsFileSubDataKind::ModuleGraph => file.module_graph_offset.len, - }; - let read_range = self.get_read_range(file, sub_data_kind, 0, read_len)?; + self.read_file_offset_with_len(file.offset) + } + + pub fn read_file_offset_with_len( + &self, + offset_with_len: OffsetWithLength, + ) -> std::io::Result> { + let read_range = + self.get_read_range(offset_with_len, 0, offset_with_len.len)?; match &self.vfs_data { Cow::Borrowed(data) => Ok(Cow::Borrowed(&data[read_range])), Cow::Owned(data) => Ok(Cow::Owned(data[read_range].to_vec())), @@ -1397,12 +1406,7 @@ impl FileBackedVfs { pos: u64, buf: &mut [u8], ) -> std::io::Result { - let read_range = self.get_read_range( - file, - VfsFileSubDataKind::Raw, - pos, - buf.len() as u64, - )?; + let read_range = self.get_read_range(file.offset, pos, buf.len() as u64)?; let read_len = read_range.len(); buf[..read_len].copy_from_slice(&self.vfs_data[read_range]); Ok(read_len) @@ -1410,15 +1414,10 @@ impl FileBackedVfs { fn get_read_range( &self, - file: &VirtualFile, - sub_data_kind: VfsFileSubDataKind, + file_offset_and_len: OffsetWithLength, pos: u64, len: u64, ) -> std::io::Result> { - let file_offset_and_len = match sub_data_kind { - VfsFileSubDataKind::Raw => file.offset, - VfsFileSubDataKind::ModuleGraph => file.module_graph_offset, - }; if pos > file_offset_and_len.len { return 
Err(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, @@ -1470,13 +1469,7 @@ mod test { #[track_caller] fn read_file(vfs: &FileBackedVfs, path: &Path) -> String { let file = vfs.file_entry(path).unwrap(); - String::from_utf8( - vfs - .read_file_all(file, VfsFileSubDataKind::Raw) - .unwrap() - .into_owned(), - ) - .unwrap() + String::from_utf8(vfs.read_file_all(file).unwrap().into_owned()).unwrap() } #[test] @@ -1492,32 +1485,19 @@ mod test { let src_path = src_path.to_path_buf(); let mut builder = VfsBuilder::new(); builder - .add_file_with_data_raw( - &src_path.join("a.txt"), - "data".into(), - VfsFileSubDataKind::Raw, - ) + .add_file_with_data_raw(&src_path.join("a.txt"), "data".into()) .unwrap(); builder - .add_file_with_data_raw( - &src_path.join("b.txt"), - "data".into(), - VfsFileSubDataKind::Raw, - ) + .add_file_with_data_raw(&src_path.join("b.txt"), "data".into()) .unwrap(); assert_eq!(builder.files_len(), 1); // because duplicate data builder - .add_file_with_data_raw( - &src_path.join("c.txt"), - "c".into(), - VfsFileSubDataKind::Raw, - ) + .add_file_with_data_raw(&src_path.join("c.txt"), "c".into()) .unwrap(); builder .add_file_with_data_raw( &src_path.join("sub_dir").join("d.txt"), "d".into(), - VfsFileSubDataKind::Raw, ) .unwrap(); builder.add_file_at_path(&src_path.join("e.txt")).unwrap(); @@ -1678,7 +1658,6 @@ mod test { .add_file_with_data_raw( temp_path.join("a.txt").as_path(), "0123456789".to_string().into_bytes(), - VfsFileSubDataKind::Raw, ) .unwrap(); let (dest_path, virtual_fs) = into_virtual_fs(builder, &temp_dir); diff --git a/cli/rt/node.rs b/cli/rt/node.rs index c3545bf4a4..ef4f99cc8a 100644 --- a/cli/rt/node.rs +++ b/cli/rt/node.rs @@ -6,15 +6,17 @@ use std::sync::Arc; use deno_core::url::Url; use deno_error::JsErrorBox; use deno_lib::loader::NpmModuleLoader; +use deno_lib::standalone::binary::CjsExportAnalysisEntry; use deno_media_type::MediaType; use deno_resolver::npm::DenoInNpmPackageChecker; use 
deno_resolver::npm::NpmReqResolver; -use deno_runtime::deno_fs; +use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::RealIsBuiltInNodeModuleChecker; use node_resolver::analyze::CjsAnalysis; use node_resolver::analyze::CjsAnalysisExports; use node_resolver::analyze::NodeCodeTranslator; +use crate::binary::StandaloneModules; use crate::file_system::DenoRtSys; pub type DenoRtCjsTracker = @@ -48,18 +50,24 @@ pub type DenoRtNpmReqResolver = NpmReqResolver< pub struct CjsCodeAnalyzer { cjs_tracker: Arc, - fs: deno_fs::FileSystemRc, + modules: Arc, + sys: DenoRtSys, } impl CjsCodeAnalyzer { pub fn new( cjs_tracker: Arc, - fs: deno_fs::FileSystemRc, + modules: Arc, + sys: DenoRtSys, ) -> Self { - Self { cjs_tracker, fs } + Self { + cjs_tracker, + modules, + sys, + } } - async fn inner_cjs_analysis<'a>( + fn inner_cjs_analysis<'a>( &self, specifier: &Url, source: Cow<'a, str>, @@ -77,43 +85,42 @@ impl CjsCodeAnalyzer { .is_maybe_cjs(specifier, media_type) .map_err(JsErrorBox::from_err)?; let analysis = if is_maybe_cjs { - let maybe_cjs = deno_core::unsync::spawn_blocking({ - let specifier = specifier.clone(); - let source: Arc = source.to_string().into(); - move || -> Result<_, JsErrorBox> { - let parsed_source = deno_ast::parse_program(deno_ast::ParseParams { - specifier, - text: source.clone(), - media_type, - capture_tokens: true, - scope_analysis: false, - maybe_syntax: None, - }) - .map_err(JsErrorBox::from_err)?; - let is_script = parsed_source.compute_is_script(); - let is_cjs = cjs_tracker - .is_cjs_with_known_is_script( - parsed_source.specifier(), - media_type, - is_script, - ) - .map_err(JsErrorBox::from_err)?; - if is_cjs { - let analysis = parsed_source.analyze_cjs(); - Ok(Some(CjsAnalysisExports { - exports: analysis.exports, - reexports: analysis.reexports, - })) - } else { - Ok(None) + let data = self + .modules + .read(specifier)? 
+ .and_then(|d| d.cjs_export_analysis); + match data { + Some(data) => { + let data: CjsExportAnalysisEntry = bincode::deserialize(&data) + .map_err(|err| JsErrorBox::generic(err.to_string()))?; + match data { + CjsExportAnalysisEntry::Esm => { + cjs_tracker.set_is_known_script(specifier, false); + CjsAnalysis::Esm(source) + } + CjsExportAnalysisEntry::Cjs(analysis) => { + cjs_tracker.set_is_known_script(specifier, true); + CjsAnalysis::Cjs(analysis) + } } } - }) - .await - .unwrap()?; - match maybe_cjs { - Some(cjs) => CjsAnalysis::Cjs(cjs), - None => CjsAnalysis::Esm(source), + None => { + if log::log_enabled!(log::Level::Debug) { + if self.sys.is_specifier_in_vfs(specifier) { + log::debug!( + "No CJS export analysis was stored for '{}'. Assuming ESM. This might indicate a bug in Deno.", + specifier + ); + } else { + log::debug!( + "Analyzing potentially CommonJS files is not supported at runtime in a compiled executable ({}). Assuming ESM.", + specifier + ); + } + } + // assume ESM as we don't have access to swc here + CjsAnalysis::Esm(source) + } } } else { CjsAnalysis::Esm(source) @@ -133,9 +140,10 @@ impl node_resolver::analyze::CjsCodeAnalyzer for CjsCodeAnalyzer { let source = match source { Some(source) => source, None => { - if let Ok(path) = specifier.to_file_path() { + if let Ok(path) = deno_path_util::url_to_file_path(specifier) { + // todo(dsherret): should this use the sync method instead? 
if let Ok(source_from_file) = - self.fs.read_text_file_lossy_async(path, None).await + self.sys.read_text_file_lossy_async(path, None).await { source_from_file } else { @@ -152,6 +160,6 @@ impl node_resolver::analyze::CjsCodeAnalyzer for CjsCodeAnalyzer { } } }; - self.inner_cjs_analysis(specifier, source).await + self.inner_cjs_analysis(specifier, source) } } diff --git a/cli/rt/run.rs b/cli/rt/run.rs index 57bf95f35f..6f5c05b467 100644 --- a/cli/rt/run.rs +++ b/cli/rt/run.rs @@ -34,8 +34,6 @@ use deno_lib::npm::create_npm_process_state_provider; use deno_lib::npm::NpmRegistryReadPermissionChecker; use deno_lib::npm::NpmRegistryReadPermissionCheckerMode; use deno_lib::standalone::binary::NodeModules; -use deno_lib::standalone::binary::SourceMapStore; -use deno_lib::standalone::virtual_fs::VfsFileSubDataKind; use deno_lib::util::hash::FastInsecureHasher; use deno_lib::util::text_encoding::from_utf8_lossy_cow; use deno_lib::util::text_encoding::from_utf8_lossy_owned; @@ -98,13 +96,12 @@ use crate::node::DenoRtNpmReqResolver; struct SharedModuleLoaderState { cjs_tracker: Arc, code_cache: Option>, - modules: StandaloneModules, + modules: Arc, node_code_translator: Arc, node_resolver: Arc, npm_module_loader: Arc, npm_registry_permission_checker: NpmRegistryReadPermissionChecker, npm_req_resolver: Arc, - source_maps: SourceMapStore, vfs: Arc, workspace_resolver: WorkspaceResolver, } @@ -292,8 +289,11 @@ impl ModuleLoader for EmbeddedModuleLoader { } if specifier.scheme() == "jsr" { - if let Some(specifier) = - self.shared.modules.resolve_specifier(&specifier)? + if let Some(specifier) = self + .shared + .modules + .resolve_specifier(&specifier) + .map_err(JsErrorBox::from_err)? 
{ return Ok(specifier.clone()); } @@ -351,7 +351,7 @@ impl ModuleLoader for EmbeddedModuleLoader { ) -> deno_core::ModuleLoadResponse { if original_specifier.scheme() == "data" { let data_url_text = - match deno_graph::source::RawDataUrl::parse(original_specifier) + match deno_media_type::data_url::RawDataUrl::parse(original_specifier) .and_then(|url| url.decode()) { Ok(response) => response, @@ -401,11 +401,7 @@ impl ModuleLoader for EmbeddedModuleLoader { ); } - match self - .shared - .modules - .read(original_specifier, VfsFileSubDataKind::ModuleGraph) - { + match self.shared.modules.read(original_specifier) { Ok(Some(module)) => { let media_type = module.media_type; let (module_specifier, module_type, module_source) = @@ -511,16 +507,9 @@ impl ModuleLoader for EmbeddedModuleLoader { } fn get_source_map(&self, file_name: &str) -> Option> { - if file_name.starts_with("file:///") { - let url = - deno_path_util::url_from_directory_path(self.shared.vfs.root()).ok()?; - let file_url = Url::parse(file_name).ok()?; - let relative_path = url.make_relative(&file_url)?; - self.shared.source_maps.get(&relative_path) - } else { - self.shared.source_maps.get(file_name) - } - .map(Cow::Borrowed) + let url = Url::parse(file_name).ok()?; + let data = self.shared.modules.read(&url).ok()??; + data.source_map } fn get_source_mapped_source_line( @@ -529,11 +518,7 @@ impl ModuleLoader for EmbeddedModuleLoader { line_number: usize, ) -> Option { let specifier = Url::parse(file_name).ok()?; - let data = self - .shared - .modules - .read(&specifier, VfsFileSubDataKind::Raw) - .ok()??; + let data = self.shared.modules.read(&specifier).ok()??; let source = String::from_utf8_lossy(&data.data); // Do NOT use .lines(): it skips the terminating empty line. 
@@ -580,7 +565,9 @@ impl NodeRequireLoader for EmbeddedModuleLoader { let file_bytes = self .shared .vfs - .read_file_all(file_entry, VfsFileSubDataKind::ModuleGraph) + .read_file_offset_with_len( + file_entry.transpiled_offset.unwrap_or(file_entry.offset), + ) .map_err(JsErrorBox::from_err)?; Ok(from_utf8_lossy_cow(file_bytes)) } @@ -653,7 +640,6 @@ pub async fn run( modules, npm_snapshot, root_path, - source_maps, vfs, } = data; let root_cert_store_provider = Arc::new(StandaloneRootCertStoreProvider { @@ -798,7 +784,7 @@ pub async fn run( npm_resolver: npm_resolver.clone(), })); let cjs_esm_code_analyzer = - CjsCodeAnalyzer::new(cjs_tracker.clone(), fs.clone()); + CjsCodeAnalyzer::new(cjs_tracker.clone(), modules.clone(), sys.clone()); let node_code_translator = Arc::new(NodeCodeTranslator::new( cjs_esm_code_analyzer, in_npm_pkg_checker, @@ -883,7 +869,6 @@ pub async fn run( )), npm_registry_permission_checker, npm_req_resolver, - source_maps, vfs: vfs.clone(), workspace_resolver, }), diff --git a/cli/snapshot/Cargo.toml b/cli/snapshot/Cargo.toml index e4ad13e1d8..06c968ef63 100644 --- a/cli/snapshot/Cargo.toml +++ b/cli/snapshot/Cargo.toml @@ -17,4 +17,4 @@ path = "lib.rs" disable = [] [build-dependencies] -deno_runtime = { workspace = true, features = ["include_js_files_for_snapshotting", "only_snapshotted_js_sources"] } +deno_runtime = { workspace = true, features = ["include_js_files_for_snapshotting", "only_snapshotted_js_sources", "snapshotting"] } diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 3e4a5e4e4c..4cb30316c1 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -1,6 +1,7 @@ // Copyright 2018-2025 the Deno authors. MIT license. 
use std::borrow::Cow; +use std::collections::HashMap; use std::collections::VecDeque; use std::env; use std::ffi::OsString; @@ -10,6 +11,7 @@ use std::path::Component; use std::path::Path; use std::path::PathBuf; +use capacity_builder::BytesAppendable; use deno_ast::MediaType; use deno_ast::ModuleKind; use deno_ast::ModuleSpecifier; @@ -23,16 +25,19 @@ use deno_graph::ModuleGraph; use deno_lib::args::CaData; use deno_lib::args::UnstableConfig; use deno_lib::shared::ReleaseChannel; +use deno_lib::standalone::binary::CjsExportAnalysisEntry; use deno_lib::standalone::binary::Metadata; use deno_lib::standalone::binary::NodeModules; +use deno_lib::standalone::binary::RemoteModuleEntry; use deno_lib::standalone::binary::SerializedResolverWorkspaceJsrPackage; use deno_lib::standalone::binary::SerializedWorkspaceResolver; use deno_lib::standalone::binary::SerializedWorkspaceResolverImportMap; -use deno_lib::standalone::binary::SourceMapStore; +use deno_lib::standalone::binary::SpecifierDataStore; +use deno_lib::standalone::binary::SpecifierId; +use deno_lib::standalone::binary::MAGIC_BYTES; use deno_lib::standalone::virtual_fs::BuiltVfs; use deno_lib::standalone::virtual_fs::VfsBuilder; use deno_lib::standalone::virtual_fs::VfsEntry; -use deno_lib::standalone::virtual_fs::VfsFileSubDataKind; use deno_lib::standalone::virtual_fs::VirtualDirectory; use deno_lib::standalone::virtual_fs::VirtualDirectoryEntries; use deno_lib::standalone::virtual_fs::WindowsSystemRootablePath; @@ -44,15 +49,16 @@ use deno_npm::NpmSystemInfo; use deno_path_util::url_from_directory_path; use deno_path_util::url_to_file_path; use indexmap::IndexMap; +use node_resolver::analyze::CjsAnalysis; +use node_resolver::analyze::CjsCodeAnalyzer; -use super::serialization::serialize_binary_data_section; -use super::serialization::RemoteModulesStoreBuilder; use super::virtual_fs::output_vfs; use crate::args::CliOptions; use crate::args::CompileFlags; use crate::cache::DenoDir; use crate::emit::Emitter; use 
crate::http_util::HttpClientProvider; +use crate::node::CliCjsCodeAnalyzer; use crate::npm::CliNpmResolver; use crate::resolver::CliCjsTracker; use crate::util::archive; @@ -104,48 +110,60 @@ impl<'a> StandaloneRelativeFileBaseUrl<'a> { } } -#[allow(clippy::too_many_arguments)] -fn write_binary_bytes( - mut file_writer: File, - original_bin: Vec, - metadata: &Metadata, - npm_snapshot: Option, - remote_modules: &RemoteModulesStoreBuilder, - source_map_store: &SourceMapStore, - vfs: &BuiltVfs, - compile_flags: &CompileFlags, -) -> Result<(), AnyError> { - let data_section_bytes = serialize_binary_data_section( - metadata, - npm_snapshot, - remote_modules, - source_map_store, - vfs, - ) - .context("Serializing binary data section.")?; +struct SpecifierStore<'a> { + data: IndexMap<&'a Url, SpecifierId>, +} - let target = compile_flags.resolve_target(); - if target.contains("linux") { - libsui::Elf::new(&original_bin).append( - "d3n0l4nd", - &data_section_bytes, - &mut file_writer, - )?; - } else if target.contains("windows") { - let mut pe = libsui::PortableExecutable::from(&original_bin)?; - if let Some(icon) = compile_flags.icon.as_ref() { - let icon = std::fs::read(icon)?; - pe = pe.set_icon(&icon)?; +impl<'a> SpecifierStore<'a> { + pub fn with_capacity(capacity: usize) -> Self { + Self { + data: IndexMap::with_capacity(capacity), + } + } + + pub fn get_or_add(&mut self, specifier: &'a Url) -> SpecifierId { + let len = self.data.len(); + let entry = self.data.entry(specifier); + match entry { + indexmap::map::Entry::Occupied(occupied_entry) => *occupied_entry.get(), + indexmap::map::Entry::Vacant(vacant_entry) => { + let new_id = SpecifierId::new(len as u32); + vacant_entry.insert(new_id); + new_id + } + } + } + + pub fn for_serialization( + self, + base_url: &StandaloneRelativeFileBaseUrl<'a>, + ) -> SpecifierStoreForSerialization<'a> { + SpecifierStoreForSerialization { + data: self + .data + .into_iter() + .map(|(specifier, id)| 
(base_url.specifier_key(specifier), id)) + .collect(), + } + } +} + +struct SpecifierStoreForSerialization<'a> { + data: Vec<(Cow<'a, str>, SpecifierId)>, +} + +impl<'a> BytesAppendable<'a> for &'a SpecifierStoreForSerialization<'a> { + fn append_to_builder( + self, + builder: &mut capacity_builder::BytesBuilder<'a, TBytes>, + ) { + builder.append_le(self.data.len() as u32); + for (specifier_str, id) in &self.data { + builder.append_le(specifier_str.len() as u32); + builder.append(specifier_str.as_ref()); + builder.append(*id); } - - pe.write_resource("d3n0l4nd", data_section_bytes)? - .build(&mut file_writer)?; - } else if target.contains("darwin") { - libsui::Macho::from(original_bin)? - .write_section("d3n0l4nd", data_section_bytes)? - .build_and_sign(&mut file_writer)?; } - Ok(()) } pub fn is_standalone_binary(exe_path: &Path) -> bool { @@ -168,6 +186,7 @@ pub struct WriteBinOptions<'a> { } pub struct DenoCompileBinaryWriter<'a> { + cjs_code_analyzer: CliCjsCodeAnalyzer, cjs_tracker: &'a CliCjsTracker, cli_options: &'a CliOptions, deno_dir: &'a DenoDir, @@ -181,6 +200,7 @@ pub struct DenoCompileBinaryWriter<'a> { impl<'a> DenoCompileBinaryWriter<'a> { #[allow(clippy::too_many_arguments)] pub fn new( + cjs_code_analyzer: CliCjsCodeAnalyzer, cjs_tracker: &'a CliCjsTracker, cli_options: &'a CliOptions, deno_dir: &'a DenoDir, @@ -191,6 +211,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { npm_system_info: NpmSystemInfo, ) -> Self { Self { + cjs_code_analyzer, cjs_tracker, cli_options, deno_dir, @@ -230,7 +251,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { ) } } - self.write_standalone_binary(options, original_binary) + self.write_standalone_binary(options, original_binary).await } async fn get_base_binary( @@ -328,7 +349,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { /// This functions creates a standalone deno binary by appending a bundle /// and magic trailer to the currently executing binary. 
#[allow(clippy::too_many_arguments)] - fn write_standalone_binary( + async fn write_standalone_binary( &self, options: WriteBinOptions<'_>, original_bin: Vec, @@ -372,23 +393,50 @@ impl<'a> DenoCompileBinaryWriter<'a> { .add_file_at_path(&path) .with_context(|| format!("Including {}", path.display()))?; } - let mut remote_modules_store = RemoteModulesStoreBuilder::default(); - let mut source_maps = Vec::with_capacity(graph.specifiers_count()); - // todo(dsherret): transpile in parallel + let specifiers_count = graph.specifiers_count(); + let mut specifier_store = SpecifierStore::with_capacity(specifiers_count); + let mut remote_modules_store = + SpecifierDataStore::with_capacity(specifiers_count); + // todo(dsherret): transpile and analyze CJS in parallel for module in graph.modules() { if module.specifier().scheme() == "data" { continue; // don't store data urls as an entry as they're in the code } - let (maybe_original_source, maybe_transpiled, media_type) = match module { + let mut maybe_source_map = None; + let mut maybe_transpiled = None; + let mut maybe_cjs_analysis = None; + let (maybe_original_source, media_type) = match module { deno_graph::Module::Js(m) => { - let original_bytes = m.source.as_bytes().to_vec(); - let maybe_transpiled = if m.media_type.is_emittable() { - let is_cjs = self.cjs_tracker.is_cjs_with_known_is_script( - &m.specifier, + let specifier = &m.specifier; + let original_bytes = m.source.as_bytes(); + if self.cjs_tracker.is_maybe_cjs(specifier, m.media_type)? { + if self.cjs_tracker.is_cjs_with_known_is_script( + specifier, m.media_type, m.is_script, - )?; - let module_kind = ModuleKind::from_is_cjs(is_cjs); + )? 
{ + let cjs_analysis = self + .cjs_code_analyzer + .analyze_cjs( + module.specifier(), + Some(Cow::Borrowed(m.source.as_ref())), + ) + .await?; + maybe_cjs_analysis = Some(match cjs_analysis { + CjsAnalysis::Esm(_) => CjsExportAnalysisEntry::Esm, + CjsAnalysis::Cjs(exports) => { + CjsExportAnalysisEntry::Cjs(exports) + } + }); + } else { + maybe_cjs_analysis = Some(CjsExportAnalysisEntry::Esm); + } + } + if m.media_type.is_emittable() { + let module_kind = match maybe_cjs_analysis.as_ref() { + Some(CjsExportAnalysisEntry::Cjs(_)) => ModuleKind::Cjs, + _ => ModuleKind::Esm, + }; let (source, source_map) = self.emitter.emit_parsed_source_for_deno_compile( &m.specifier, @@ -397,60 +445,67 @@ impl<'a> DenoCompileBinaryWriter<'a> { &m.source, )?; if source != m.source.as_ref() { - source_maps.push((&m.specifier, source_map)); - Some(source.into_bytes()) - } else { - None + maybe_source_map = Some(source_map.into_bytes()); + maybe_transpiled = Some(source.into_bytes()); } - } else { - None - }; - (Some(original_bytes), maybe_transpiled, m.media_type) + } + (Some(original_bytes), m.media_type) } deno_graph::Module::Json(m) => { - (Some(m.source.as_bytes().to_vec()), None, m.media_type) + (Some(m.source.as_bytes()), m.media_type) } deno_graph::Module::Wasm(m) => { - (Some(m.source.to_vec()), None, MediaType::Wasm) + (Some(m.source.as_ref()), MediaType::Wasm) } deno_graph::Module::Npm(_) | deno_graph::Module::Node(_) - | deno_graph::Module::External(_) => (None, None, MediaType::Unknown), + | deno_graph::Module::External(_) => (None, MediaType::Unknown), }; if let Some(original_source) = maybe_original_source { + let maybe_cjs_export_analysis = maybe_cjs_analysis + .as_ref() + .map(bincode::serialize) + .transpose()?; if module.specifier().scheme() == "file" { let file_path = deno_path_util::url_to_file_path(module.specifier())?; vfs .add_file_with_data( &file_path, - original_source, - VfsFileSubDataKind::Raw, + deno_lib::standalone::virtual_fs::AddFileDataOptions { + 
data: original_source.to_vec(), + maybe_transpiled, + maybe_source_map, + maybe_cjs_export_analysis, + }, ) .with_context(|| { format!("Failed adding '{}'", file_path.display()) })?; - if let Some(transpiled_source) = maybe_transpiled { - vfs - .add_file_with_data( - &file_path, - transpiled_source, - VfsFileSubDataKind::ModuleGraph, - ) - .with_context(|| { - format!("Failed adding '{}'", file_path.display()) - })?; - } } else { + let specifier_id = specifier_store.get_or_add(module.specifier()); remote_modules_store.add( - module.specifier(), - media_type, - original_source, - maybe_transpiled, + specifier_id, + RemoteModuleEntry { + media_type, + data: Cow::Borrowed(original_source), + maybe_transpiled: maybe_transpiled.map(Cow::Owned), + maybe_source_map: maybe_source_map.map(Cow::Owned), + maybe_cjs_export_analysis: maybe_cjs_export_analysis + .map(Cow::Owned), + }, ); } } } - remote_modules_store.add_redirects(&graph.redirects); + + let mut redirects_store = + SpecifierDataStore::with_capacity(graph.redirects.len()); + for (from, to) in &graph.redirects { + redirects_store.add( + specifier_store.get_or_add(from), + specifier_store.get_or_add(to), + ); + } if let Some(import_map) = self.workspace_resolver.maybe_import_map() { if let Ok(file_path) = url_to_file_path(import_map.base_url()) { @@ -468,7 +523,48 @@ impl<'a> DenoCompileBinaryWriter<'a> { } } + // do CJS export analysis on all the files in the VFS + // todo(dsherret): analyze cjs in parallel + let mut to_add = Vec::new(); + for (file_path, file) in vfs.iter_files() { + if file.cjs_export_analysis_offset.is_some() { + continue; // already analyzed + } + let specifier = deno_path_util::url_from_file_path(&file_path)?; + let media_type = MediaType::from_specifier(&specifier); + if self.cjs_tracker.is_maybe_cjs(&specifier, media_type)? 
{ + let maybe_source = vfs + .file_bytes(file.offset) + .map(|text| String::from_utf8_lossy(text)); + let cjs_analysis_result = self + .cjs_code_analyzer + .analyze_cjs(&specifier, maybe_source) + .await; + let maybe_analysis = match cjs_analysis_result { + Ok(CjsAnalysis::Esm(_)) => Some(CjsExportAnalysisEntry::Esm), + Ok(CjsAnalysis::Cjs(exports)) => { + Some(CjsExportAnalysisEntry::Cjs(exports)) + } + Err(err) => { + log::debug!( + "Ignoring cjs export analysis for '{}': {}", + specifier, + err + ); + None + } + }; + if let Some(analysis) = &maybe_analysis { + to_add.push((file_path, bincode::serialize(analysis)?)); + } + } + } + for (file_path, analysis) in to_add { + vfs.add_cjs_export_analysis(&file_path, analysis); + } + let vfs = self.build_vfs_consolidating_global_npm_cache(vfs); + let root_dir_url = match &vfs.root_path { WindowsSystemRootablePath::Path(dir) => { Some(url_from_directory_path(dir)?) @@ -494,14 +590,6 @@ impl<'a> DenoCompileBinaryWriter<'a> { None }; - let mut source_map_store = SourceMapStore::with_capacity(source_maps.len()); - for (specifier, source_map) in source_maps { - source_map_store.add( - Cow::Owned(root_dir_url.specifier_key(specifier).into_owned()), - Cow::Owned(source_map.into_bytes()), - ); - } - let node_modules = match &self.npm_resolver { CliNpmResolver::Managed(_) => { npm_snapshot.as_ref().map(|_| NodeModules::Managed { @@ -611,17 +699,18 @@ impl<'a> DenoCompileBinaryWriter<'a> { vfs_case_sensitivity: vfs.case_sensitivity, }; - write_binary_bytes( - writer, - original_bin, + let data_section_bytes = serialize_binary_data_section( &metadata, npm_snapshot.map(|s| s.into_serialized()), + &specifier_store.for_serialization(&root_dir_url), + &redirects_store, &remote_modules_store, - &source_map_store, &vfs, - compile_flags, ) - .context("Writing binary bytes") + .context("Serializing binary data section.")?; + + write_binary_bytes(writer, original_bin, data_section_bytes, compile_flags) + .context("Writing binary bytes") } 
fn fill_npm_vfs(&self, builder: &mut VfsBuilder) -> Result<(), AnyError> { @@ -792,6 +881,146 @@ impl<'a> DenoCompileBinaryWriter<'a> { } } +#[allow(clippy::too_many_arguments)] +fn write_binary_bytes( + mut file_writer: File, + original_bin: Vec, + data_section_bytes: Vec, + compile_flags: &CompileFlags, +) -> Result<(), AnyError> { + let target = compile_flags.resolve_target(); + if target.contains("linux") { + libsui::Elf::new(&original_bin).append( + "d3n0l4nd", + &data_section_bytes, + &mut file_writer, + )?; + } else if target.contains("windows") { + let mut pe = libsui::PortableExecutable::from(&original_bin)?; + if let Some(icon) = compile_flags.icon.as_ref() { + let icon = std::fs::read(icon)?; + pe = pe.set_icon(&icon)?; + } + + pe.write_resource("d3n0l4nd", data_section_bytes)? + .build(&mut file_writer)?; + } else if target.contains("darwin") { + libsui::Macho::from(original_bin)? + .write_section("d3n0l4nd", data_section_bytes)? + .build_and_sign(&mut file_writer)?; + } + Ok(()) +} + +/// Binary format: +/// * d3n0l4nd +/// * <metadata_len><metadata> +/// * <npm_snapshot_len><npm_snapshot> +/// * <specifiers> +/// * <redirects> +/// * <remote_modules> +/// * <vfs_headers_len><vfs_headers> +/// * <vfs_file_data_len><vfs_file_data> +/// * d3n0l4nd +#[allow(clippy::too_many_arguments)] +fn serialize_binary_data_section( + metadata: &Metadata, + npm_snapshot: Option, + specifiers: &SpecifierStoreForSerialization, + redirects: &SpecifierDataStore, + remote_modules: &SpecifierDataStore>, + vfs: &BuiltVfs, +) -> Result, AnyError> { + let metadata = serde_json::to_string(metadata)?; + let npm_snapshot = + npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); + let serialized_vfs = serde_json::to_string(&vfs.entries)?; + + let bytes = capacity_builder::BytesBuilder::build(|builder| { + builder.append(MAGIC_BYTES); + // 1. Metadata + { + builder.append_le(metadata.len() as u64); + builder.append(&metadata); + } + // 2. Npm snapshot + { + builder.append_le(npm_snapshot.len() as u64); + builder.append(&npm_snapshot); + } + // 3. Specifiers + builder.append(specifiers); + // 4.
Redirects + redirects.serialize(builder); + // 5. Remote modules + remote_modules.serialize(builder); + // 6. VFS + { + builder.append_le(serialized_vfs.len() as u64); + builder.append(&serialized_vfs); + let vfs_bytes_len = vfs.files.iter().map(|f| f.len() as u64).sum::(); + builder.append_le(vfs_bytes_len); + for file in &vfs.files { + builder.append(file); + } + } + + // write the magic bytes at the end so we can use it + // to make sure we've deserialized correctly + builder.append(MAGIC_BYTES); + })?; + + Ok(bytes) +} + +fn serialize_npm_snapshot( + mut snapshot: SerializedNpmResolutionSnapshot, +) -> Vec { + fn append_string(bytes: &mut Vec, string: &str) { + let len = string.len() as u32; + bytes.extend_from_slice(&len.to_le_bytes()); + bytes.extend_from_slice(string.as_bytes()); + } + + snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism + let ids_to_stored_ids = snapshot + .packages + .iter() + .enumerate() + .map(|(i, pkg)| (&pkg.id, i as u32)) + .collect::>(); + + let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); + root_packages.sort(); + let mut bytes = Vec::new(); + + bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_le_bytes()); + for pkg in &snapshot.packages { + append_string(&mut bytes, &pkg.id.as_serialized()); + } + + bytes.extend_from_slice(&(root_packages.len() as u32).to_le_bytes()); + for (req, id) in root_packages { + append_string(&mut bytes, &req.to_string()); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_le_bytes()); + } + + for pkg in &snapshot.packages { + let deps_len = pkg.dependencies.len() as u32; + bytes.extend_from_slice(&deps_len.to_le_bytes()); + let mut deps: Vec<_> = pkg.dependencies.iter().collect(); + deps.sort(); + for (req, id) in deps { + append_string(&mut bytes, req); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_le_bytes()); + } + } + + bytes +} + fn get_denort_path(deno_exe: PathBuf) -> Option { let 
mut denort = deno_exe; denort.set_file_name(if cfg!(windows) { diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index e0b1fe5633..81ca2b4ff1 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -1,5 +1,4 @@ // Copyright 2018-2025 the Deno authors. MIT license. pub mod binary; -mod serialization; mod virtual_fs; diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs deleted file mode 100644 index fd2ebdc042..0000000000 --- a/cli/standalone/serialization.rs +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2018-2025 the Deno authors. MIT license. - -use std::collections::BTreeMap; -use std::collections::HashMap; - -use deno_ast::MediaType; -use deno_core::error::AnyError; -use deno_core::serde_json; -use deno_core::url::Url; -use deno_lib::standalone::binary::Metadata; -use deno_lib::standalone::binary::SourceMapStore; -use deno_lib::standalone::binary::MAGIC_BYTES; -use deno_lib::standalone::virtual_fs::BuiltVfs; -use deno_npm::resolution::SerializedNpmResolutionSnapshot; - -/// Binary format: -/// * d3n0l4nd -/// * <metadata_len><metadata> -/// * <npm_snapshot_len><npm_snapshot> -/// * <remote_modules> -/// * <vfs_headers_len><vfs_headers> -/// * <vfs_file_data_len><vfs_file_data> -/// * <source_map_data> -/// * d3n0l4nd -pub fn serialize_binary_data_section( - metadata: &Metadata, - npm_snapshot: Option, - remote_modules: &RemoteModulesStoreBuilder, - source_map_store: &SourceMapStore, - vfs: &BuiltVfs, -) -> Result, AnyError> { - let metadata = serde_json::to_string(metadata)?; - let npm_snapshot = - npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); - let serialized_vfs = serde_json::to_string(&vfs.entries)?; - - let bytes = capacity_builder::BytesBuilder::build(|builder| { - builder.append(MAGIC_BYTES); - // 1. Metadata - { - builder.append_le(metadata.len() as u64); - builder.append(&metadata); - } - // 2. Npm snapshot - { - builder.append_le(npm_snapshot.len() as u64); - builder.append(&npm_snapshot); - } - // 3. Remote modules - { - remote_modules.write(builder); - } - // 4.
VFS - { - builder.append_le(serialized_vfs.len() as u64); - builder.append(&serialized_vfs); - let vfs_bytes_len = vfs.files.iter().map(|f| f.len() as u64).sum::(); - builder.append_le(vfs_bytes_len); - for file in &vfs.files { - builder.append(file); - } - } - // 5. Source maps - { - builder.append_le(source_map_store.len() as u32); - for (specifier, source_map) in source_map_store.iter() { - builder.append_le(specifier.len() as u32); - builder.append(specifier); - builder.append_le(source_map.len() as u32); - builder.append(source_map); - } - } - - // write the magic bytes at the end so we can use it - // to make sure we've deserialized correctly - builder.append(MAGIC_BYTES); - })?; - - Ok(bytes) -} - -#[derive(Default)] -pub struct RemoteModulesStoreBuilder { - specifiers: Vec<(String, u64)>, - data: Vec<(MediaType, Vec, Option>)>, - data_byte_len: u64, - redirects: Vec<(String, String)>, - redirects_len: u64, -} - -impl RemoteModulesStoreBuilder { - pub fn add( - &mut self, - specifier: &Url, - media_type: MediaType, - data: Vec, - maybe_transpiled: Option>, - ) { - log::debug!("Adding '{}' ({})", specifier, media_type); - let specifier = specifier.to_string(); - self.specifiers.push((specifier, self.data_byte_len)); - let maybe_transpiled_len = match &maybe_transpiled { - // data length (4 bytes), data - Some(data) => 4 + data.len() as u64, - None => 0, - }; - // media type (1 byte), data length (4 bytes), data, has transpiled (1 byte), transpiled length - self.data_byte_len += 1 + 4 + data.len() as u64 + 1 + maybe_transpiled_len; - self.data.push((media_type, data, maybe_transpiled)); - } - - pub fn add_redirects(&mut self, redirects: &BTreeMap) { - self.redirects.reserve(redirects.len()); - for (from, to) in redirects { - log::debug!("Adding redirect '{}' -> '{}'", from, to); - let from = from.to_string(); - let to = to.to_string(); - self.redirects_len += (4 + from.len() + 4 + to.len()) as u64; - self.redirects.push((from, to)); - } - } - - fn write<'a, 
TBytes: capacity_builder::BytesType>( - &'a self, - builder: &mut capacity_builder::BytesBuilder<'a, TBytes>, - ) { - builder.append_le(self.specifiers.len() as u32); - builder.append_le(self.redirects.len() as u32); - for (specifier, offset) in &self.specifiers { - builder.append_le(specifier.len() as u32); - builder.append(specifier); - builder.append_le(*offset); - } - for (from, to) in &self.redirects { - builder.append_le(from.len() as u32); - builder.append(from); - builder.append_le(to.len() as u32); - builder.append(to); - } - builder.append_le( - self - .data - .iter() - .map(|(_, data, maybe_transpiled)| { - 1 + 4 - + (data.len() as u64) - + 1 - + match maybe_transpiled { - Some(transpiled) => 4 + (transpiled.len() as u64), - None => 0, - } - }) - .sum::(), - ); - for (media_type, data, maybe_transpiled) in &self.data { - builder.append(serialize_media_type(*media_type)); - builder.append_le(data.len() as u32); - builder.append(data); - if let Some(transpiled) = maybe_transpiled { - builder.append(1); - builder.append_le(transpiled.len() as u32); - builder.append(transpiled); - } else { - builder.append(0); - } - } - } -} - -fn serialize_npm_snapshot( - mut snapshot: SerializedNpmResolutionSnapshot, -) -> Vec { - fn append_string(bytes: &mut Vec, string: &str) { - let len = string.len() as u32; - bytes.extend_from_slice(&len.to_le_bytes()); - bytes.extend_from_slice(string.as_bytes()); - } - - snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism - let ids_to_stored_ids = snapshot - .packages - .iter() - .enumerate() - .map(|(i, pkg)| (&pkg.id, i as u32)) - .collect::>(); - - let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); - root_packages.sort(); - let mut bytes = Vec::new(); - - bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_le_bytes()); - for pkg in &snapshot.packages { - append_string(&mut bytes, &pkg.id.as_serialized()); - } - - bytes.extend_from_slice(&(root_packages.len() as u32).to_le_bytes()); - 
for (req, id) in root_packages { - append_string(&mut bytes, &req.to_string()); - let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_le_bytes()); - } - - for pkg in &snapshot.packages { - let deps_len = pkg.dependencies.len() as u32; - bytes.extend_from_slice(&deps_len.to_le_bytes()); - let mut deps: Vec<_> = pkg.dependencies.iter().collect(); - deps.sort(); - for (req, id) in deps { - append_string(&mut bytes, req); - let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_le_bytes()); - } - } - - bytes -} - -fn serialize_media_type(media_type: MediaType) -> u8 { - match media_type { - MediaType::JavaScript => 0, - MediaType::Jsx => 1, - MediaType::Mjs => 2, - MediaType::Cjs => 3, - MediaType::TypeScript => 4, - MediaType::Mts => 5, - MediaType::Cts => 6, - MediaType::Dts => 7, - MediaType::Dmts => 8, - MediaType::Dcts => 9, - MediaType::Tsx => 10, - MediaType::Json => 11, - MediaType::Wasm => 12, - MediaType::Css => 13, - MediaType::SourceMap => 14, - MediaType::Unknown => 15, - } -} diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index 4c52022804..fa79b784dc 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -204,8 +204,13 @@ fn vfs_as_display_tree( let mut size = Size::default(); add_offset_to_size(file.offset, &mut size, seen_offsets); - if file.module_graph_offset.offset != file.offset.offset { - add_offset_to_size(file.module_graph_offset, &mut size, seen_offsets); + let maybe_offsets = [ + file.transpiled_offset, + file.source_map_offset, + file.cjs_export_analysis_offset, + ]; + for offset in maybe_offsets.into_iter().flatten() { + add_offset_to_size(offset, &mut size, seen_offsets); } size } diff --git a/resolvers/deno/cjs.rs b/resolvers/deno/cjs.rs index 4358b0ced2..f3347fc9a0 100644 --- a/resolvers/deno/cjs.rs +++ b/resolvers/deno/cjs.rs @@ -50,6 +50,26 @@ impl self.treat_as_cjs_with_is_script(specifier, media_type, None) } + /// Mark a file as being 
known CJS or ESM. + pub fn set_is_known_script(&self, specifier: &Url, is_script: bool) { + let new_value = if is_script { + ResolutionMode::Require + } else { + ResolutionMode::Import + }; + // block to really ensure dashmap is not borrowed while trying to insert + { + if let Some(value) = self.known.get(specifier) { + // you shouldn't be inserting a value in here that's + // already known and is a different value than what + // was previously determined + debug_assert_eq!(*value, new_value); + return; + } + } + self.known.insert(specifier.clone(), new_value); + } + /// Gets whether the file is CJS. If true, this is for sure /// cjs because `is_script` is provided. /// @@ -233,7 +253,7 @@ impl } } else if is_script == Some(false) { // we know this is esm - known_cache.insert(specifier.clone(), ResolutionMode::Import); + known_cache.insert(specifier.clone(), ResolutionMode::Import); Some(ResolutionMode::Import) } else { None diff --git a/resolvers/node/Cargo.toml b/resolvers/node/Cargo.toml index 1f6ee39212..e44303a8eb 100644 --- a/resolvers/node/Cargo.toml +++ b/resolvers/node/Cargo.toml @@ -28,6 +28,7 @@ lazy-regex.workspace = true once_cell.workspace = true path-clean = "=0.1.0" regex.workspace = true +serde.workspace = true serde_json.workspace = true sys_traits.workspace = true thiserror.workspace = true diff --git a/resolvers/node/analyze.rs b/resolvers/node/analyze.rs index 7f5aeb0bf3..b5fc224037 100644 --- a/resolvers/node/analyze.rs +++ b/resolvers/node/analyze.rs @@ -14,6 +14,8 @@ use futures::stream::FuturesUnordered; use futures::FutureExt; use futures::StreamExt; use once_cell::sync::Lazy; +use serde::Deserialize; +use serde::Serialize; use sys_traits::FsCanonicalize; use sys_traits::FsMetadata; use sys_traits::FsRead; @@ -36,7 +38,7 @@ pub enum CjsAnalysis<'a> { Cjs(CjsAnalysisExports), } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct CjsAnalysisExports { pub exports: Vec, pub reexports: Vec, diff --git
a/runtime/Cargo.toml b/runtime/Cargo.toml index b87d4cfbdf..83ed3da495 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -27,6 +27,7 @@ hmr = ["include_js_files_for_snapshotting"] # conditionally exclude the runtime source transpilation logic, and add an # assertion that a snapshot is provided. only_snapshotted_js_sources = ["include_js_files_for_snapshotting"] +snapshotting = ["deno_ast"] [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } @@ -39,41 +40,8 @@ path = "lib.rs" name = "extension" path = "examples/extension/main.rs" -[build-dependencies] -deno_ast.workspace = true -deno_broadcast_channel.workspace = true -deno_cache.workspace = true -deno_canvas.workspace = true -deno_console.workspace = true -deno_core.workspace = true -deno_cron.workspace = true -deno_crypto.workspace = true -deno_fetch.workspace = true -deno_os.workspace = true -deno_ffi.workspace = true -deno_fs = { workspace = true, features = ["sync_fs"] } -deno_http.workspace = true -deno_io.workspace = true -deno_net.workspace = true -deno_node.workspace = true -deno_kv.workspace = true -deno_tls.workspace = true -deno_url.workspace = true -deno_web.workspace = true -deno_process.workspace = true -deno_webgpu.workspace = true -deno_webidl.workspace = true -deno_websocket.workspace = true -deno_webstorage.workspace = true -deno_napi.workspace = true -flate2 = { workspace = true, features = ["default"] } -serde.workspace = true - -[target.'cfg(windows)'.build-dependencies] -winapi.workspace = true - [dependencies] -deno_ast.workspace = true +deno_ast = { workspace = true, optional = true } deno_broadcast_channel.workspace = true deno_cache.workspace = true deno_canvas.workspace = true diff --git a/runtime/lib.rs b/runtime/lib.rs index c83fe5d60b..a2f3e54353 100644 --- a/runtime/lib.rs +++ b/runtime/lib.rs @@ -36,6 +36,7 @@ pub mod inspector_server; pub mod js; pub mod ops; pub mod permissions; +#[cfg(feature = "snapshotting")] pub mod snapshot; pub mod 
tokio_util; pub mod web_worker; diff --git a/runtime/shared.rs b/runtime/shared.rs index ecf2088fe1..f712cef65b 100644 --- a/runtime/shared.rs +++ b/runtime/shared.rs @@ -1,17 +1,8 @@ // Copyright 2018-2025 the Deno authors. MIT license. // Utilities shared between `build.rs` and the rest of the crate. -use std::path::Path; - -use deno_ast::MediaType; -use deno_ast::ParseParams; -use deno_ast::SourceMapOption; use deno_core::extension; use deno_core::Extension; -use deno_core::ModuleCodeString; -use deno_core::ModuleName; -use deno_core::SourceMapData; -use deno_error::JsErrorBox; extension!(runtime, deps = [ @@ -60,70 +51,3 @@ extension!(runtime, } } ); - -deno_error::js_error_wrapper!( - deno_ast::ParseDiagnostic, - JsParseDiagnostic, - "Error" -); -deno_error::js_error_wrapper!( - deno_ast::TranspileError, - JsTranspileError, - "Error" -); - -pub fn maybe_transpile_source( - name: ModuleName, - source: ModuleCodeString, -) -> Result<(ModuleCodeString, Option), JsErrorBox> { - // Always transpile `node:` built-in modules, since they might be TypeScript. 
- let media_type = if name.starts_with("node:") { - MediaType::TypeScript - } else { - MediaType::from_path(Path::new(&name)) - }; - - match media_type { - MediaType::TypeScript => {} - MediaType::JavaScript => return Ok((source, None)), - MediaType::Mjs => return Ok((source, None)), - _ => panic!( - "Unsupported media type for snapshotting {media_type:?} for file {}", - name - ), - } - - let parsed = deno_ast::parse_module(ParseParams { - specifier: deno_core::url::Url::parse(&name).unwrap(), - text: source.into(), - media_type, - capture_tokens: false, - scope_analysis: false, - maybe_syntax: None, - }) - .map_err(|e| JsErrorBox::from_err(JsParseDiagnostic(e)))?; - let transpiled_source = parsed - .transpile( - &deno_ast::TranspileOptions { - imports_not_used_as_values: deno_ast::ImportsNotUsedAsValues::Remove, - ..Default::default() - }, - &deno_ast::TranspileModuleOptions::default(), - &deno_ast::EmitOptions { - source_map: if cfg!(debug_assertions) { - SourceMapOption::Separate - } else { - SourceMapOption::None - }, - ..Default::default() - }, - ) - .map_err(|e| JsErrorBox::from_err(JsTranspileError(e)))? 
- .into_source(); - - let maybe_source_map: Option = transpiled_source - .source_map - .map(|sm| sm.into_bytes().into()); - let source_text = transpiled_source.text; - Ok((source_text.into(), maybe_source_map)) -} diff --git a/runtime/snapshot.rs b/runtime/snapshot.rs index eec8579e59..a2f0322763 100644 --- a/runtime/snapshot.rs +++ b/runtime/snapshot.rs @@ -7,10 +7,17 @@ use std::path::PathBuf; use std::rc::Rc; use std::sync::Arc; +use deno_ast::MediaType; +use deno_ast::ParseParams; +use deno_ast::SourceMapOption; use deno_cache::SqliteBackedCache; use deno_core::snapshot::*; use deno_core::v8; use deno_core::Extension; +use deno_core::ModuleCodeString; +use deno_core::ModuleName; +use deno_core::SourceMapData; +use deno_error::JsErrorBox; use deno_http::DefaultHttpPropertyExtractor; use deno_io::fs::FsError; use deno_permissions::PermissionCheckError; @@ -19,7 +26,6 @@ use deno_resolver::npm::NpmResolver; use crate::ops; use crate::ops::bootstrap::SnapshotOptions; -use crate::shared::maybe_transpile_source; use crate::shared::runtime; #[derive(Clone)] @@ -370,3 +376,70 @@ pub fn create_runtime_snapshot( println!("cargo:rerun-if-changed={}", path.display()); } } + +deno_error::js_error_wrapper!( + deno_ast::ParseDiagnostic, + JsParseDiagnostic, + "Error" +); +deno_error::js_error_wrapper!( + deno_ast::TranspileError, + JsTranspileError, + "Error" +); + +pub fn maybe_transpile_source( + name: ModuleName, + source: ModuleCodeString, +) -> Result<(ModuleCodeString, Option), JsErrorBox> { + // Always transpile `node:` built-in modules, since they might be TypeScript. 
+ let media_type = if name.starts_with("node:") { + MediaType::TypeScript + } else { + MediaType::from_path(Path::new(&name)) + }; + + match media_type { + MediaType::TypeScript => {} + MediaType::JavaScript => return Ok((source, None)), + MediaType::Mjs => return Ok((source, None)), + _ => panic!( + "Unsupported media type for snapshotting {media_type:?} for file {}", + name + ), + } + + let parsed = deno_ast::parse_module(ParseParams { + specifier: deno_core::url::Url::parse(&name).unwrap(), + text: source.into(), + media_type, + capture_tokens: false, + scope_analysis: false, + maybe_syntax: None, + }) + .map_err(|e| JsErrorBox::from_err(JsParseDiagnostic(e)))?; + let transpiled_source = parsed + .transpile( + &deno_ast::TranspileOptions { + imports_not_used_as_values: deno_ast::ImportsNotUsedAsValues::Remove, + ..Default::default() + }, + &deno_ast::TranspileModuleOptions::default(), + &deno_ast::EmitOptions { + source_map: if cfg!(debug_assertions) { + SourceMapOption::Separate + } else { + SourceMapOption::None + }, + ..Default::default() + }, + ) + .map_err(|e| JsErrorBox::from_err(JsTranspileError(e)))? 
+ .into_source(); + + let maybe_source_map: Option<SourceMapData> = transpiled_source + .source_map + .map(|sm| sm.into_bytes().into()); + let source_text = transpiled_source.text; + Ok((source_text.into(), maybe_source_map)) +} diff --git a/runtime/web_worker.rs b/runtime/web_worker.rs index bb769c46a9..d7cf85bab9 100644 --- a/runtime/web_worker.rs +++ b/runtime/web_worker.rs @@ -60,7 +60,6 @@ use node_resolver::NpmPackageFolderResolver; use crate::inspector_server::InspectorServer; use crate::ops; -use crate::shared::maybe_transpile_source; use crate::shared::runtime; use crate::tokio_util::create_and_run_current_thread; use crate::worker::create_op_metrics; @@ -595,9 +594,7 @@ impl WebWorker { shared_array_buffer_store: services.shared_array_buffer_store, compiled_wasm_module_store: services.compiled_wasm_module_store, extensions, - extension_transpiler: Some(Rc::new(|specifier, source| { - maybe_transpile_source(specifier, source) - })), + extension_transpiler: None, inspector: true, feature_checker: Some(services.feature_checker), op_metrics_factory_fn, diff --git a/runtime/worker.rs b/runtime/worker.rs index 72eb54ec47..270c8b5392 100644 --- a/runtime/worker.rs +++ b/runtime/worker.rs @@ -52,7 +52,6 @@ use crate::code_cache::CodeCache; use crate::code_cache::CodeCacheType; use crate::inspector_server::InspectorServer; use crate::ops; -use crate::shared::maybe_transpile_source; use crate::shared::runtime; use crate::BootstrapOptions; @@ -502,9 +501,7 @@ impl MainWorker { shared_array_buffer_store: services.shared_array_buffer_store.clone(), compiled_wasm_module_store: services.compiled_wasm_module_store.clone(), extensions, - extension_transpiler: Some(Rc::new(|specifier, source| { - maybe_transpile_source(specifier, source) - })), + extension_transpiler: None, inspector: true, is_main: true, feature_checker: Some(services.feature_checker.clone()),