From c1dcf1b618e87f1a50869fffe38b473a77377fe8 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 2 Dec 2024 21:10:16 -0500 Subject: [PATCH] refactor: add deno_npm_cache crate (#27200) Extracting out more code from the CLI for reuse elsewhere (still more work to do, but this is a start). This is the code for extracting npm tarballs and saving information in the npm cache in the global deno_dir. --- Cargo.lock | 35 ++- Cargo.toml | 3 + cli/Cargo.toml | 1 + cli/args/mod.rs | 28 ++- cli/npm/managed/mod.rs | 73 +++--- cli/npm/managed/registry.rs | 23 +- cli/npm/managed/resolvers/common.rs | 4 +- cli/npm/managed/resolvers/global.rs | 12 +- cli/npm/managed/resolvers/local.rs | 16 +- cli/npm/managed/resolvers/mod.rs | 8 +- cli/npm/mod.rs | 111 ++++++++- cli/util/path.rs | 13 -- cli/util/sync/mod.rs | 2 - cli/util/sync/value_creator.rs | 213 ------------------ resolvers/npm_cache/Cargo.toml | 42 ++++ resolvers/npm_cache/README.md | 6 + .../mod.rs => resolvers/npm_cache/lib.rs | 130 ++++++++--- .../npm_cache}/registry_info.rs | 71 +++--- .../npm_cache/remote.rs | 6 +- .../cache => resolvers/npm_cache}/tarball.rs | 76 +++---- .../npm_cache}/tarball_extract.rs | 45 ++-- resolvers/npm_cache/todo.md | 9 + 22 files changed, 492 insertions(+), 435 deletions(-) delete mode 100644 cli/util/sync/value_creator.rs create mode 100644 resolvers/npm_cache/Cargo.toml create mode 100644 resolvers/npm_cache/README.md rename cli/npm/managed/cache/mod.rs => resolvers/npm_cache/lib.rs (71%) rename {cli/npm/managed/cache => resolvers/npm_cache}/registry_info.rs (84%) rename cli/npm/common.rs => resolvers/npm_cache/remote.rs (95%) rename {cli/npm/managed/cache => resolvers/npm_cache}/tarball.rs (77%) rename {cli/npm/managed/cache => resolvers/npm_cache}/tarball_extract.rs (90%) create mode 100644 resolvers/npm_cache/todo.md diff --git a/Cargo.lock b/Cargo.lock index f9288c9eb5..28548ab84c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1220,6 +1220,7 @@ dependencies = [ "deno_lint", "deno_lockfile", "deno_npm", + "deno_npm_cache", "deno_package_json", "deno_path_util", "deno_resolver", @@ -1998,6 +1999,35 @@ dependencies = [ "url", ] +[[package]] +name = "deno_npm_cache" +version = "0.0.1" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.21.7", + "boxed_error", + "deno_cache_dir", + "deno_core", + "deno_npm", + "deno_semver", + "deno_unsync", + "faster-hex", + "flate2", + "futures", + "http 1.1.0", + "log", + "parking_lot", + "percent-encoding", + "rand", + "ring", + "serde_json", + "tar", + "tempfile", + "thiserror 1.0.64", + "url", +] + [[package]] name = "deno_ops" version = "0.199.0" @@ -2260,10 +2290,11 @@ dependencies = [ [[package]] name = "deno_unsync" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f36b4ef61a04ce201b925a5dffa90f88437d37fee4836c758470dd15ba7f05e" +checksum = "d774fd83f26b24f0805a6ab8b26834a0d06ceac0db517b769b1e4633c96a2057" dependencies = [ + "futures", "parking_lot", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 068046607f..23670beec3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ members = [ "ext/webstorage", "resolvers/deno", "resolvers/node", + "resolvers/npm_cache", "runtime", "runtime/permissions", "tests", @@ -93,6 +94,7 @@ deno_websocket = { version = "0.185.0", path = "./ext/websocket" } deno_webstorage = { version = "0.175.0", path = "./ext/webstorage" } # resolvers +deno_npm_cache = { version = "0.0.1", path = "./resolvers/npm_cache" } deno_resolver = { version = "0.12.0", path = 
"./resolvers/deno" } node_resolver = { version = "0.19.0", path = "./resolvers/node" } @@ -117,6 +119,7 @@ data-encoding = "2.3.3" data-url = "=0.3.0" deno_cache_dir = "=0.14.0" deno_package_json = { version = "0.2.1", default-features = false } +deno_unsync = "0.4.2" dlopen2 = "0.6.1" ecb = "=0.1.2" elliptic-curve = { version = "0.13.4", features = ["alloc", "arithmetic", "ecdh", "std", "pem", "jwk"] } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2012c90dfb..4a343ce747 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -77,6 +77,7 @@ deno_graph = { version = "=0.86.3" } deno_lint = { version = "=0.68.2", features = ["docs"] } deno_lockfile.workspace = true deno_npm.workspace = true +deno_npm_cache.workspace = true deno_package_json.workspace = true deno_path_util.workspace = true deno_resolver.workspace = true diff --git a/cli/args/mod.rs b/cli/args/mod.rs index fb576a8c3e..0b049cf409 100644 --- a/cli/args/mod.rs +++ b/cli/args/mod.rs @@ -27,6 +27,7 @@ use deno_npm::npm_rc::NpmRc; use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; use deno_npm::NpmSystemInfo; +use deno_npm_cache::NpmCacheSetting; use deno_path_util::normalize_path; use deno_semver::npm::NpmPackageReqReference; use deno_telemetry::OtelConfig; @@ -238,20 +239,25 @@ pub enum CacheSetting { } impl CacheSetting { - pub fn should_use_for_npm_package(&self, package_name: &str) -> bool { + pub fn as_npm_cache_setting(&self) -> NpmCacheSetting { match self { - CacheSetting::ReloadAll => false, - CacheSetting::ReloadSome(list) => { - if list.iter().any(|i| i == "npm:") { - return false; + CacheSetting::Only => NpmCacheSetting::Only, + CacheSetting::ReloadAll => NpmCacheSetting::ReloadAll, + CacheSetting::ReloadSome(values) => { + if values.iter().any(|v| v == "npm:") { + NpmCacheSetting::ReloadAll + } else { + NpmCacheSetting::ReloadSome { + npm_package_names: values + .iter() + .filter_map(|v| v.strip_prefix("npm:")) + .map(|n| n.to_string()) + .collect(), + } } - let specifier = format!("npm:{package_name}"); - if list.contains(&specifier) { - return false; - } - true } - _ => true, + CacheSetting::RespectHeaders => unreachable!(), // not supported + CacheSetting::Use => NpmCacheSetting::Use, } } } diff --git a/cli/npm/managed/mod.rs b/cli/npm/managed/mod.rs index 88094d5141..da39f55e85 100644 --- a/cli/npm/managed/mod.rs +++ b/cli/npm/managed/mod.rs @@ -5,8 +5,6 @@ use std::path::Path; use std::path::PathBuf; use std::sync::Arc; -use cache::RegistryInfoDownloader; -use cache::TarballCache; use deno_ast::ModuleSpecifier; use deno_cache_dir::npm::NpmCacheDir; use deno_core::anyhow::Context; @@ -42,22 +40,23 @@ use crate::args::NpmProcessState; use crate::args::NpmProcessStateKind; use crate::args::PackageJsonDepValueParseWithLocationError; use crate::cache::FastInsecureHasher; -use crate::http_util::HttpClientProvider; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; use crate::util::progress_bar::ProgressBar; use crate::util::sync::AtomicFlag; -use self::cache::NpmCache; use self::registry::CliNpmRegistryApi; use self::resolution::NpmResolution; use self::resolvers::create_npm_fs_resolver; use self::resolvers::NpmPackageFsResolver; +use super::CliNpmCache; +use super::CliNpmCacheEnv; +use super::CliNpmRegistryInfoProvider; use super::CliNpmResolver; +use super::CliNpmTarballCache; use super::InnerCliNpmResolverRef; use super::ResolvePkgFolderFromDenoReqError; -pub mod cache; mod registry; mod resolution; mod resolvers; @@ -85,8 +84,9 @@ pub struct 
CliManagedNpmResolverCreateOptions { pub async fn create_managed_npm_resolver_for_lsp( options: CliManagedNpmResolverCreateOptions, ) -> Arc { - let npm_cache = create_cache(&options); - let npm_api = create_api(&options, npm_cache.clone()); + let cache_env = create_cache_env(&options); + let npm_cache = create_cache(cache_env.clone(), &options); + let npm_api = create_api(npm_cache.clone(), cache_env.clone(), &options); // spawn due to the lsp's `Send` requirement deno_core::unsync::spawn(async move { let snapshot = match resolve_snapshot(&npm_api, options.snapshot).await { @@ -97,8 +97,8 @@ pub async fn create_managed_npm_resolver_for_lsp( } }; create_inner( + cache_env, options.fs, - options.http_client_provider, options.maybe_lockfile, npm_api, npm_cache, @@ -118,12 +118,13 @@ pub async fn create_managed_npm_resolver_for_lsp( pub async fn create_managed_npm_resolver( options: CliManagedNpmResolverCreateOptions, ) -> Result, AnyError> { - let npm_cache = create_cache(&options); - let npm_api = create_api(&options, npm_cache.clone()); + let npm_cache_env = create_cache_env(&options); + let npm_cache = create_cache(npm_cache_env.clone(), &options); + let npm_api = create_api(npm_cache.clone(), npm_cache_env.clone(), &options); let snapshot = resolve_snapshot(&npm_api, options.snapshot).await?; Ok(create_inner( + npm_cache_env, options.fs, - options.http_client_provider, options.maybe_lockfile, npm_api, npm_cache, @@ -139,11 +140,11 @@ pub async fn create_managed_npm_resolver( #[allow(clippy::too_many_arguments)] fn create_inner( + env: Arc, fs: Arc, - http_client_provider: Arc, maybe_lockfile: Option>, npm_api: Arc, - npm_cache: Arc, + npm_cache: Arc, npm_rc: Arc, npm_install_deps_provider: Arc, text_only_progress_bar: crate::util::progress_bar::ProgressBar, @@ -157,12 +158,10 @@ fn create_inner( snapshot, maybe_lockfile.clone(), )); - let tarball_cache = Arc::new(TarballCache::new( + let tarball_cache = Arc::new(CliNpmTarballCache::new( npm_cache.clone(), - fs.clone(), - http_client_provider.clone(), + env, npm_rc.clone(), - text_only_progress_bar.clone(), )); let fs_resolver = create_npm_fs_resolver( fs.clone(), @@ -190,25 +189,39 @@ fn create_inner( )) } -fn create_cache(options: &CliManagedNpmResolverCreateOptions) -> Arc { - Arc::new(NpmCache::new( +fn create_cache_env( + options: &CliManagedNpmResolverCreateOptions, +) -> Arc { + Arc::new(CliNpmCacheEnv::new( + options.fs.clone(), + options.http_client_provider.clone(), + options.text_only_progress_bar.clone(), + )) +} + +fn create_cache( + env: Arc, + options: &CliManagedNpmResolverCreateOptions, +) -> Arc { + Arc::new(CliNpmCache::new( options.npm_cache_dir.clone(), - options.cache_setting.clone(), + options.cache_setting.as_npm_cache_setting(), + env, options.npmrc.clone(), )) } fn create_api( + cache: Arc, + env: Arc, options: &CliManagedNpmResolverCreateOptions, - npm_cache: Arc, ) -> Arc { Arc::new(CliNpmRegistryApi::new( - npm_cache.clone(), - Arc::new(RegistryInfoDownloader::new( - npm_cache, - options.http_client_provider.clone(), + cache.clone(), + Arc::new(CliNpmRegistryInfoProvider::new( + cache, + env, options.npmrc.clone(), - options.text_only_progress_bar.clone(), )), )) } @@ -292,10 +305,10 @@ pub struct ManagedCliNpmResolver { fs_resolver: Arc, maybe_lockfile: Option>, npm_api: Arc, - npm_cache: Arc, + npm_cache: Arc, npm_install_deps_provider: Arc, resolution: Arc, - tarball_cache: Arc, + tarball_cache: Arc, text_only_progress_bar: ProgressBar, npm_system_info: NpmSystemInfo, top_level_install_flag: AtomicFlag, @@ 
-317,10 +330,10 @@ impl ManagedCliNpmResolver { fs_resolver: Arc, maybe_lockfile: Option>, npm_api: Arc, - npm_cache: Arc, + npm_cache: Arc, npm_install_deps_provider: Arc, resolution: Arc, - tarball_cache: Arc, + tarball_cache: Arc, text_only_progress_bar: ProgressBar, npm_system_info: NpmSystemInfo, lifecycle_scripts: LifecycleScriptsConfig, diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs index 8f15d619b9..b431c77c5d 100644 --- a/cli/npm/managed/registry.rs +++ b/cli/npm/managed/registry.rs @@ -14,27 +14,28 @@ use deno_core::parking_lot::Mutex; use deno_npm::registry::NpmPackageInfo; use deno_npm::registry::NpmRegistryApi; use deno_npm::registry::NpmRegistryPackageInfoLoadError; +use deno_npm_cache::NpmCacheSetting; -use crate::args::CacheSetting; +use crate::npm::CliNpmCache; +use crate::npm::CliNpmRegistryInfoProvider; use crate::util::sync::AtomicFlag; -use super::cache::NpmCache; -use super::cache::RegistryInfoDownloader; - +// todo(#27198): Remove this and move functionality down into +// RegistryInfoProvider, which already does most of this. #[derive(Debug)] pub struct CliNpmRegistryApi(Option>); impl CliNpmRegistryApi { pub fn new( - cache: Arc, - registry_info_downloader: Arc, + cache: Arc, + registry_info_provider: Arc, ) -> Self { Self(Some(Arc::new(CliNpmRegistryApiInner { cache, force_reload_flag: Default::default(), mem_cache: Default::default(), previously_reloaded_packages: Default::default(), - registry_info_downloader, + registry_info_provider, }))) } @@ -83,11 +84,11 @@ enum CacheItem { #[derive(Debug)] struct CliNpmRegistryApiInner { - cache: Arc, + cache: Arc, force_reload_flag: AtomicFlag, mem_cache: Mutex>, previously_reloaded_packages: Mutex>, - registry_info_downloader: Arc, + registry_info_provider: Arc, } impl CliNpmRegistryApiInner { @@ -118,7 +119,7 @@ impl CliNpmRegistryApiInner { return Ok(result); } } - api.registry_info_downloader + api.registry_info_provider .load_package_info(&name) .await .map_err(Arc::new) @@ -159,7 +160,7 @@ impl CliNpmRegistryApiInner { // is disabled or if we're already reloading if matches!( self.cache.cache_setting(), - CacheSetting::Only | CacheSetting::ReloadAll + NpmCacheSetting::Only | NpmCacheSetting::ReloadAll ) { return false; } diff --git a/cli/npm/managed/resolvers/common.rs b/cli/npm/managed/resolvers/common.rs index eee11c7604..332756daa4 100644 --- a/cli/npm/managed/resolvers/common.rs +++ b/cli/npm/managed/resolvers/common.rs @@ -24,7 +24,7 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use node_resolver::errors::PackageFolderResolveError; -use crate::npm::managed::cache::TarballCache; +use crate::npm::CliNpmTarballCache; /// Part of the resolution that interacts with the file system. #[async_trait(?Send)] @@ -140,7 +140,7 @@ impl RegistryReadPermissionChecker { /// Caches all the packages in parallel. 
pub async fn cache_packages( packages: &[NpmResolutionPackage], - tarball_cache: &Arc, + tarball_cache: &Arc, ) -> Result<(), AnyError> { let mut futures_unordered = futures::stream::FuturesUnordered::new(); for package in packages { diff --git a/cli/npm/managed/resolvers/global.rs b/cli/npm/managed/resolvers/global.rs index f0193e78e9..2b48c3d2fc 100644 --- a/cli/npm/managed/resolvers/global.rs +++ b/cli/npm/managed/resolvers/global.rs @@ -8,6 +8,8 @@ use std::path::PathBuf; use std::sync::Arc; use crate::colors; +use crate::npm::CliNpmCache; +use crate::npm::CliNpmTarballCache; use async_trait::async_trait; use deno_ast::ModuleSpecifier; use deno_core::error::AnyError; @@ -24,8 +26,6 @@ use node_resolver::errors::ReferrerNotFoundError; use crate::args::LifecycleScriptsConfig; use crate::cache::FastInsecureHasher; -use super::super::cache::NpmCache; -use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::cache_packages; use super::common::lifecycle_scripts::LifecycleScriptsStrategy; @@ -35,8 +35,8 @@ use super::common::RegistryReadPermissionChecker; /// Resolves packages from the global npm cache. #[derive(Debug)] pub struct GlobalNpmPackageResolver { - cache: Arc, - tarball_cache: Arc, + cache: Arc, + tarball_cache: Arc, resolution: Arc, system_info: NpmSystemInfo, registry_read_permission_checker: RegistryReadPermissionChecker, @@ -45,9 +45,9 @@ pub struct GlobalNpmPackageResolver { impl GlobalNpmPackageResolver { pub fn new( - cache: Arc, + cache: Arc, fs: Arc, - tarball_cache: Arc, + tarball_cache: Arc, resolution: Arc, system_info: NpmSystemInfo, lifecycle_scripts: LifecycleScriptsConfig, diff --git a/cli/npm/managed/resolvers/local.rs b/cli/npm/managed/resolvers/local.rs index ca7867425d..0c279d9e12 100644 --- a/cli/npm/managed/resolvers/local.rs +++ b/cli/npm/managed/resolvers/local.rs @@ -17,6 +17,8 @@ use std::sync::Arc; use crate::args::LifecycleScriptsConfig; use crate::colors; +use crate::npm::CliNpmCache; +use crate::npm::CliNpmTarballCache; use async_trait::async_trait; use deno_ast::ModuleSpecifier; use deno_cache_dir::npm::mixed_case_package_name_decode; @@ -52,8 +54,6 @@ use crate::util::fs::LaxSingleProcessFsFlag; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressMessagePrompt; -use super::super::cache::NpmCache; -use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::bin_entries; use super::common::NpmPackageFsResolver; @@ -63,12 +63,12 @@ use super::common::RegistryReadPermissionChecker; /// and resolves packages from it. 
#[derive(Debug)] pub struct LocalNpmPackageResolver { - cache: Arc, + cache: Arc, fs: Arc, npm_install_deps_provider: Arc, progress_bar: ProgressBar, resolution: Arc, - tarball_cache: Arc, + tarball_cache: Arc, root_node_modules_path: PathBuf, root_node_modules_url: Url, system_info: NpmSystemInfo, @@ -79,12 +79,12 @@ pub struct LocalNpmPackageResolver { impl LocalNpmPackageResolver { #[allow(clippy::too_many_arguments)] pub fn new( - cache: Arc, + cache: Arc, fs: Arc, npm_install_deps_provider: Arc, progress_bar: ProgressBar, resolution: Arc, - tarball_cache: Arc, + tarball_cache: Arc, node_modules_folder: PathBuf, system_info: NpmSystemInfo, lifecycle_scripts: LifecycleScriptsConfig, @@ -284,10 +284,10 @@ fn local_node_modules_package_contents_path( #[allow(clippy::too_many_arguments)] async fn sync_resolution_with_fs( snapshot: &NpmResolutionSnapshot, - cache: &Arc, + cache: &Arc, npm_install_deps_provider: &NpmInstallDepsProvider, progress_bar: &ProgressBar, - tarball_cache: &Arc, + tarball_cache: &Arc, root_node_modules_dir_path: &Path, system_info: &NpmSystemInfo, lifecycle_scripts: &LifecycleScriptsConfig, diff --git a/cli/npm/managed/resolvers/mod.rs b/cli/npm/managed/resolvers/mod.rs index 36d795ee7e..736270749f 100644 --- a/cli/npm/managed/resolvers/mod.rs +++ b/cli/npm/managed/resolvers/mod.rs @@ -12,6 +12,8 @@ use deno_runtime::deno_fs::FileSystem; use crate::args::LifecycleScriptsConfig; use crate::args::NpmInstallDepsProvider; +use crate::npm::CliNpmCache; +use crate::npm::CliNpmTarballCache; use crate::util::progress_bar::ProgressBar; pub use self::common::NpmPackageFsResolver; @@ -19,18 +21,16 @@ pub use self::common::NpmPackageFsResolver; use self::global::GlobalNpmPackageResolver; use self::local::LocalNpmPackageResolver; -use super::cache::NpmCache; -use super::cache::TarballCache; use super::resolution::NpmResolution; #[allow(clippy::too_many_arguments)] pub fn create_npm_fs_resolver( fs: Arc, - npm_cache: Arc, + npm_cache: Arc, npm_install_deps_provider: &Arc, progress_bar: &ProgressBar, resolution: Arc, - tarball_cache: Arc, + tarball_cache: Arc, maybe_node_modules_path: Option, system_info: NpmSystemInfo, lifecycle_scripts: LifecycleScriptsConfig, diff --git a/cli/npm/mod.rs b/cli/npm/mod.rs index 0e955ac5b4..48d90d7dd0 100644 --- a/cli/npm/mod.rs +++ b/cli/npm/mod.rs @@ -1,33 +1,39 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
mod byonm; -mod common; mod managed; use std::borrow::Cow; use std::path::Path; use std::sync::Arc; -use common::maybe_auth_header_for_npm_registry; use dashmap::DashMap; use deno_core::error::AnyError; use deno_core::serde_json; +use deno_core::url::Url; use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageInfo; use deno_resolver::npm::ByonmInNpmPackageChecker; use deno_resolver::npm::ByonmNpmResolver; use deno_resolver::npm::CliNpmReqResolver; use deno_resolver::npm::ResolvePkgFolderFromDenoReqError; +use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::ops::process::NpmProcessStateProvider; use deno_semver::package::PackageNv; use deno_semver::package::PackageReq; -use managed::cache::registry_info::get_package_url; +use http::HeaderName; +use http::HeaderValue; use managed::create_managed_in_npm_pkg_checker; use node_resolver::InNpmPackageChecker; use node_resolver::NpmPackageFolderResolver; use crate::file_fetcher::FileFetcher; +use crate::http_util::HttpClientProvider; +use crate::util::fs::atomic_write_file_with_retries_and_fs; +use crate::util::fs::hard_link_dir_recursive; +use crate::util::fs::AtomicWriteFileFsAdapter; +use crate::util::progress_bar::ProgressBar; pub use self::byonm::CliByonmNpmResolver; pub use self::byonm::CliByonmNpmResolverCreateOptions; @@ -36,6 +42,99 @@ pub use self::managed::CliManagedNpmResolverCreateOptions; pub use self::managed::CliNpmResolverManagedSnapshotOption; pub use self::managed::ManagedCliNpmResolver; +pub type CliNpmTarballCache = deno_npm_cache::TarballCache; +pub type CliNpmCache = deno_npm_cache::NpmCache; +pub type CliNpmRegistryInfoProvider = + deno_npm_cache::RegistryInfoProvider; + +#[derive(Debug)] +pub struct CliNpmCacheEnv { + fs: Arc, + http_client_provider: Arc, + progress_bar: ProgressBar, +} + +impl CliNpmCacheEnv { + pub fn new( + fs: Arc, + http_client_provider: Arc, + progress_bar: ProgressBar, + ) -> Self { + Self { + fs, + http_client_provider, + progress_bar, + } + } +} + +#[async_trait::async_trait(?Send)] +impl deno_npm_cache::NpmCacheEnv for CliNpmCacheEnv { + fn exists(&self, path: &Path) -> bool { + self.fs.exists_sync(path) + } + + fn hard_link_dir_recursive( + &self, + from: &Path, + to: &Path, + ) -> Result<(), AnyError> { + // todo(dsherret): use self.fs here instead + hard_link_dir_recursive(from, to) + } + + fn atomic_write_file_with_retries( + &self, + file_path: &Path, + data: &[u8], + ) -> std::io::Result<()> { + atomic_write_file_with_retries_and_fs( + &AtomicWriteFileFsAdapter { + fs: self.fs.as_ref(), + write_mode: crate::cache::CACHE_PERM, + }, + file_path, + data, + ) + } + + async fn download_with_retries_on_any_tokio_runtime( + &self, + url: Url, + maybe_auth_header: Option<(HeaderName, HeaderValue)>, + ) -> Result>, deno_npm_cache::DownloadError> { + let guard = self.progress_bar.update(url.as_str()); + let client = self.http_client_provider.get_or_create().map_err(|err| { + deno_npm_cache::DownloadError { + status_code: None, + error: err, + } + })?; + client + .download_with_progress_and_retries(url, maybe_auth_header, &guard) + .await + .map_err(|err| { + use crate::http_util::DownloadError::*; + let status_code = match &err { + Fetch { .. } + | UrlParse { .. } + | HttpParse { .. } + | Json { .. } + | ToStr { .. } + | NoRedirectHeader { .. 
} + | TooManyRedirects => None, + BadResponse(bad_response_error) => { + Some(bad_response_error.status_code) + } + }; + deno_npm_cache::DownloadError { + status_code, + error: err.into(), + } + }) + } +} + pub enum CliNpmResolverCreateOptions { Managed(CliManagedNpmResolverCreateOptions), Byonm(CliByonmNpmResolverCreateOptions), @@ -179,13 +278,15 @@ impl NpmFetchResolver { if let Some(info) = self.info_by_name.get(name) { return info.value().clone(); } + // todo(#27198): use RegistryInfoProvider instead let fetch_package_info = || async { - let info_url = get_package_url(&self.npmrc, name); + let info_url = deno_npm_cache::get_package_url(&self.npmrc, name); let file_fetcher = self.file_fetcher.clone(); let registry_config = self.npmrc.get_registry_config(name); // TODO(bartlomieju): this should error out, not use `.ok()`. let maybe_auth_header = - maybe_auth_header_for_npm_registry(registry_config).ok()?; + deno_npm_cache::maybe_auth_header_for_npm_registry(registry_config) + .ok()?; // spawn due to the lsp's `Send` requirement let file = deno_core::unsync::spawn(async move { file_fetcher diff --git a/cli/util/path.rs b/cli/util/path.rs index 173f357c08..df66b83766 100644 --- a/cli/util/path.rs +++ b/cli/util/path.rs @@ -51,19 +51,6 @@ pub fn get_extension(file_path: &Path) -> Option { .map(|e| e.to_lowercase()); } -pub fn get_atomic_dir_path(file_path: &Path) -> PathBuf { - let rand = gen_rand_path_component(); - let new_file_name = format!( - ".{}_{}", - file_path - .file_name() - .map(|f| f.to_string_lossy()) - .unwrap_or(Cow::Borrowed("")), - rand - ); - file_path.with_file_name(new_file_name) -} - pub fn get_atomic_file_path(file_path: &Path) -> PathBuf { let rand = gen_rand_path_component(); let extension = format!("{rand}.tmp"); diff --git a/cli/util/sync/mod.rs b/cli/util/sync/mod.rs index 3c2ffbd7dd..c3b2a315b0 100644 --- a/cli/util/sync/mod.rs +++ b/cli/util/sync/mod.rs @@ -3,11 +3,9 @@ mod async_flag; mod sync_read_async_write_lock; mod task_queue; -mod value_creator; pub use async_flag::AsyncFlag; pub use deno_core::unsync::sync::AtomicFlag; pub use sync_read_async_write_lock::SyncReadAsyncWriteLock; pub use task_queue::TaskQueue; pub use task_queue::TaskQueuePermit; -pub use value_creator::MultiRuntimeAsyncValueCreator; diff --git a/cli/util/sync/value_creator.rs b/cli/util/sync/value_creator.rs deleted file mode 100644 index 57aabe801a..0000000000 --- a/cli/util/sync/value_creator.rs +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -use std::sync::Arc; - -use deno_core::futures::future::BoxFuture; -use deno_core::futures::future::LocalBoxFuture; -use deno_core::futures::future::Shared; -use deno_core::futures::FutureExt; -use deno_core::parking_lot::Mutex; -use tokio::task::JoinError; - -type JoinResult = Result>; -type CreateFutureFn = - Box LocalBoxFuture<'static, TResult> + Send + Sync>; - -#[derive(Debug)] -struct State { - retry_index: usize, - future: Option>>>, -} - -/// Attempts to create a shared value asynchronously on one tokio runtime while -/// many runtimes are requesting the value. -/// -/// This is only useful when the value needs to get created once across -/// many runtimes. -/// -/// This handles the case where the tokio runtime creating the value goes down -/// while another one is waiting on the value. 
-pub struct MultiRuntimeAsyncValueCreator { - create_future: CreateFutureFn, - state: Mutex>, -} - -impl std::fmt::Debug - for MultiRuntimeAsyncValueCreator -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MultiRuntimeAsyncValueCreator").finish() - } -} - -impl MultiRuntimeAsyncValueCreator { - pub fn new(create_future: CreateFutureFn) -> Self { - Self { - state: Mutex::new(State { - retry_index: 0, - future: None, - }), - create_future, - } - } - - pub async fn get(&self) -> TResult { - let (mut future, mut retry_index) = { - let mut state = self.state.lock(); - let future = match &state.future { - Some(future) => future.clone(), - None => { - let future = self.create_shared_future(); - state.future = Some(future.clone()); - future - } - }; - (future, state.retry_index) - }; - - loop { - let result = future.await; - - match result { - Ok(result) => return result, - Err(join_error) => { - if join_error.is_cancelled() { - let mut state = self.state.lock(); - - if state.retry_index == retry_index { - // we were the first one to retry, so create a new future - // that we'll run from the current runtime - state.retry_index += 1; - state.future = Some(self.create_shared_future()); - } - - retry_index = state.retry_index; - future = state.future.as_ref().unwrap().clone(); - - // just in case we're stuck in a loop - if retry_index > 1000 { - panic!("Something went wrong.") // should never happen - } - } else { - panic!("{}", join_error); - } - } - } - } - } - - fn create_shared_future( - &self, - ) -> Shared>> { - let future = (self.create_future)(); - deno_core::unsync::spawn(future) - .map(|result| result.map_err(Arc::new)) - .boxed() - .shared() - } -} - -#[cfg(test)] -mod test { - use deno_core::unsync::spawn; - - use super::*; - - #[tokio::test] - async fn single_runtime() { - let value_creator = MultiRuntimeAsyncValueCreator::new(Box::new(|| { - async { 1 }.boxed_local() - })); - let value = value_creator.get().await; - assert_eq!(value, 1); - } - - #[test] - fn multi_runtimes() { - let value_creator = - Arc::new(MultiRuntimeAsyncValueCreator::new(Box::new(|| { - async { - tokio::task::yield_now().await; - 1 - } - .boxed_local() - }))); - let handles = (0..3) - .map(|_| { - let value_creator = value_creator.clone(); - std::thread::spawn(|| { - create_runtime().block_on(async move { value_creator.get().await }) - }) - }) - .collect::>(); - for handle in handles { - assert_eq!(handle.join().unwrap(), 1); - } - } - - #[test] - fn multi_runtimes_first_never_finishes() { - let is_first_run = Arc::new(Mutex::new(true)); - let (tx, rx) = std::sync::mpsc::channel::<()>(); - let value_creator = Arc::new(MultiRuntimeAsyncValueCreator::new({ - let is_first_run = is_first_run.clone(); - Box::new(move || { - let is_first_run = is_first_run.clone(); - let tx = tx.clone(); - async move { - let is_first_run = { - let mut is_first_run = is_first_run.lock(); - let initial_value = *is_first_run; - *is_first_run = false; - tx.send(()).unwrap(); - initial_value - }; - if is_first_run { - tokio::time::sleep(std::time::Duration::from_millis(30_000)).await; - panic!("TIMED OUT"); // should not happen - } else { - tokio::task::yield_now().await; - } - 1 - } - .boxed_local() - }) - })); - std::thread::spawn({ - let value_creator = value_creator.clone(); - let is_first_run = is_first_run.clone(); - move || { - create_runtime().block_on(async { - let value_creator = value_creator.clone(); - // spawn a task that will never complete - spawn(async move { 
value_creator.get().await }); - // wait for the task to set is_first_run to false - while *is_first_run.lock() { - tokio::time::sleep(std::time::Duration::from_millis(20)).await; - } - // now exit the runtime while the value_creator is still pending - }) - } - }); - let handle = { - let value_creator = value_creator.clone(); - std::thread::spawn(|| { - create_runtime().block_on(async move { - let value_creator = value_creator.clone(); - rx.recv().unwrap(); - // even though the other runtime shutdown, this get() should - // recover and still get the value - value_creator.get().await - }) - }) - }; - assert_eq!(handle.join().unwrap(), 1); - } - - fn create_runtime() -> tokio::runtime::Runtime { - tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap() - } -} diff --git a/resolvers/npm_cache/Cargo.toml b/resolvers/npm_cache/Cargo.toml new file mode 100644 index 0000000000..df01f62131 --- /dev/null +++ b/resolvers/npm_cache/Cargo.toml @@ -0,0 +1,42 @@ +# Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +[package] +name = "deno_npm_cache" +version = "0.0.1" +authors.workspace = true +edition.workspace = true +license.workspace = true +readme = "README.md" +repository.workspace = true +description = "Helpers for downloading and caching npm dependencies for Deno" + +[lib] +path = "lib.rs" + +[dependencies] +# todo(dsherret): remove this dependency +anyhow.workspace = true +# todo(dsherret): remove this dependency +deno_core.workspace = true + +async-trait.workspace = true +base64.workspace = true +boxed_error.workspace = true +deno_cache_dir.workspace = true +deno_npm.workspace = true +deno_semver.workspace = true +deno_unsync = { workspace = true, features = ["tokio"] } +faster-hex.workspace = true +flate2 = { workspace = true, features = ["zlib-ng-compat"] } +futures.workspace = true +http.workspace = true +log.workspace = true +parking_lot.workspace = true +percent-encoding.workspace = true +rand.workspace = true +ring.workspace = true +serde_json.workspace = true +tar.workspace = true +tempfile = "3.4.0" +thiserror.workspace = true +url.workspace = true diff --git a/resolvers/npm_cache/README.md b/resolvers/npm_cache/README.md new file mode 100644 index 0000000000..a7edbb4159 --- /dev/null +++ b/resolvers/npm_cache/README.md @@ -0,0 +1,6 @@ +# deno_npm_cache + +[![crates](https://img.shields.io/crates/v/deno_npm_cache.svg)](https://crates.io/crates/deno_npm_cache) +[![docs](https://docs.rs/deno_npm_cache/badge.svg)](https://docs.rs/deno_npm_cache) + +Helpers for downloading and caching npm dependencies for Deno. diff --git a/cli/npm/managed/cache/mod.rs b/resolvers/npm_cache/lib.rs similarity index 71% rename from cli/npm/managed/cache/mod.rs rename to resolvers/npm_cache/lib.rs index 8ae99f41e0..4e8966a4e1 100644 --- a/cli/npm/managed/cache/mod.rs +++ b/resolvers/npm_cache/lib.rs @@ -1,63 +1,133 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
use std::collections::HashSet; -use std::fs; use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; -use deno_ast::ModuleSpecifier; +use anyhow::bail; +use anyhow::Context; +use anyhow::Error as AnyError; use deno_cache_dir::npm::NpmCacheDir; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::AnyError; -use deno_core::parking_lot::Mutex; -use deno_core::serde_json; -use deno_core::url::Url; use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageInfo; use deno_npm::NpmPackageCacheFolderId; use deno_semver::package::PackageNv; use deno_semver::Version; +use http::HeaderName; +use http::HeaderValue; +use http::StatusCode; +use parking_lot::Mutex; +use url::Url; -use crate::args::CacheSetting; -use crate::cache::CACHE_PERM; -use crate::util::fs::atomic_write_file_with_retries; -use crate::util::fs::hard_link_dir_recursive; - -pub mod registry_info; +mod registry_info; +mod remote; mod tarball; mod tarball_extract; -pub use registry_info::RegistryInfoDownloader; +pub use registry_info::RegistryInfoProvider; pub use tarball::TarballCache; +// todo(#27198): make both of these private and get the rest of the code +// using RegistryInfoProvider. +pub use registry_info::get_package_url; +pub use remote::maybe_auth_header_for_npm_registry; + +#[derive(Debug)] +pub struct DownloadError { + pub status_code: Option, + pub error: AnyError, +} + +impl std::error::Error for DownloadError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(self.error.as_ref()) + } +} + +impl std::fmt::Display for DownloadError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.error.fmt(f) + } +} + +#[async_trait::async_trait(?Send)] +pub trait NpmCacheEnv: Send + Sync + 'static { + fn exists(&self, path: &Path) -> bool; + fn hard_link_dir_recursive( + &self, + from: &Path, + to: &Path, + ) -> Result<(), AnyError>; + fn atomic_write_file_with_retries( + &self, + file_path: &Path, + data: &[u8], + ) -> std::io::Result<()>; + async fn download_with_retries_on_any_tokio_runtime( + &self, + url: Url, + maybe_auth_header: Option<(HeaderName, HeaderValue)>, + ) -> Result>, DownloadError>; +} + +/// Indicates how cached source files should be handled. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum NpmCacheSetting { + /// Only the cached files should be used. Any files not in the cache will + /// error. This is the equivalent of `--cached-only` in the CLI. + Only, + /// No cached source files should be used, and all files should be reloaded. + /// This is the equivalent of `--reload` in the CLI. + ReloadAll, + /// Only some cached resources should be used. This is the equivalent of + /// `--reload=npm:chalk` + ReloadSome { npm_package_names: Vec }, + /// The cached source files should be used for local modules. This is the + /// default behavior of the CLI. + Use, +} + +impl NpmCacheSetting { + pub fn should_use_for_npm_package(&self, package_name: &str) -> bool { + match self { + NpmCacheSetting::ReloadAll => false, + NpmCacheSetting::ReloadSome { npm_package_names } => { + !npm_package_names.iter().any(|n| n == package_name) + } + _ => true, + } + } +} + /// Stores a single copy of npm packages in a cache. 
#[derive(Debug)] -pub struct NpmCache { +pub struct NpmCache { + env: Arc, cache_dir: Arc, - cache_setting: CacheSetting, + cache_setting: NpmCacheSetting, npmrc: Arc, - /// ensures a package is only downloaded once per run previously_reloaded_packages: Mutex>, } -impl NpmCache { +impl NpmCache { pub fn new( cache_dir: Arc, - cache_setting: CacheSetting, + cache_setting: NpmCacheSetting, + env: Arc, npmrc: Arc, ) -> Self { Self { cache_dir, cache_setting, + env, previously_reloaded_packages: Default::default(), npmrc, } } - pub fn cache_setting(&self) -> &CacheSetting { + pub fn cache_setting(&self) -> &NpmCacheSetting { &self.cache_setting } @@ -118,7 +188,9 @@ impl NpmCache { // it seems Windows does an "AccessDenied" error when moving a // directory with hard links, so that's why this solution is done with_folder_sync_lock(&folder_id.nv, &package_folder, || { - hard_link_dir_recursive(&original_package_folder, &package_folder) + self + .env + .hard_link_dir_recursive(&original_package_folder, &package_folder) })?; Ok(()) } @@ -158,7 +230,7 @@ impl NpmCache { pub fn resolve_package_folder_id_from_specifier( &self, - specifier: &ModuleSpecifier, + specifier: &Url, ) -> Option { self .cache_dir @@ -180,7 +252,7 @@ impl NpmCache { ) -> Result, AnyError> { let file_cache_path = self.get_registry_package_info_file_cache_path(name); - let file_text = match fs::read_to_string(file_cache_path) { + let file_text = match std::fs::read_to_string(file_cache_path) { Ok(file_text) => file_text, Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err.into()), @@ -195,7 +267,9 @@ impl NpmCache { ) -> Result<(), AnyError> { let file_cache_path = self.get_registry_package_info_file_cache_path(name); let file_text = serde_json::to_string(&package_info)?; - atomic_write_file_with_retries(&file_cache_path, file_text, CACHE_PERM)?; + self + .env + .atomic_write_file_with_retries(&file_cache_path, file_text.as_bytes())?; Ok(()) } @@ -216,7 +290,7 @@ fn with_folder_sync_lock( output_folder: &Path, action: impl FnOnce() -> Result<(), AnyError>, ) -> Result<(), AnyError> { - fs::create_dir_all(output_folder).with_context(|| { + std::fs::create_dir_all(output_folder).with_context(|| { format!("Error creating '{}'.", output_folder.display()) })?; @@ -229,7 +303,7 @@ fn with_folder_sync_lock( // then wait until the other process finishes with a timeout), but // for now this is good enough. 
let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME); - match fs::OpenOptions::new() + match std::fs::OpenOptions::new() .write(true) .create(true) .truncate(false) @@ -257,7 +331,7 @@ fn with_folder_sync_lock( match inner(output_folder, action) { Ok(()) => Ok(()), Err(err) => { - if let Err(remove_err) = fs::remove_dir_all(output_folder) { + if let Err(remove_err) = std::fs::remove_dir_all(output_folder) { if remove_err.kind() != std::io::ErrorKind::NotFound { bail!( concat!( diff --git a/cli/npm/managed/cache/registry_info.rs b/resolvers/npm_cache/registry_info.rs similarity index 84% rename from cli/npm/managed/cache/registry_info.rs rename to resolvers/npm_cache/registry_info.rs index 6d39d3c13f..7ab50f0495 100644 --- a/cli/npm/managed/cache/registry_info.rs +++ b/resolvers/npm_cache/registry_info.rs @@ -3,28 +3,22 @@ use std::collections::HashMap; use std::sync::Arc; -use deno_core::anyhow::anyhow; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; -use deno_core::error::AnyError; -use deno_core::futures::future::LocalBoxFuture; -use deno_core::futures::FutureExt; -use deno_core::parking_lot::Mutex; -use deno_core::serde_json; -use deno_core::url::Url; +use anyhow::anyhow; +use anyhow::bail; +use anyhow::Context; +use anyhow::Error as AnyError; use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageInfo; +use deno_unsync::sync::MultiRuntimeAsyncValueCreator; +use futures::future::LocalBoxFuture; +use futures::FutureExt; +use parking_lot::Mutex; +use url::Url; -use crate::args::CacheSetting; -use crate::http_util::HttpClientProvider; -use crate::npm::common::maybe_auth_header_for_npm_registry; -use crate::util::progress_bar::ProgressBar; -use crate::util::sync::MultiRuntimeAsyncValueCreator; - -use super::NpmCache; - -// todo(dsherret): create seams and unit test this +use crate::remote::maybe_auth_header_for_npm_registry; +use crate::NpmCache; +use crate::NpmCacheEnv; +use crate::NpmCacheSetting; type LoadResult = Result>; type LoadFuture = LocalBoxFuture<'static, LoadResult>; @@ -49,30 +43,31 @@ enum MemoryCacheItem { MemoryCached(Result>, Arc>), } +// todo(#27198): refactor to store this only in the http cache and also +// consolidate with CliNpmRegistryApi. + /// Downloads packuments from the npm registry. /// /// This is shared amongst all the workers. #[derive(Debug)] -pub struct RegistryInfoDownloader { - cache: Arc, - http_client_provider: Arc, +pub struct RegistryInfoProvider { + // todo(#27198): remove this + cache: Arc>, + env: Arc, npmrc: Arc, - progress_bar: ProgressBar, memory_cache: Mutex>, } -impl RegistryInfoDownloader { +impl RegistryInfoProvider { pub fn new( - cache: Arc, - http_client_provider: Arc, + cache: Arc>, + env: Arc, npmrc: Arc, - progress_bar: ProgressBar, ) -> Self { Self { cache, - http_client_provider, + env, npmrc, - progress_bar, memory_cache: Default::default(), } } @@ -94,8 +89,8 @@ impl RegistryInfoDownloader { self: &Arc, name: &str, ) -> Result>, AnyError> { - if *self.cache.cache_setting() == CacheSetting::Only { - return Err(custom_error( + if *self.cache.cache_setting() == NpmCacheSetting::Only { + return Err(deno_core::error::custom_error( "NotCached", format!( "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
@@ -167,7 +162,7 @@ impl RegistryInfoDownloader { ) -> Result { // this scenario failing should be exceptionally rare so let's // deal with improving it only when anyone runs into an issue - let maybe_package_info = deno_core::unsync::spawn_blocking({ + let maybe_package_info = deno_unsync::spawn_blocking({ let cache = self.cache.clone(); let name = name.to_string(); move || cache.load_package_info(&name) @@ -199,20 +194,18 @@ impl RegistryInfoDownloader { return std::future::ready(Err(Arc::new(err))).boxed_local() } }; - let guard = self.progress_bar.update(package_url.as_str()); let name = name.to_string(); async move { - let client = downloader.http_client_provider.get_or_create()?; - let maybe_bytes = client - .download_with_progress_and_retries( + let maybe_bytes = downloader + .env + .download_with_retries_on_any_tokio_runtime( package_url, maybe_auth_header, - &guard, ) .await?; match maybe_bytes { Some(bytes) => { - let future_result = deno_core::unsync::spawn_blocking( + let future_result = deno_unsync::spawn_blocking( move || -> Result { let package_info = serde_json::from_slice(&bytes)?; match downloader.cache.save_package_info(&name, &package_info) { @@ -241,6 +234,8 @@ impl RegistryInfoDownloader { } } +// todo(#27198): make this private and only use RegistryInfoProvider in the rest of +// the code pub fn get_package_url(npmrc: &ResolvedNpmRc, name: &str) -> Url { let registry_url = npmrc.get_registry_url(name); // The '/' character in scoped package names "@scope/name" must be diff --git a/cli/npm/common.rs b/resolvers/npm_cache/remote.rs similarity index 95% rename from cli/npm/common.rs rename to resolvers/npm_cache/remote.rs index 55f1bc086d..538554612f 100644 --- a/cli/npm/common.rs +++ b/resolvers/npm_cache/remote.rs @@ -1,10 +1,10 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
+use anyhow::bail; +use anyhow::Context; +use anyhow::Error as AnyError; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::AnyError; use deno_npm::npm_rc::RegistryConfig; use http::header; diff --git a/cli/npm/managed/cache/tarball.rs b/resolvers/npm_cache/tarball.rs similarity index 77% rename from cli/npm/managed/cache/tarball.rs rename to resolvers/npm_cache/tarball.rs index 7cf88d6d64..3102d811d1 100644 --- a/cli/npm/managed/cache/tarball.rs +++ b/resolvers/npm_cache/tarball.rs @@ -3,33 +3,26 @@ use std::collections::HashMap; use std::sync::Arc; -use deno_core::anyhow::anyhow; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; -use deno_core::error::AnyError; -use deno_core::futures::future::LocalBoxFuture; -use deno_core::futures::FutureExt; -use deno_core::parking_lot::Mutex; -use deno_core::url::Url; +use anyhow::anyhow; +use anyhow::bail; +use anyhow::Context; +use anyhow::Error as AnyError; use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageVersionDistInfo; -use deno_runtime::deno_fs::FileSystem; use deno_semver::package::PackageNv; +use deno_unsync::sync::MultiRuntimeAsyncValueCreator; +use futures::future::LocalBoxFuture; +use futures::FutureExt; use http::StatusCode; +use parking_lot::Mutex; +use url::Url; -use crate::args::CacheSetting; -use crate::http_util::DownloadError; -use crate::http_util::HttpClientProvider; -use crate::npm::common::maybe_auth_header_for_npm_registry; -use crate::util::progress_bar::ProgressBar; -use crate::util::sync::MultiRuntimeAsyncValueCreator; - -use super::tarball_extract::verify_and_extract_tarball; -use super::tarball_extract::TarballExtractionMode; -use super::NpmCache; - -// todo(dsherret): create seams and unit test this +use crate::remote::maybe_auth_header_for_npm_registry; +use crate::tarball_extract::verify_and_extract_tarball; +use crate::tarball_extract::TarballExtractionMode; +use crate::NpmCache; +use crate::NpmCacheEnv; +use crate::NpmCacheSetting; type LoadResult = Result<(), Arc>; type LoadFuture = LocalBoxFuture<'static, LoadResult>; @@ -49,29 +42,23 @@ enum MemoryCacheItem { /// /// This is shared amongst all the workers. 
#[derive(Debug)] -pub struct TarballCache { - cache: Arc, - fs: Arc, - http_client_provider: Arc, +pub struct TarballCache { + cache: Arc>, + env: Arc, npmrc: Arc, - progress_bar: ProgressBar, memory_cache: Mutex>, } -impl TarballCache { +impl TarballCache { pub fn new( - cache: Arc, - fs: Arc, - http_client_provider: Arc, + cache: Arc>, + env: Arc, npmrc: Arc, - progress_bar: ProgressBar, ) -> Self { Self { cache, - fs, - http_client_provider, + env, npmrc, - progress_bar, memory_cache: Default::default(), } } @@ -144,11 +131,11 @@ impl TarballCache { let package_folder = tarball_cache.cache.package_folder_for_nv_and_url(&package_nv, registry_url); let should_use_cache = tarball_cache.cache.should_use_cache_for_package(&package_nv); - let package_folder_exists = tarball_cache.fs.exists_sync(&package_folder); + let package_folder_exists = tarball_cache.env.exists(&package_folder); if should_use_cache && package_folder_exists { return Ok(()); - } else if tarball_cache.cache.cache_setting() == &CacheSetting::Only { - return Err(custom_error( + } else if tarball_cache.cache.cache_setting() == &NpmCacheSetting::Only { + return Err(deno_core::error::custom_error( "NotCached", format!( "An npm specifier not found in cache: \"{}\", --cached-only is specified.", @@ -169,15 +156,13 @@ impl TarballCache { tarball_cache.npmrc.tarball_config(&tarball_uri); let maybe_auth_header = maybe_registry_config.and_then(|c| maybe_auth_header_for_npm_registry(c).ok()?); - let guard = tarball_cache.progress_bar.update(&dist.tarball); - let result = tarball_cache.http_client_provider - .get_or_create()? - .download_with_progress_and_retries(tarball_uri, maybe_auth_header, &guard) + let result = tarball_cache.env + .download_with_retries_on_any_tokio_runtime(tarball_uri, maybe_auth_header) .await; let maybe_bytes = match result { Ok(maybe_bytes) => maybe_bytes, - Err(DownloadError::BadResponse(err)) => { - if err.status_code == StatusCode::UNAUTHORIZED + Err(err) => { + if err.status_code == Some(StatusCode::UNAUTHORIZED) && maybe_registry_config.is_none() && tarball_cache.npmrc.get_registry_config(&package_nv.name).auth_token.is_some() { @@ -194,7 +179,6 @@ impl TarballCache { } return Err(err.into()) }, - Err(err) => return Err(err.into()), }; match maybe_bytes { Some(bytes) => { @@ -213,7 +197,7 @@ impl TarballCache { }; let dist = dist.clone(); let package_nv = package_nv.clone(); - deno_core::unsync::spawn_blocking(move || { + deno_unsync::spawn_blocking(move || { verify_and_extract_tarball( &package_nv, &bytes, diff --git a/cli/npm/managed/cache/tarball_extract.rs b/resolvers/npm_cache/tarball_extract.rs similarity index 90% rename from cli/npm/managed/cache/tarball_extract.rs rename to resolvers/npm_cache/tarball_extract.rs index e2d242e662..262618d905 100644 --- a/cli/npm/managed/cache/tarball_extract.rs +++ b/resolvers/npm_cache/tarball_extract.rs @@ -1,16 +1,17 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
+use std::borrow::Cow; use std::collections::HashSet; use std::fs; use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; +use anyhow::bail; +use anyhow::Context; +use anyhow::Error as AnyError; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::AnyError; use deno_npm::registry::NpmPackageVersionDistInfo; use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; use deno_semver::package::PackageNv; @@ -18,8 +19,6 @@ use flate2::read::GzDecoder; use tar::Archive; use tar::EntryType; -use crate::util::path::get_atomic_dir_path; - #[derive(Debug, Copy, Clone)] pub enum TarballExtractionMode { /// Overwrites the destination directory without deleting any files. @@ -206,10 +205,30 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { Ok(()) } +fn get_atomic_dir_path(file_path: &Path) -> PathBuf { + let rand = gen_rand_path_component(); + let new_file_name = format!( + ".{}_{}", + file_path + .file_name() + .map(|f| f.to_string_lossy()) + .unwrap_or(Cow::Borrowed("")), + rand + ); + file_path.with_file_name(new_file_name) +} + +fn gen_rand_path_component() -> String { + (0..4).fold(String::new(), |mut output, _| { + output.push_str(&format!("{:02x}", rand::random::())); + output + }) +} + #[cfg(test)] mod test { use deno_semver::Version; - use test_util::TempDir; + use tempfile::TempDir; use super::*; @@ -303,21 +322,21 @@ mod test { #[test] fn rename_with_retries_succeeds_exists() { - let temp_dir = TempDir::new(); + let temp_dir = TempDir::new().unwrap(); let folder_1 = temp_dir.path().join("folder_1"); let folder_2 = temp_dir.path().join("folder_2"); - folder_1.create_dir_all(); - folder_1.join("a.txt").write("test"); - folder_2.create_dir_all(); + std::fs::create_dir_all(&folder_1).unwrap(); + std::fs::write(folder_1.join("a.txt"), "test").unwrap(); + std::fs::create_dir_all(&folder_2).unwrap(); // this will not end up in the output as rename_with_retries assumes // the folders ending up at the destination are the same - folder_2.join("b.txt").write("test2"); + std::fs::write(folder_2.join("b.txt"), "test2").unwrap(); let dest_folder = temp_dir.path().join("dest_folder"); - rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap(); - rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap(); + rename_with_retries(folder_1.as_path(), &dest_folder).unwrap(); + rename_with_retries(folder_2.as_path(), &dest_folder).unwrap(); assert!(dest_folder.join("a.txt").exists()); assert!(!dest_folder.join("b.txt").exists()); } diff --git a/resolvers/npm_cache/todo.md b/resolvers/npm_cache/todo.md new file mode 100644 index 0000000000..e10b1cfd89 --- /dev/null +++ b/resolvers/npm_cache/todo.md @@ -0,0 +1,9 @@ +This crate is a work in progress: + +1. Remove `deno_core` dependency. +1. Remove `anyhow` dependency. +1. Add a clippy.toml file that bans accessing the file system directory and + instead does it through a trait. +1. Make this crate work in Wasm. +1. Refactor to store npm packument in a single place: + https://github.com/denoland/deno/issues/27198
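
For anyone consuming the new deno_npm_cache crate outside the Deno CLI, the seam to implement is the NpmCacheEnv trait introduced in resolvers/npm_cache/lib.rs above; NpmCache, TarballCache, and RegistryInfoProvider route their file system and network access through it (the CLI's own implementation is CliNpmCacheEnv in cli/npm/mod.rs). The sketch below is a minimal, illustrative implementation and not part of the patch: the SimpleCacheEnv name and the use of reqwest are assumptions, the download performs no retries despite the method name, and the "atomic" write is a plain temp-file-plus-rename.

use std::path::Path;

use anyhow::Error as AnyError;
use deno_npm_cache::DownloadError;
use deno_npm_cache::NpmCacheEnv;
use http::HeaderName;
use http::HeaderValue;
use http::StatusCode;
use url::Url;

// Hypothetical environment backed by std::fs and reqwest; the CLI's real
// implementation uses deno_runtime's FileSystem and its retrying HTTP client.
#[derive(Debug)]
struct SimpleCacheEnv {
  client: reqwest::Client,
}

#[async_trait::async_trait(?Send)]
impl NpmCacheEnv for SimpleCacheEnv {
  fn exists(&self, path: &Path) -> bool {
    path.exists()
  }

  fn hard_link_dir_recursive(
    &self,
    from: &Path,
    to: &Path,
  ) -> Result<(), AnyError> {
    // Mirror the directory tree, hard linking each file into place.
    std::fs::create_dir_all(to)?;
    for entry in std::fs::read_dir(from)? {
      let entry = entry?;
      let new_to = to.join(entry.file_name());
      if entry.file_type()?.is_dir() {
        self.hard_link_dir_recursive(&entry.path(), &new_to)?;
      } else {
        std::fs::hard_link(entry.path(), &new_to)?;
      }
    }
    Ok(())
  }

  fn atomic_write_file_with_retries(
    &self,
    file_path: &Path,
    data: &[u8],
  ) -> std::io::Result<()> {
    // Simplification: write a sibling temp file and rename it into place;
    // no retries and no randomized temp name like the CLI helper uses.
    if let Some(parent) = file_path.parent() {
      std::fs::create_dir_all(parent)?;
    }
    let tmp = file_path.with_extension("tmp");
    std::fs::write(&tmp, data)?;
    std::fs::rename(tmp, file_path)
  }

  async fn download_with_retries_on_any_tokio_runtime(
    &self,
    url: Url,
    maybe_auth_header: Option<(HeaderName, HeaderValue)>,
  ) -> Result<Option<Vec<u8>>, DownloadError> {
    let mut request = self.client.get(url.as_str());
    if let Some((name, value)) = maybe_auth_header {
      request = request.header(name, value);
    }
    let response = request.send().await.map_err(|err| DownloadError {
      status_code: None,
      error: err.into(),
    })?;
    let status = response.status();
    if status == StatusCode::NOT_FOUND {
      // `None` signals "not found" to the registry info and tarball caches.
      Ok(None)
    } else if status.is_success() {
      let bytes = response.bytes().await.map_err(|err| DownloadError {
        status_code: None,
        error: err.into(),
      })?;
      Ok(Some(bytes.to_vec()))
    } else {
      Err(DownloadError {
        status_code: Some(status),
        error: anyhow::anyhow!("bad response: {}", status),
      })
    }
  }
}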