1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-21 04:52:26 -05:00

feat(lsp): registry suggestion cache respects cache headers (#13010)

Fixes #9931
This commit is contained in:
Kitson Kelly 2021-12-09 22:16:17 +11:00 committed by GitHub
parent 69ad5f0e78
commit 2347e60934
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 286 additions and 22 deletions

7
Cargo.lock generated
View file

@ -335,6 +335,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cache_control"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bf2a5fb3207c12b5d208ebc145f967fea5cac41a021c37417ccc31ba40f39ee"
[[package]]
name = "cc"
version = "1.0.71"
@ -628,6 +634,7 @@ version = "1.16.4"
dependencies = [
"atty",
"base64 0.13.0",
"cache_control",
"chrono",
"clap",
"data-url",

View file

@ -48,6 +48,8 @@ deno_runtime = { version = "0.36.0", path = "../runtime" }
atty = "=0.2.14"
base64 = "=0.13.0"
cache_control = "=0.2.0"
chrono = "=0.4.19"
clap = "=2.33.3"
data-url = "=0.1.1"
dissimilar = "=1.0.2"
@ -89,8 +91,6 @@ fwdansi = "=1.1.0"
winapi = { version = "=0.3.9", features = ["knownfolders", "mswsock", "objbase", "shlobj", "tlhelp32", "winbase", "winerror", "winsock2"] }
[dev-dependencies]
# Used in benchmark
chrono = "=0.4.19"
flaky_test = "=0.1.0"
os_pipe = "=0.9.2"
pretty_assertions = "=0.7.2"

View file

@ -4,10 +4,12 @@ use crate::auth_tokens::AuthTokens;
use crate::colors;
use crate::http_cache::HttpCache;
use crate::http_util::fetch_once;
use crate::http_util::CacheSemantics;
use crate::http_util::FetchOnceArgs;
use crate::http_util::FetchOnceResult;
use crate::text_encoding;
use crate::version::get_user_agent;
use data_url::DataUrl;
use deno_ast::MediaType;
use deno_core::error::custom_error;
@ -34,6 +36,7 @@ use std::io::Read;
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use std::time::SystemTime;
pub const SUPPORTED_SCHEMES: [&str; 5] =
["data", "blob", "file", "http", "https"];
@ -89,6 +92,10 @@ pub enum CacheSetting {
/// `--reload=https://deno.land/std` or
/// `--reload=https://deno.land/std,https://deno.land/x/example`.
ReloadSome(Vec<String>),
/// The usability of a cached value is determined by analyzing the cached
/// headers and other metadata associated with a cached response, reloading
/// any "non-fresh" cached responses.
RespectHeaders,
/// The cached source files should be used for local modules. This is the
/// default behavior of the CLI.
Use,
@ -96,10 +103,23 @@ pub enum CacheSetting {
impl CacheSetting {
/// Returns if the cache should be used for a given specifier.
pub fn should_use(&self, specifier: &ModuleSpecifier) -> bool {
pub fn should_use(
&self,
specifier: &ModuleSpecifier,
http_cache: &HttpCache,
) -> bool {
match self {
CacheSetting::ReloadAll => false,
CacheSetting::Use | CacheSetting::Only => true,
CacheSetting::RespectHeaders => {
if let Ok((_, headers, cache_time)) = http_cache.get(specifier) {
let cache_semantics =
CacheSemantics::new(headers, cache_time, SystemTime::now());
cache_semantics.should_use()
} else {
false
}
}
CacheSetting::ReloadSome(list) => {
let mut url = specifier.clone();
url.set_fragment(None);
@ -312,7 +332,7 @@ impl FileFetcher {
return Err(custom_error("Http", "Too many redirects."));
}
let (mut source_file, headers) = match self.http_cache.get(specifier) {
let (mut source_file, headers, _) = match self.http_cache.get(specifier) {
Err(err) => {
if let Some(err) = err.downcast_ref::<std::io::Error>() {
if err.kind() == std::io::ErrorKind::NotFound {
@ -469,7 +489,7 @@ impl FileFetcher {
return futures::future::err(err).boxed();
}
if self.cache_setting.should_use(specifier) {
if self.cache_setting.should_use(specifier, &self.http_cache) {
match self.fetch_cached(specifier, redirect_limit) {
Ok(Some(file)) => {
return futures::future::ok(file).boxed();
@ -495,7 +515,7 @@ impl FileFetcher {
info!("{} {}", colors::green("Download"), specifier);
let maybe_etag = match self.http_cache.get(specifier) {
Ok((_, headers)) => headers.get("etag").cloned(),
Ok((_, headers, _)) => headers.get("etag").cloned(),
_ => None,
};
let maybe_auth_token = self.auth_tokens.get(specifier);
@ -682,7 +702,7 @@ mod tests {
.fetch_remote(specifier, &mut Permissions::allow_all(), 1)
.await;
assert!(result.is_ok());
let (_, headers) = file_fetcher.http_cache.get(specifier).unwrap();
let (_, headers, _) = file_fetcher.http_cache.get(specifier).unwrap();
(result.unwrap(), headers)
}
@ -1065,7 +1085,7 @@ mod tests {
// the value above.
assert_eq!(file.media_type, MediaType::JavaScript);
let (_, headers) = file_fetcher_02.http_cache.get(&specifier).unwrap();
let (_, headers, _) = file_fetcher_02.http_cache.get(&specifier).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/javascript");
metadata.headers = HashMap::new();
metadata
@ -1194,7 +1214,7 @@ mod tests {
"",
"redirected files should have empty cached contents"
);
let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap();
let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap();
assert_eq!(
headers.get("location").unwrap(),
"http://localhost:4545/subdir/redirects/redirect1.js"
@ -1204,7 +1224,7 @@ mod tests {
fs::read_to_string(redirected_cached_filename).unwrap(),
"export const redirect = 1;\n"
);
let (_, headers) =
let (_, headers, _) =
file_fetcher.http_cache.get(&redirected_specifier).unwrap();
assert!(headers.get("location").is_none());
}
@ -1247,7 +1267,7 @@ mod tests {
"",
"redirected files should have empty cached contents"
);
let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap();
let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap();
assert_eq!(
headers.get("location").unwrap(),
"http://localhost:4546/subdir/redirects/redirect1.js"
@ -1258,7 +1278,7 @@ mod tests {
"",
"redirected files should have empty cached contents"
);
let (_, headers) = file_fetcher
let (_, headers, _) = file_fetcher
.http_cache
.get(&redirected_01_specifier)
.unwrap();
@ -1271,7 +1291,7 @@ mod tests {
fs::read_to_string(redirected_02_cached_filename).unwrap(),
"export const redirect = 1;\n"
);
let (_, headers) = file_fetcher
let (_, headers, _) = file_fetcher
.http_cache
.get(&redirected_02_specifier)
.unwrap();
@ -1392,7 +1412,7 @@ mod tests {
"",
"redirected files should have empty cached contents"
);
let (_, headers) = file_fetcher.http_cache.get(&specifier).unwrap();
let (_, headers, _) = file_fetcher.http_cache.get(&specifier).unwrap();
assert_eq!(
headers.get("location").unwrap(),
"/subdir/redirects/redirect1.js"
@ -1402,7 +1422,7 @@ mod tests {
fs::read_to_string(redirected_cached_filename).unwrap(),
"export const redirect = 1;\n"
);
let (_, headers) =
let (_, headers, _) =
file_fetcher.http_cache.get(&redirected_specifier).unwrap();
assert!(headers.get("location").is_none());
}
@ -1499,6 +1519,60 @@ mod tests {
assert_eq!(file.source.as_str(), r#"console.log("goodbye deno");"#);
}
#[tokio::test]
async fn test_respect_cache_revalidates() {
  // Two independent fetchers share one cache directory. With
  // `CacheSetting::RespectHeaders` the second fetch is expected to yield a
  // different source than the first for the `/dynamic` endpoint.
  let _g = test_util::http_server();
  let temp_dir = Rc::new(TempDir::new().unwrap());
  let specifier =
    ModuleSpecifier::parse("http://localhost:4545/dynamic").unwrap();

  let (initial_fetcher, _) =
    setup(CacheSetting::RespectHeaders, Some(temp_dir.clone()));
  let initial_result = initial_fetcher
    .fetch(&specifier, &mut Permissions::allow_all())
    .await;
  assert!(initial_result.is_ok());
  let initial_file = initial_result.unwrap();

  let (repeat_fetcher, _) =
    setup(CacheSetting::RespectHeaders, Some(temp_dir.clone()));
  let repeat_result = repeat_fetcher
    .fetch(&specifier, &mut Permissions::allow_all())
    .await;
  assert!(repeat_result.is_ok());
  let repeat_file = repeat_result.unwrap();

  assert_ne!(initial_file.source.as_str(), repeat_file.source.as_str());
}
#[tokio::test]
async fn test_respect_cache_still_fresh() {
  // Two independent fetchers share one cache directory. With
  // `CacheSetting::RespectHeaders` the second fetch is expected to yield the
  // same source as the first for the `/dynamic_cache` endpoint.
  let _g = test_util::http_server();
  let temp_dir = Rc::new(TempDir::new().unwrap());
  let specifier =
    ModuleSpecifier::parse("http://localhost:4545/dynamic_cache").unwrap();

  let (initial_fetcher, _) =
    setup(CacheSetting::RespectHeaders, Some(temp_dir.clone()));
  let initial_result = initial_fetcher
    .fetch(&specifier, &mut Permissions::allow_all())
    .await;
  assert!(initial_result.is_ok());
  let initial_file = initial_result.unwrap();

  let (repeat_fetcher, _) =
    setup(CacheSetting::RespectHeaders, Some(temp_dir.clone()));
  let repeat_result = repeat_fetcher
    .fetch(&specifier, &mut Permissions::allow_all())
    .await;
  assert!(repeat_result.is_ok());
  let repeat_file = repeat_result.unwrap();

  assert_eq!(initial_file.source.as_str(), repeat_file.source.as_str());
}
#[tokio::test]
async fn test_fetch_local_utf_16be() {
let expected = String::from_utf8(

View file

@ -17,6 +17,7 @@ use std::fs::File;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use std::time::SystemTime;
pub const CACHE_PERM: u32 = 0o644;
@ -81,6 +82,8 @@ pub struct HttpCache {
pub struct Metadata {
/// Headers recorded for the cached response.
pub headers: HeadersMap,
/// The URL the entry was cached for, in string form.
pub url: String,
/// Time at which the entry was written to the cache. When the field is
/// missing from the serialized metadata, deserialization falls back to the
/// current time.
#[serde(default = "SystemTime::now")]
pub now: SystemTime,
}
impl Metadata {
@ -138,7 +141,10 @@ impl HttpCache {
// TODO(bartlomieju): this method should check headers file
// and validate against ETAG/Last-modified-as headers.
// ETAG check is currently done in `cli/file_fetcher.rs`.
pub fn get(&self, url: &Url) -> Result<(File, HeadersMap), AnyError> {
pub fn get(
&self,
url: &Url,
) -> Result<(File, HeadersMap, SystemTime), AnyError> {
let cache_filename = self.location.join(
url_to_filename(url)
.ok_or_else(|| generic_error("Can't convert url to filename."))?,
@ -147,7 +153,7 @@ impl HttpCache {
let file = File::open(cache_filename)?;
let metadata = fs::read_to_string(metadata_filename)?;
let metadata: Metadata = serde_json::from_str(&metadata)?;
Ok((file, metadata.headers))
Ok((file, metadata.headers, metadata.now))
}
pub fn set(
@ -169,6 +175,7 @@ impl HttpCache {
fs_util::atomic_write_file(&cache_filename, content, CACHE_PERM)?;
let metadata = Metadata {
now: SystemTime::now(),
url: url.to_string(),
headers: headers_map,
};
@ -227,7 +234,7 @@ mod tests {
assert!(r.is_ok());
let r = cache.get(&url);
assert!(r.is_ok());
let (mut file, headers) = r.unwrap();
let (mut file, headers, _) = r.unwrap();
let mut content = String::new();
file.read_to_string(&mut content).unwrap();
assert_eq!(content, "Hello world");

View file

@ -1,6 +1,9 @@
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
use crate::auth_tokens::AuthToken;
use cache_control::Cachability;
use cache_control::CacheControl;
use chrono::DateTime;
use deno_core::error::custom_error;
use deno_core::error::generic_error;
use deno_core::error::AnyError;
@ -13,6 +16,8 @@ use deno_runtime::deno_fetch::reqwest::Client;
use deno_runtime::deno_fetch::reqwest::StatusCode;
use log::debug;
use std::collections::HashMap;
use std::time::Duration;
use std::time::SystemTime;
/// Construct the next uri based on base uri and location header fragment
/// See <https://tools.ietf.org/html/rfc3986#section-4.2>
@ -46,6 +51,153 @@ fn resolve_url_from_location(base_url: &Url, location: &str) -> Url {
// Vec<(String, String)>
pub type HeadersMap = HashMap<String, String>;
/// A structure used to determine if an entity in the http cache can be used.
///
/// This is heavily influenced by
/// <https://github.com/kornelski/rusty-http-cache-semantics> which is BSD
/// 2-Clause Licensed and copyright Kornel Lesiński
pub(crate) struct CacheSemantics {
/// Directives parsed from the `cache-control` header, or the default value
/// when the header is missing or cannot be parsed.
cache_control: CacheControl,
/// Time at which the response was stored in the cache.
cached: SystemTime,
/// Headers of the cached response.
headers: HashMap<String, String>,
/// Point in time against which freshness is evaluated.
now: SystemTime,
}
impl CacheSemantics {
  /// Creates the cache semantics for a cached response.
  ///
  /// `headers` are the headers of the cached response, `cached` is the time
  /// the response was stored and `now` is the time freshness is evaluated
  /// against. A missing or unparsable `cache-control` header results in the
  /// default `CacheControl` value being used.
  pub fn new(
    headers: HashMap<String, String>,
    cached: SystemTime,
    now: SystemTime,
  ) -> Self {
    let cache_control = headers
      .get("cache-control")
      .and_then(|v| CacheControl::from_value(v))
      .unwrap_or_default();
    Self {
      cache_control,
      cached,
      headers,
      now,
    }
  }

  /// Converts a Unix timestamp in seconds into a `SystemTime`.
  ///
  /// Returns `None` for negative timestamps and for values that cannot be
  /// represented as a `SystemTime`, so callers never hit a panicking add.
  fn system_time_from_timestamp(secs: i64) -> Option<SystemTime> {
    if secs < 0 {
      return None;
    }
    SystemTime::UNIX_EPOCH.checked_add(Duration::from_secs(secs as u64))
  }

  /// The current age of the response: the value of the `age` header plus the
  /// time the response has been resident in the cache.
  fn age(&self) -> Duration {
    // A clock that went backwards contributes no resident time.
    let resident_time =
      self.now.duration_since(self.cached).unwrap_or_default();
    // The `age` header is remote input and may be arbitrarily large, so
    // saturate instead of panicking on overflow.
    self
      .age_header_value()
      .checked_add(resident_time)
      .unwrap_or_else(|| Duration::from_secs(u64::MAX))
  }

  /// The value of the `age` header in seconds, or zero when the header is
  /// missing or is not a valid number.
  fn age_header_value(&self) -> Duration {
    Duration::from_secs(
      self
        .headers
        .get("age")
        .and_then(|v| v.parse().ok())
        .unwrap_or(0),
    )
  }

  /// Returns true when the age of the response has reached its freshness
  /// lifetime.
  fn is_stale(&self) -> bool {
    self.max_age() <= self.age()
  }

  /// The freshness lifetime of the response.
  ///
  /// Evaluated in order:
  /// 1. `cache-control: no-cache` or `vary: *` give a lifetime of zero.
  /// 2. The `max-age` directive, when present.
  /// 3. The `expires` header relative to the server date; an unparsable or
  ///    unrepresentable value gives a lifetime of zero.
  /// 4. 10% of the time between `last-modified` and the server date.
  /// 5. Zero otherwise.
  fn max_age(&self) -> Duration {
    if self.cache_control.cachability == Some(Cachability::NoCache) {
      return Duration::from_secs(0);
    }

    if self.headers.get("vary").map(|s| s.trim()) == Some("*") {
      return Duration::from_secs(0);
    }

    if let Some(max_age) = self.cache_control.max_age {
      return max_age;
    }

    let default_min_ttl = Duration::from_secs(0);
    let server_date = self.raw_server_date();

    if let Some(expires) = self.headers.get("expires") {
      return DateTime::parse_from_rfc2822(expires)
        .ok()
        .and_then(|expires| {
          // Dates before the epoch are clamped to the epoch.
          Self::system_time_from_timestamp(expires.timestamp().max(0))
        })
        .map(|expires| {
          // An expiry that is not after the server date yields zero.
          default_min_ttl
            .max(expires.duration_since(server_date).unwrap_or_default())
        })
        .unwrap_or_default();
    }

    let maybe_last_modified = self
      .headers
      .get("last-modified")
      .and_then(|v| DateTime::parse_from_rfc2822(v).ok())
      .and_then(|v| Self::system_time_from_timestamp(v.timestamp().max(0)));
    if let Some(last_modified) = maybe_last_modified {
      if let Ok(diff) = server_date.duration_since(last_modified) {
        // Heuristic freshness: a tenth of the time since last modification.
        let secs_left = diff.as_secs() as f64 * 0.1;
        return default_min_ttl.max(Duration::from_secs(secs_left as _));
      }
    }

    default_min_ttl
  }

  /// The time the server reported in the `date` header, falling back to the
  /// time the response was cached when the header is missing, unparsable,
  /// before the Unix epoch, or not representable.
  fn raw_server_date(&self) -> SystemTime {
    self
      .headers
      .get("date")
      .and_then(|d| DateTime::parse_from_rfc2822(d).ok())
      .and_then(|d| Self::system_time_from_timestamp(d.timestamp()))
      .unwrap_or(self.cached)
  }

  /// Returns true if the cached value is "fresh" respecting cached headers,
  /// otherwise returns false.
  pub fn should_use(&self) -> bool {
    if self.cache_control.cachability == Some(Cachability::NoCache) {
      return false;
    }

    let age = self.age();

    if let Some(max_age) = self.cache_control.max_age {
      if age > max_age {
        return false;
      }
    }

    if let Some(min_fresh) = self.cache_control.min_fresh {
      if self.time_to_live() < min_fresh {
        return false;
      }
    }

    if self.is_stale() {
      // A stale response is only usable when `max-stale` is present and
      // exceeds how far past its freshness lifetime the response is.
      let staleness = age.checked_sub(self.max_age()).unwrap_or_default();
      return matches!(
        self.cache_control.max_stale,
        Some(max_stale) if max_stale > staleness
      );
    }

    true
  }

  /// The remaining freshness lifetime, or zero when the response is already
  /// stale.
  fn time_to_live(&self) -> Duration {
    self.max_age().checked_sub(self.age()).unwrap_or_default()
  }
}
#[derive(Debug, PartialEq)]
pub enum FetchOnceResult {
Code(Vec<u8>, HeadersMap),

View file

@ -282,7 +282,7 @@ impl Default for ModuleRegistry {
let dir = deno_dir::DenoDir::new(None).unwrap();
let location = dir.root.join("registries");
let http_cache = HttpCache::new(&location);
let cache_setting = CacheSetting::Use;
let cache_setting = CacheSetting::RespectHeaders;
let file_fetcher = FileFetcher::new(
http_cache,
cache_setting,
@ -305,7 +305,7 @@ impl ModuleRegistry {
let http_cache = HttpCache::new(location);
let file_fetcher = FileFetcher::new(
http_cache,
CacheSetting::Use,
CacheSetting::RespectHeaders,
true,
None,
BlobStore::default(),
@ -387,12 +387,17 @@ impl ModuleRegistry {
.await;
// if there is an error fetching, we will cache an empty file, so that
// subsequent requests just get an empty doc, which will error without
// needing to connect to the remote URL
// needing to connect to the remote URL. We will cache it for 1 week.
if fetch_result.is_err() {
let mut headers_map = HashMap::new();
headers_map.insert(
"cache-control".to_string(),
"max-age=604800, immutable".to_string(),
);
self
.file_fetcher
.http_cache
.set(specifier, HashMap::default(), &[])?;
.set(specifier, headers_map, &[])?;
}
let file = fetch_result?;
let config: RegistryConfigurationJson = serde_json::from_str(&file.source)?;

View file

@ -893,6 +893,25 @@ async fn main_server(
);
Ok(res)
}
(_, "/dynamic") => {
let mut res = Response::new(Body::from(
serde_json::to_string_pretty(&std::time::SystemTime::now()).unwrap(),
));
res
.headers_mut()
.insert("cache-control", HeaderValue::from_static("no-cache"));
Ok(res)
}
(_, "/dynamic_cache") => {
let mut res = Response::new(Body::from(
serde_json::to_string_pretty(&std::time::SystemTime::now()).unwrap(),
));
res.headers_mut().insert(
"cache-control",
HeaderValue::from_static("public, max-age=604800, immutable"),
);
Ok(res)
}
_ => {
let mut file_path = testdata_path();
file_path.push(&req.uri().path()[1..]);