// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use deno_ast::ModuleSpecifier;
use deno_core::anyhow::bail;
use deno_core::error::AnyError;
use deno_core::parking_lot::Mutex;
use deno_core::unsync::sync::AtomicFlag;
use deno_runtime::code_cache::CodeCache;
use deno_runtime::code_cache::CodeCacheType;

use crate::cache::FastInsecureHasher;
use crate::util::path::get_atomic_file_path;
use crate::worker::CliCodeCache;

enum CodeCacheStrategy {
  FirstRun(FirstRunCodeCacheStrategy),
  SubsequentRun(SubsequentRunCodeCacheStrategy),
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DenoCompileCodeCacheEntry {
  pub source_hash: u64,
  pub data: Vec<u8>,
}

pub struct DenoCompileCodeCache {
  strategy: CodeCacheStrategy,
}

impl DenoCompileCodeCache {
  pub fn new(file_path: PathBuf, cache_key: u64) -> Self {
    // attempt to deserialize the cache data
    match deserialize(&file_path, cache_key) {
      Ok(data) => {
        log::debug!(
          "Loaded {} code cache entries from {}",
          data.len(),
          file_path.display()
        );
        Self {
          strategy: CodeCacheStrategy::SubsequentRun(
            SubsequentRunCodeCacheStrategy {
              is_finished: AtomicFlag::lowered(),
              data: Mutex::new(data),
            },
          ),
        }
      }
      Err(err) => {
        log::debug!(
          "Failed to deserialize code cache from {}: {:#}",
          file_path.display(),
          err
        );
        Self {
          strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy {
            cache_key,
            file_path,
            is_finished: AtomicFlag::lowered(),
            data: Mutex::new(FirstRunCodeCacheData {
              cache: HashMap::new(),
              add_count: 0,
            }),
          }),
        }
      }
    }
  }
}

impl CodeCache for DenoCompileCodeCache {
  fn get_sync(
    &self,
    specifier: &ModuleSpecifier,
    code_cache_type: CodeCacheType,
    source_hash: u64,
  ) -> Option<Vec<u8>> {
    match &self.strategy {
      CodeCacheStrategy::FirstRun(strategy) => {
        if !strategy.is_finished.is_raised() {
          // we keep track of how many times the cache is requested
          // then serialize the cache when we get that number of
          // "set" calls
          strategy.data.lock().add_count += 1;
        }
        None
      }
      CodeCacheStrategy::SubsequentRun(strategy) => {
        if strategy.is_finished.is_raised() {
          return None;
        }
        strategy.take_from_cache(specifier, code_cache_type, source_hash)
      }
    }
  }

  fn set_sync(
    &self,
    specifier: ModuleSpecifier,
    code_cache_type: CodeCacheType,
    source_hash: u64,
    bytes: &[u8],
  ) {
    match &self.strategy {
      CodeCacheStrategy::FirstRun(strategy) => {
        if strategy.is_finished.is_raised() {
          return;
        }

        let data_to_serialize = {
          let mut data = strategy.data.lock();
          data.cache.insert(
            (specifier.to_string(), code_cache_type),
            DenoCompileCodeCacheEntry {
              source_hash,
              data: bytes.to_vec(),
            },
          );
          if data.add_count != 0 {
            data.add_count -= 1;
          }
          if data.add_count == 0 {
            // don't allow using the cache anymore
            strategy.is_finished.raise();
            if data.cache.is_empty() {
              None
            } else {
              Some(std::mem::take(&mut data.cache))
            }
          } else {
            None
          }
        };
        if let Some(cache_data) = &data_to_serialize {
          strategy.write_cache_data(cache_data);
        }
      }
      CodeCacheStrategy::SubsequentRun(_) => {
        // do nothing
      }
    }
  }
}

impl CliCodeCache for DenoCompileCodeCache {
  fn enabled(&self) -> bool {
    match &self.strategy {
      CodeCacheStrategy::FirstRun(strategy) => {
        !strategy.is_finished.is_raised()
      }
      CodeCacheStrategy::SubsequentRun(strategy) => {
        !strategy.is_finished.is_raised()
      }
    }
  }

  fn as_code_cache(self: Arc<Self>) -> Arc<dyn CodeCache> {
    self
  }
}

type CodeCacheKey = (String, CodeCacheType);

struct FirstRunCodeCacheData {
  cache: HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
  add_count: usize,
}

struct FirstRunCodeCacheStrategy {
  cache_key: u64,
  file_path: PathBuf,
  is_finished: AtomicFlag,
  data: Mutex<FirstRunCodeCacheData>,
}

impl FirstRunCodeCacheStrategy {
  fn write_cache_data(
    &self,
    cache_data: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
  ) {
    let count = cache_data.len();
    let temp_file = get_atomic_file_path(&self.file_path);
    match serialize(&temp_file, self.cache_key, cache_data) {
      Ok(()) => {
        if let Err(err) = std::fs::rename(&temp_file, &self.file_path) {
          log::debug!("Failed to rename code cache: {}", err);
          let _ = std::fs::remove_file(&temp_file);
        } else {
          log::debug!("Serialized {} code cache entries", count);
        }
      }
      Err(err) => {
        let _ = std::fs::remove_file(&temp_file);
        log::debug!("Failed to serialize code cache: {}", err);
      }
    }
  }
}

struct SubsequentRunCodeCacheStrategy {
  is_finished: AtomicFlag,
  data: Mutex<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>>,
}

impl SubsequentRunCodeCacheStrategy {
  fn take_from_cache(
    &self,
    specifier: &ModuleSpecifier,
    code_cache_type: CodeCacheType,
    source_hash: u64,
  ) -> Option<Vec<u8>> {
    let mut data = self.data.lock();
    // todo(dsherret): how to avoid the clone here?
    let entry = data.remove(&(specifier.to_string(), code_cache_type))?;
    if entry.source_hash != source_hash {
      return None;
    }
    if data.is_empty() {
      self.is_finished.raise();
    }
    Some(entry.data)
  }
}

/// File format:
/// - <header>
///   - <cache key>
///   - <u32: number of entries>
/// - <[entry length]> - u64 * number of entries
/// - <[entry]>
///   - <[u8]: entry data>
///   - <String: specifier>
///   - <u8>: code cache type
///   - <u32: specifier length>
///   - <u64: source hash>
///   - <u64: entry data hash>
fn serialize(
  file_path: &Path,
  cache_key: u64,
  cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
  let cache_file = std::fs::OpenOptions::new()
    .create(true)
    .truncate(true)
    .write(true)
    .open(file_path)?;
  let mut writer = BufWriter::new(cache_file);
  serialize_with_writer(&mut writer, cache_key, cache)
}

fn serialize_with_writer<T: Write>(
  writer: &mut BufWriter<T>,
  cache_key: u64,
  cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
  // header
  writer.write_all(&cache_key.to_le_bytes())?;
  writer.write_all(&(cache.len() as u32).to_le_bytes())?;
  // lengths of each entry
  for ((specifier, _), entry) in cache {
    let len: u64 =
      entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8;
    writer.write_all(&len.to_le_bytes())?;
  }
  // entries
  for ((specifier, code_cache_type), entry) in cache {
    writer.write_all(&entry.data)?;
    writer.write_all(&[match code_cache_type {
      CodeCacheType::EsModule => 0,
      CodeCacheType::Script => 1,
    }])?;
    writer.write_all(specifier.as_bytes())?;
    writer.write_all(&(specifier.len() as u32).to_le_bytes())?;
    writer.write_all(&entry.source_hash.to_le_bytes())?;
    let hash: u64 = FastInsecureHasher::new_without_deno_version()
      .write(&entry.data)
      .finish();
    writer.write_all(&hash.to_le_bytes())?;
  }

  writer.flush()?;

  Ok(())
}

fn deserialize(
  file_path: &Path,
  expected_cache_key: u64,
) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
  let cache_file = std::fs::File::open(file_path)?;
  let mut reader = BufReader::new(cache_file);
  deserialize_with_reader(&mut reader, expected_cache_key)
}

fn deserialize_with_reader<T: Read>(
  reader: &mut BufReader<T>,
  expected_cache_key: u64,
) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
  // it's very important to use this below so that a corrupt cache file
  // doesn't cause a memory allocation error
  fn new_vec_sized<T: Clone>(
    capacity: usize,
    default_value: T,
  ) -> Result<Vec<T>, AnyError> {
    let mut vec = Vec::new();
    vec.try_reserve(capacity)?;
    vec.resize(capacity, default_value);
    Ok(vec)
  }

  fn try_subtract(a: usize, b: usize) -> Result<usize, AnyError> {
    if a < b {
      bail!("Integer underflow");
    }
    Ok(a - b)
  }

  let mut header_bytes = vec![0; 8 + 4];
  reader.read_exact(&mut header_bytes)?;
  let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?);
  if actual_cache_key != expected_cache_key {
    // cache bust
    bail!("Cache key mismatch");
  }
  let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize;
  // read the lengths for each entry found in the file
  let entry_len_bytes_capacity = len * 8;
  let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?;
  reader.read_exact(&mut entry_len_bytes)?;
  let mut lengths = Vec::new();
  lengths.try_reserve(len)?;
  for i in 0..len {
    let pos = i * 8;
    lengths.push(
      u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize,
    );
  }

  let mut map = HashMap::new();
  map.try_reserve(len)?;
  for len in lengths {
    let mut buffer = new_vec_sized(len, 0)?;
    reader.read_exact(&mut buffer)?;
    let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?;
    let expected_entry_data_hash =
      u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?);
    let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?;
    let source_hash = u64::from_le_bytes(
      buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?,
    );
    let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?;
    let specifier_len = u32::from_le_bytes(
      buffer[specifier_end_pos..source_hash_start_pos].try_into()?,
    ) as usize;
    let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?;
    let specifier = String::from_utf8(
      buffer[specifier_start_pos..specifier_end_pos].to_vec(),
    )?;
    let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?;
    let code_cache_type = match buffer[code_cache_type_pos] {
      0 => CodeCacheType::EsModule,
      1 => CodeCacheType::Script,
      _ => bail!("Invalid code cache type"),
    };
    buffer.truncate(code_cache_type_pos);
    let actual_entry_data_hash: u64 =
      FastInsecureHasher::new_without_deno_version()
        .write(&buffer)
        .finish();
    if expected_entry_data_hash != actual_entry_data_hash {
      bail!("Hash mismatch.")
    }
    map.insert(
      (specifier, code_cache_type),
      DenoCompileCodeCacheEntry {
        source_hash,
        data: buffer,
      },
    );
  }

  Ok(map)
}

#[cfg(test)]
mod test {
  use test_util::TempDir;

  use super::*;
  use std::fs::File;

  #[test]
  fn serialize_deserialize() {
    let cache_key = 123456;
    let cache = {
      let mut cache = HashMap::new();
      cache.insert(
        ("specifier1".to_string(), CodeCacheType::EsModule),
        DenoCompileCodeCacheEntry {
          source_hash: 1,
          data: vec![1, 2, 3],
        },
      );
      cache.insert(
        ("specifier2".to_string(), CodeCacheType::EsModule),
        DenoCompileCodeCacheEntry {
          source_hash: 2,
          data: vec![4, 5, 6],
        },
      );
      cache.insert(
        ("specifier2".to_string(), CodeCacheType::Script),
        DenoCompileCodeCacheEntry {
          source_hash: 2,
          data: vec![6, 5, 1],
        },
      );
      cache
    };
    let mut buffer = Vec::new();
    serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
      .unwrap();
    let deserialized =
      deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
        .unwrap();
    assert_eq!(cache, deserialized);
  }

  #[test]
  fn serialize_deserialize_empty() {
    let cache_key = 1234;
    let cache = HashMap::new();
    let mut buffer = Vec::new();
    serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
      .unwrap();
    let deserialized =
      deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
        .unwrap();
    assert_eq!(cache, deserialized);
  }

  #[test]
  fn serialize_deserialize_corrupt() {
    let buffer = "corrupttestingtestingtesting".as_bytes().to_vec();
    let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234)
      .unwrap_err();
    assert_eq!(err.to_string(), "Cache key mismatch");
  }

  #[test]
  fn code_cache() {
    let temp_dir = TempDir::new();
    let file_path = temp_dir.path().join("cache.bin").to_path_buf();
    let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap();
    let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap();
    // first run
    {
      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
      assert!(code_cache
        .get_sync(&url1, CodeCacheType::EsModule, 0)
        .is_none());
      assert!(code_cache
        .get_sync(&url2, CodeCacheType::EsModule, 1)
        .is_none());
      assert!(code_cache.enabled());
      code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]);
      assert!(code_cache.enabled());
      assert!(!file_path.exists());
      code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]);
      assert!(file_path.exists()); // now the new code cache exists
      assert!(!code_cache.enabled()); // no longer enabled
    }
    // second run
    {
      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
      assert!(code_cache.enabled());
      let result1 = code_cache
        .get_sync(&url1, CodeCacheType::EsModule, 0)
        .unwrap();
      assert!(code_cache.enabled());
      let result2 = code_cache
        .get_sync(&url2, CodeCacheType::EsModule, 1)
        .unwrap();
      assert!(!code_cache.enabled()); // no longer enabled
      assert_eq!(result1, vec![1, 2, 3]);
      assert_eq!(result2, vec![2, 1, 3]);
    }

    // new cache key first run
    {
      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
      assert!(code_cache
        .get_sync(&url1, CodeCacheType::EsModule, 0)
        .is_none());
      assert!(code_cache
        .get_sync(&url2, CodeCacheType::EsModule, 1)
        .is_none());
      code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]);
      code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]);
    }
    // new cache key second run
    {
      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
      let result1 = code_cache
        .get_sync(&url1, CodeCacheType::EsModule, 0)
        .unwrap();
      assert_eq!(result1, vec![2, 2, 3]);
      assert!(code_cache
        .get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none
        .is_none());
    }
  }
}