Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(compile): code cache #26528

Merged
merged 25 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7236756
perf(compile): use less memory
dsherret Oct 22, 2024
1928daf
working now with typescript
dsherret Oct 23, 2024
32913e7
Merge branch 'main' into perf_deno_compile_less_memory
dsherret Oct 23, 2024
788f4ab
Tell v8 that something is a string like before.
dsherret Oct 23, 2024
742ae3f
fix byonm issue
dsherret Oct 23, 2024
1ad5f62
Merge branch 'main' into perf_deno_compile_less_memory
dsherret Oct 23, 2024
fd13339
maybe fix ci
dsherret Oct 24, 2024
1e0dabd
do not store data urls in the binary
dsherret Oct 24, 2024
d7cd10b
switch to le because this is not network
dsherret Oct 24, 2024
c156944
review
dsherret Oct 24, 2024
938c3e0
perf(compile): code cache for initial load
dsherret Oct 24, 2024
0a7c050
use distinct strategies for compile
dsherret Oct 24, 2024
0957e09
use distinct strategies for compile
dsherret Oct 24, 2024
bad329a
tests
dsherret Oct 24, 2024
e9e4ad2
support --no-code-cache
dsherret Oct 24, 2024
8a910fd
lint
dsherret Oct 24, 2024
bc08455
Merge branch 'main' into perf_compile_code_cache
dsherret Oct 24, 2024
815be3a
remove unused use
dsherret Oct 24, 2024
ec0d0c7
fix test
dsherret Oct 24, 2024
1140105
Do not subtract with overflow when deserializing.
dsherret Oct 24, 2024
5b13157
maybe fix test failing because they had the same binary name
dsherret Oct 24, 2024
d360dda
Merge branch 'main' into perf_compile_code_cache
dsherret Nov 18, 2024
488f6dc
update after merge
dsherret Nov 18, 2024
0e38fab
lint
dsherret Nov 18, 2024
b1a784b
Merge branch 'main' into perf_compile_code_cache
dsherret Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
perf(compile): code cache for initial load
  • Loading branch information
dsherret committed Oct 24, 2024
commit 938c3e00735fc313988bcd58f3ebddb48908255e
9 changes: 9 additions & 0 deletions cli/standalone/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use indexmap::IndexMap;
use log::Level;
use serde::Deserialize;
use serde::Serialize;
use windows_sys::Wdk::System;

use crate::args::CaData;
use crate::args::CliOptions;
Expand Down Expand Up @@ -171,6 +172,9 @@ pub struct SerializedWorkspaceResolver {
pub struct Metadata {
pub argv: Vec<String>,
pub seed: Option<u64>,
/// A value generated at compile time (currently derived from the system
/// clock) that is used as the cache key for this compilation.
pub cache_key: String,
pub permissions: PermissionFlags,
pub location: Option<Url>,
pub v8_flags: Vec<String>,
Expand Down Expand Up @@ -656,6 +660,11 @@ impl<'a> DenoCompileBinaryWriter<'a> {
let metadata = Metadata {
argv: compile_flags.args.clone(),
seed: cli_options.seed(),
cache_key: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos()
.to_string(),
location: cli_options.location_flag().clone(),
permissions: cli_options.permission_flags().clone(),
v8_flags: cli_options.v8_flags().clone(),
Expand Down
288 changes: 288 additions & 0 deletions cli/standalone/code_cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;

use deno_ast::ModuleSpecifier;
use deno_core::anyhow::bail;
use deno_core::error::AnyError;
use deno_core::parking_lot::Mutex;
use deno_core::unsync::sync::AtomicFlag;
use deno_runtime::code_cache::CodeCache;
use deno_runtime::code_cache::CodeCacheType;

use crate::cache::FastInsecureHasher;
use crate::util::path::get_atomic_file_path;

/// The mutable state of the code cache, kept behind the mutex in
/// `DenoCompileCodeCache`.
struct MutableData {
/// Cached entries keyed by the module specifier string.
cache: HashMap<String, DenoCompileCodeCacheEntry>,
/// Set once an entry has been inserted via `set_sync`; `take_cache_data`
/// only hands the entries back when this is `true`.
modified: bool,
/// Incremented on every `get_sync` miss and decremented on every
/// `set_sync`; when it reaches zero in `set_sync` the cache is written out.
add_count: usize,
}

impl MutableData {
  /// Removes the entry stored for `specifier` and returns its bytes when the
  /// stored source hash equals `source_hash`.
  ///
  /// The entry is removed from the map even when the hashes differ, in which
  /// case `None` is returned.
  fn take_from_cache(
    &mut self,
    specifier: &ModuleSpecifier,
    source_hash: u64,
  ) -> Option<Vec<u8>> {
    self
      .cache
      .remove(specifier.as_str())
      .filter(|found| found.source_hash == source_hash)
      .map(|found| found.data)
  }

  /// Empties the in-memory cache and returns the removed entries, but only
  /// when something was modified; otherwise the entries are dropped and
  /// `None` is returned.
  fn take_cache_data(
    &mut self,
  ) -> Option<HashMap<String, DenoCompileCodeCacheEntry>> {
    // the map is unconditionally moved out so it never lingers in memory
    let drained = std::mem::take(&mut self.cache);
    self.modified.then_some(drained)
  }
}

/// A single code cache entry: the cached bytes for a module together with
/// the hash of the source they were produced from.
#[derive(Debug, Clone)]
pub struct DenoCompileCodeCacheEntry {
/// Hash of the module source; a lookup only succeeds when the requested
/// hash equals this value.
pub source_hash: u64,
/// The cached bytes handed back by `get_sync` and stored by `set_sync`.
pub data: Vec<u8>,
}

/// A file-backed `CodeCache` implementation used by compiled binaries.
pub struct DenoCompileCodeCache {
/// Key written at the start of the cache file; a file whose leading bytes
/// do not match this key is rejected when deserializing.
cache_key: String,
/// Location of the cache file that is read in `new` and replaced in
/// `write_cache_data`.
file_path: PathBuf,
/// Raised in `set_sync` once the cache data has been taken for writing;
/// afterwards `get_sync` returns `None` and `set_sync` does nothing.
finished: AtomicFlag,
/// The cache entries and bookkeeping counters.
data: Mutex<MutableData>,
}

impl DenoCompileCodeCache {
  /// Creates a code cache backed by the file at `file_path`.
  ///
  /// Existing entries are loaded from `file_path` when it can be
  /// deserialized with the given `cache_key`. Any failure (the file cannot
  /// be opened or read, the key does not match, the contents are invalid)
  /// is logged at debug level and the cache starts out empty.
  pub fn new(file_path: PathBuf, cache_key: String) -> Self {
    // attempt to deserialize the cache data
    let cache = match deserialize(&file_path, &cache_key) {
      Ok(cache) => cache,
      Err(err) => {
        log::debug!("Failed to deserialize code cache: {}", err);
        HashMap::new()
      }
    };

    Self {
      cache_key,
      file_path,
      finished: AtomicFlag::lowered(),
      data: Mutex::new(MutableData {
        cache,
        modified: false,
        add_count: 0,
      }),
    }
  }

  /// Serializes `cache_data` to a temporary file and then renames it over
  /// the cache file.
  ///
  /// This is best effort: failures are only logged at debug level. The
  /// temporary file is removed whenever it could not be moved into place.
  fn write_cache_data(
    &self,
    cache_data: &HashMap<String, DenoCompileCodeCacheEntry>,
  ) {
    // NOTE(review): presumably a sibling temp path of `file_path` — confirm
    // against `get_atomic_file_path`.
    let temp_file = get_atomic_file_path(&self.file_path);
    match serialize(&temp_file, &self.cache_key, cache_data) {
      Ok(()) => {
        if let Err(err) = std::fs::rename(&temp_file, &self.file_path) {
          log::debug!("Failed to rename code cache: {}", err);
          // don't leave the orphaned temp file behind when the rename fails
          let _ = std::fs::remove_file(&temp_file);
        }
      }
      Err(err) => {
        // the temp file may be partially written, so discard it
        let _ = std::fs::remove_file(&temp_file);
        log::debug!("Failed to serialize code cache: {}", err);
      }
    }
  }
}

impl CodeCache for DenoCompileCodeCache {
  /// Returns the cached bytes for `specifier` when an entry with a matching
  /// `source_hash` exists.
  ///
  /// A hit removes the entry from the in-memory cache. A miss increments the
  /// count of outstanding entries that `set_sync` later counts down. Once
  /// the cache is finished this always returns `None`.
  fn get_sync(
    &self,
    specifier: &ModuleSpecifier,
    // entries are keyed by specifier only, so the type is not consulted
    _code_cache_type: CodeCacheType,
    source_hash: u64,
  ) -> Option<Vec<u8>> {
    if self.finished.is_raised() {
      return None;
    }
    let mut data = self.data.lock();
    let cached = data.take_from_cache(specifier, source_hash);
    if cached.is_none() {
      data.add_count += 1;
    }
    cached
  }

  /// Stores `bytes` for `specifier` and, once no misses remain outstanding,
  /// marks the cache as finished and writes the collected entries to disk.
  ///
  /// Does nothing when the cache is already finished.
  fn set_sync(
    &self,
    specifier: ModuleSpecifier,
    // entries are keyed by specifier only, so the type is not consulted
    _code_cache_type: CodeCacheType,
    source_hash: u64,
    bytes: &[u8],
  ) {
    if self.finished.is_raised() {
      return;
    }
    // the lock is released at the end of this block, before any file I/O
    let data_to_serialize = {
      let mut data = self.data.lock();
      data.cache.insert(
        specifier.to_string(),
        DenoCompileCodeCacheEntry {
          source_hash,
          data: bytes.to_vec(),
        },
      );
      data.modified = true;
      // a set without a preceding miss must not underflow the counter
      data.add_count = data.add_count.saturating_sub(1);
      if data.add_count == 0 {
        // don't allow using the cache anymore
        self.finished.raise();
        // NOTE: entries already handed out by `get_sync` were removed from
        // the map and are therefore not part of what is returned here.
        data.take_cache_data()
      } else {
        None
      }
    };
    if let Some(cache_data) = data_to_serialize {
      self.write_cache_data(&cache_data);
    }
  }

  /// Returns `true` until the cache has been marked as finished.
  fn enabled(&self) -> bool {
    !self.finished.is_raised()
  }
}

/// Writes `cache` to `file_path`, creating the file or truncating an
/// existing one.
///
/// All integers are written in little-endian byte order.
///
/// File format:
/// - <header>
///   - <cache key>
///   - <u32: number of entries>
/// - <[entry length]> - u64 * number of entries
/// - <[entry]>
///   - <[u8]: entry data>
///   - <String: specifier>
///   - <u32: specifier length>
///   - <u64: source hash>
///   - <u64: entry data hash>
///
/// # Errors
///
/// Returns an error when the file cannot be opened or written, or when the
/// number of entries or the length of a specifier does not fit in a `u32`.
fn serialize(
  file_path: &Path,
  cache_key: &str,
  cache: &HashMap<String, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
  let cache_file = std::fs::OpenOptions::new()
    .create(true)
    .truncate(true)
    .write(true)
    .open(file_path)?;
  let mut writer = BufWriter::new(cache_file);
  // header
  writer.write_all(cache_key.as_bytes())?;
  // fail instead of silently truncating the count, which would produce a
  // file that cannot be parsed back
  let entry_count = u32::try_from(cache.len())?;
  writer.write_all(&entry_count.to_le_bytes())?;
  // lengths of each entry
  for (specifier, entry) in cache {
    // validate the specifier length before any entry body is written
    u32::try_from(specifier.len())?;
    // data + specifier + specifier length (4) + source hash (8) + data hash (8)
    let len: u64 =
      entry.data.len() as u64 + specifier.len() as u64 + 4 + 8 + 8;
    writer.write_all(&len.to_le_bytes())?;
  }
  // entries (the map is not modified in between, so the iteration order
  // matches the lengths written above)
  for (specifier, entry) in cache {
    writer.write_all(&entry.data)?;
    writer.write_all(specifier.as_bytes())?;
    let specifier_len = u32::try_from(specifier.len())?;
    writer.write_all(&specifier_len.to_le_bytes())?;
    writer.write_all(&entry.source_hash.to_le_bytes())?;
    let hash: u64 = FastInsecureHasher::new_without_deno_version()
      .write(&entry.data)
      .finish();
    writer.write_all(&hash.to_le_bytes())?;
  }

  // flush explicitly so that write errors are reported rather than being
  // swallowed when the writer is dropped
  writer.flush()?;

  Ok(())
}

/// Reads the code cache file at `file_path`.
///
/// The expected layout is the one produced by `serialize`: the cache key, a
/// little-endian `u32` entry count, one little-endian `u64` length per
/// entry, and then the entries themselves. Each entry consists of the entry
/// data followed by the specifier, the `u32` specifier length, the `u64`
/// source hash and the `u64` hash of the entry data.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read, when the stored
/// key differs from `cache_key`, when an entry is too short or otherwise
/// malformed, when a specifier is not valid UTF-8, or when the stored hash
/// of an entry's data does not match the data.
fn deserialize(
  file_path: &Path,
  cache_key: &str,
) -> Result<HashMap<String, DenoCompileCodeCacheEntry>, AnyError> {
  /// Moves `pos` back by `amount` bytes, erroring instead of underflowing
  /// when the entry is too short to contain the requested field.
  fn step_back(pos: usize, amount: usize) -> Result<usize, AnyError> {
    match pos.checked_sub(amount) {
      Some(new_pos) => Ok(new_pos),
      None => bail!("Invalid code cache entry."),
    }
  }

  let cache_file = std::fs::File::open(file_path)?;
  let mut reader = BufReader::new(cache_file);
  let mut header_bytes = vec![0; cache_key.len() + 4];
  reader.read_exact(&mut header_bytes)?;
  if &header_bytes[..cache_key.len()] != cache_key.as_bytes() {
    // cache bust
    bail!("Cache key mismatch");
  }
  let len =
    u32::from_le_bytes(header_bytes[cache_key.len()..].try_into()?) as usize;
  // read the lengths for each entry found in the file
  let entry_len_bytes_capacity = match len.checked_mul(8) {
    Some(capacity) => capacity,
    None => bail!("Invalid code cache entry count."),
  };
  let mut entry_len_bytes = Vec::new();
  // fallible reservation because the count comes from the file
  entry_len_bytes.try_reserve(entry_len_bytes_capacity)?;
  entry_len_bytes.resize(entry_len_bytes_capacity, 0);
  reader.read_exact(&mut entry_len_bytes)?;
  let mut lengths = Vec::new();
  lengths.try_reserve(len)?;
  for chunk in entry_len_bytes.chunks_exact(8) {
    // reject lengths that do not fit in `usize` instead of truncating
    lengths.push(usize::try_from(u64::from_le_bytes(chunk.try_into()?))?);
  }

  let mut map = HashMap::new();
  map.try_reserve(len)?;
  for len in lengths {
    let mut buffer = Vec::new();
    buffer.try_reserve(len)?;
    buffer.resize(len, 0);

    reader.read_exact(&mut buffer)?;
    // the fixed size fields are at the end of the entry, so walk backwards
    let entry_data_hash_start_pos = step_back(buffer.len(), 8)?;
    let expected_entry_data_hash =
      u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?);
    let source_hash_start_pos = step_back(entry_data_hash_start_pos, 8)?;
    let source_hash = u64::from_le_bytes(
      buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?,
    );
    let specifier_end_pos = step_back(source_hash_start_pos, 4)?;
    let specifier_len = u32::from_le_bytes(
      buffer[specifier_end_pos..source_hash_start_pos].try_into()?,
    ) as usize;
    let specifier_start_pos = step_back(specifier_end_pos, specifier_len)?;
    let specifier = String::from_utf8(
      buffer[specifier_start_pos..specifier_end_pos].to_vec(),
    )?;
    // what remains in front of the specifier is the entry data
    buffer.truncate(specifier_start_pos);
    let actual_entry_data_hash: u64 =
      FastInsecureHasher::new_without_deno_version()
        .write(&buffer)
        .finish();
    if expected_entry_data_hash != actual_entry_data_hash {
      bail!("Hash mismatch.")
    }
    map.insert(
      specifier,
      DenoCompileCodeCacheEntry {
        source_hash,
        data: buffer,
      },
    );
  }

  Ok(map)
}
Loading