Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(compile): code cache #26528

Merged
dsherret merged 25 commits into main from perf_compile_code_cache
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7236756
perf(compile): use less memory
dsherret Oct 22, 2024
1928daf
working now with typescript
dsherret Oct 23, 2024
32913e7
Merge branch 'main' into perf_deno_compile_less_memory
dsherret Oct 23, 2024
788f4ab
Tell v8 that something is a string like before.
dsherret Oct 23, 2024
742ae3f
fix byonm issue
dsherret Oct 23, 2024
1ad5f62
Merge branch 'main' into perf_deno_compile_less_memory
dsherret Oct 23, 2024
fd13339
maybe fix ci
dsherret Oct 24, 2024
1e0dabd
do not store data urls in the binary
dsherret Oct 24, 2024
d7cd10b
switch to le because this is not network
dsherret Oct 24, 2024
c156944
review
dsherret Oct 24, 2024
938c3e0
perf(compile): code cache for initial load
dsherret Oct 24, 2024
0a7c050
use distinct strategies for compile
dsherret Oct 24, 2024
0957e09
use distinct strategies for compile
dsherret Oct 24, 2024
bad329a
tests
dsherret Oct 24, 2024
e9e4ad2
support --no-code-cache
dsherret Oct 24, 2024
8a910fd
lint
dsherret Oct 24, 2024
bc08455
Merge branch 'main' into perf_compile_code_cache
dsherret Oct 24, 2024
815be3a
remove unused use
dsherret Oct 24, 2024
ec0d0c7
fix test
dsherret Oct 24, 2024
1140105
Do not subtract with overflow when deserializing.
dsherret Oct 24, 2024
5b13157
maybe fix test failing because they had the same binary name
dsherret Oct 24, 2024
d360dda
Merge branch 'main' into perf_compile_code_cache
dsherret Nov 18, 2024
488f6dc
update after merge
dsherret Nov 18, 2024
0e38fab
lint
dsherret Nov 18, 2024
b1a784b
Merge branch 'main' into perf_compile_code_cache
dsherret Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
support --no-code-cache
  • Loading branch information
dsherret committed Oct 24, 2024
commit e9e4ad2bbac6ca467b2ab29e8eaa54250dea2dc7
3 changes: 3 additions & 0 deletions cli/args/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1921,6 +1921,7 @@ On the first invocation with deno will download the proper binary and cache it i
])
.help_heading(COMPILE_HEADING),
)
.arg(no_code_cache_arg())
.arg(
Arg::new("no-terminal")
.long("no-terminal")
Expand Down Expand Up @@ -4412,6 +4413,8 @@ fn compile_parse(
};
ext_arg_parse(flags, matches);

flags.code_cache_enabled = !matches.get_flag("no-code-cache");

flags.subcommand = DenoSubcommand::Compile(CompileFlags {
source_file,
output,
Expand Down
18 changes: 12 additions & 6 deletions cli/standalone/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ pub struct SerializedWorkspaceResolver {
pub struct Metadata {
pub argv: Vec<String>,
pub seed: Option<u64>,
pub code_cache_key: u64,
pub code_cache_key: Option<u64>,
pub permissions: PermissionFlags,
pub location: Option<Url>,
pub v8_flags: Vec<String>,
Expand Down Expand Up @@ -596,14 +596,20 @@ impl<'a> DenoCompileBinaryWriter<'a> {
VfsBuilder::new(root_path.clone())?
};
let mut remote_modules_store = RemoteModulesStoreBuilder::default();
let mut code_cache_key_hasher = FastInsecureHasher::new_deno_versioned();
let mut code_cache_key_hasher = if cli_options.code_cache_enabled() {
Some(FastInsecureHasher::new_deno_versioned())
} else {
None
};
for module in graph.modules() {
if module.specifier().scheme() == "data" {
continue; // don't store data urls as an entry as they're in the code
}
if let Some(source) = module.source() {
code_cache_key_hasher.write(module.specifier().as_str().as_bytes());
code_cache_key_hasher.write(source.as_bytes());
if let Some(hasher) = &mut code_cache_key_hasher {
if let Some(source) = module.source() {
hasher.write(module.specifier().as_str().as_bytes());
hasher.write(source.as_bytes());
}
}
let (maybe_source, media_type) = match module {
deno_graph::Module::Js(m) => {
Expand Down Expand Up @@ -664,7 +670,7 @@ impl<'a> DenoCompileBinaryWriter<'a> {
let metadata = Metadata {
argv: compile_flags.args.clone(),
seed: cli_options.seed(),
code_cache_key: code_cache_key_hasher.finish(),
code_cache_key: code_cache_key_hasher.map(|h| h.finish()),
location: cli_options.location_flag().clone(),
permissions: cli_options.permission_flags().clone(),
v8_flags: cli_options.v8_flags().clone(),
Expand Down
66 changes: 49 additions & 17 deletions cli/standalone/code_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl CodeCache for DenoCompileCodeCache {
fn get_sync(
&self,
specifier: &ModuleSpecifier,
_code_cache_type: CodeCacheType,
code_cache_type: CodeCacheType,
source_hash: u64,
) -> Option<Vec<u8>> {
match &self.strategy {
Expand All @@ -91,15 +91,15 @@ impl CodeCache for DenoCompileCodeCache {
if strategy.is_finished.is_raised() {
return None;
}
strategy.take_from_cache(specifier, source_hash)
strategy.take_from_cache(specifier, code_cache_type, source_hash)
}
}
}

fn set_sync(
&self,
specifier: ModuleSpecifier,
_code_cache_type: CodeCacheType,
code_cache_type: CodeCacheType,
source_hash: u64,
bytes: &[u8],
) {
Expand All @@ -112,7 +112,7 @@ impl CodeCache for DenoCompileCodeCache {
let data_to_serialize = {
let mut data = strategy.data.lock();
data.cache.insert(
specifier.to_string(),
(specifier.to_string(), code_cache_type),
DenoCompileCodeCacheEntry {
source_hash,
data: bytes.to_vec(),
Expand Down Expand Up @@ -161,8 +161,10 @@ impl CliCodeCache for DenoCompileCodeCache {
}
}

type CodeCacheKey = (String, CodeCacheType);

struct FirstRunCodeCacheData {
cache: HashMap<String, DenoCompileCodeCacheEntry>,
cache: HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
add_count: usize,
}

Expand All @@ -176,7 +178,7 @@ struct FirstRunCodeCacheStrategy {
impl FirstRunCodeCacheStrategy {
fn write_cache_data(
&self,
cache_data: &HashMap<String, DenoCompileCodeCacheEntry>,
cache_data: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) {
let count = cache_data.len();
let temp_file = get_atomic_file_path(&self.file_path);
Expand All @@ -198,20 +200,25 @@ impl FirstRunCodeCacheStrategy {

struct SubsequentRunCodeCacheStrategy {
is_finished: AtomicFlag,
data: Mutex<HashMap<String, DenoCompileCodeCacheEntry>>,
data: Mutex<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>>,
}

impl SubsequentRunCodeCacheStrategy {
fn take_from_cache(
&self,
specifier: &ModuleSpecifier,
code_cache_type: CodeCacheType,
source_hash: u64,
) -> Option<Vec<u8>> {
let mut data = self.data.lock();
let entry = data.remove(specifier.as_str())?;
// todo(dsherret): how to avoid the clone here?
let entry = data.remove(&(specifier.to_string(), code_cache_type))?;
if entry.source_hash != source_hash {
return None;
}
if data.is_empty() {
self.is_finished.raise();
}
Some(entry.data)
}
}
Expand All @@ -224,13 +231,14 @@ impl SubsequentRunCodeCacheStrategy {
/// - <[entry]>
/// - <[u8]: entry data>
/// - <u8: code cache type>
/// - <String: specifier>
/// - <u32: specifier length>
/// - <u64: source hash>
/// - <u64: entry data hash>
fn serialize(
file_path: &Path,
cache_key: u64,
cache: &HashMap<String, DenoCompileCodeCacheEntry>,
cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
) -> Result<(), AnyError> {
let cache_file = std::fs::OpenOptions::new()
.create(true)
Expand All @@ -242,13 +250,18 @@ fn serialize(
writer.write_all(&cache_key.to_le_bytes())?;
writer.write_all(&(cache.len() as u32).to_le_bytes())?;
// lengths of each entry
for (specifier, entry) in cache {
let len: u64 = entry.data.len() as u64 + specifier.len() as u64 + 4 + 8 + 8;
for ((specifier, _), entry) in cache {
let len: u64 =
entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8;
writer.write_all(&len.to_le_bytes())?;
}
// entries
for (specifier, entry) in cache {
for ((specifier, code_cache_type), entry) in cache {
writer.write_all(&entry.data)?;
writer.write_all(&[match code_cache_type {
CodeCacheType::EsModule => 0,
CodeCacheType::Script => 1,
}])?;
writer.write_all(specifier.as_bytes())?;
writer.write_all(&(specifier.len() as u32).to_le_bytes())?;
writer.write_all(&entry.source_hash.to_le_bytes())?;
Expand All @@ -266,7 +279,7 @@ fn serialize(
fn deserialize(
file_path: &Path,
expected_cache_key: u64,
) -> Result<HashMap<String, DenoCompileCodeCacheEntry>, AnyError> {
) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
// it's very important to use this below so that a corrupt cache file
// doesn't cause a memory allocation error
fn new_vec_sized<T: Clone>(
Expand Down Expand Up @@ -322,7 +335,13 @@ fn deserialize(
let specifier = String::from_utf8(
buffer[specifier_start_pos..specifier_end_pos].to_vec(),
)?;
buffer.truncate(specifier_start_pos);
let code_cache_type_pos = specifier_start_pos - 1;
let code_cache_type = match buffer[code_cache_type_pos] {
0 => CodeCacheType::EsModule,
1 => CodeCacheType::Script,
_ => bail!("Invalid code cache type"),
};
buffer.truncate(code_cache_type_pos);
let actual_entry_data_hash: u64 =
FastInsecureHasher::new_without_deno_version()
.write(&buffer)
Expand All @@ -331,7 +350,7 @@ fn deserialize(
bail!("Hash mismatch.")
}
map.insert(
specifier,
(specifier, code_cache_type),
DenoCompileCodeCacheEntry {
source_hash,
data: buffer,
Expand All @@ -356,19 +375,26 @@ mod test {
let cache = {
let mut cache = HashMap::new();
cache.insert(
"specifier1".to_string(),
("specifier1".to_string(), CodeCacheType::EsModule),
DenoCompileCodeCacheEntry {
source_hash: 1,
data: vec![1, 2, 3],
},
);
cache.insert(
"specifier2".to_string(),
("specifier2".to_string(), CodeCacheType::EsModule),
DenoCompileCodeCacheEntry {
source_hash: 2,
data: vec![4, 5, 6],
},
);
cache.insert(
("specifier2".to_string(), CodeCacheType::Script),
DenoCompileCodeCacheEntry {
source_hash: 2,
data: vec![6, 5, 1],
},
);
cache
};
let file_path = temp_dir.path().join("cache.bin").to_path_buf();
Expand Down Expand Up @@ -412,20 +438,26 @@ mod test {
assert!(code_cache
.get_sync(&url2, CodeCacheType::EsModule, 1)
.is_none());
assert!(code_cache.enabled());
code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]);
assert!(code_cache.enabled());
assert!(!file_path.exists());
code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]);
assert!(file_path.exists()); // now the new code cache exists
assert!(!code_cache.enabled()); // no longer enabled
}
// second run
{
let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
assert!(code_cache.enabled());
let result1 = code_cache
.get_sync(&url1, CodeCacheType::EsModule, 0)
.unwrap();
assert!(code_cache.enabled());
let result2 = code_cache
.get_sync(&url2, CodeCacheType::EsModule, 1)
.unwrap();
assert!(!code_cache.enabled()); // no longer enabled
assert_eq!(result1, vec![1, 2, 3]);
assert_eq!(result2, vec![2, 1, 3]);
}
Expand Down
47 changes: 29 additions & 18 deletions cli/standalone/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct SharedModuleLoaderState {
workspace_resolver: WorkspaceResolver,
node_resolver: Arc<CliNodeResolver>,
npm_module_loader: Arc<NpmModuleLoader>,
code_cache: Arc<dyn CliCodeCache>,
code_cache: Option<Arc<dyn CliCodeCache>>,
}

impl SharedModuleLoaderState {
Expand All @@ -110,14 +110,17 @@ impl SharedModuleLoaderState {
specifier: &ModuleSpecifier,
source: &[u8],
) -> Option<SourceCodeCacheInfo> {
if !self.code_cache.enabled() {
let Some(code_cache) = &self.code_cache else {
return None;
};
if !code_cache.enabled() {
return None;
}
// deno version is already included in the root cache key
let hash = FastInsecureHasher::new_without_deno_version()
.write_hashable(source)
.finish();
let data = self.code_cache.get_sync(
let data = code_cache.get_sync(
specifier,
deno_runtime::code_cache::CodeCacheType::EsModule,
hash,
Expand Down Expand Up @@ -382,14 +385,16 @@ impl ModuleLoader for EmbeddedModuleLoader {
&self,
specifier: ModuleSpecifier,
source_hash: u64,
code_cache: &[u8],
code_cache_data: &[u8],
) -> LocalBoxFuture<'static, ()> {
self.shared.code_cache.set_sync(
specifier,
deno_runtime::code_cache::CodeCacheType::EsModule,
source_hash,
code_cache,
);
if let Some(code_cache) = &self.shared.code_cache {
code_cache.set_sync(
specifier,
deno_runtime::code_cache::CodeCacheType::EsModule,
source_hash,
code_cache_data,
);
}
std::future::ready(()).boxed_local()
}
}
Expand Down Expand Up @@ -615,13 +620,19 @@ pub async fn run(data: StandaloneData) -> Result<i32, AnyError> {
metadata.workspace_resolver.pkg_json_resolution,
)
};
let code_cache = Arc::new(DenoCompileCodeCache::new(
root_path.with_file_name(format!(
"{}.cache",
root_path.file_name().unwrap().to_string_lossy()
)),
metadata.code_cache_key,
));
let code_cache = match metadata.code_cache_key {
Some(code_cache_key) => Some(Arc::new(DenoCompileCodeCache::new(
root_path.with_file_name(format!(
"{}.cache",
root_path.file_name().unwrap().to_string_lossy()
)),
code_cache_key,
)) as Arc<dyn CliCodeCache>),
None => {
log::debug!("Code cache disabled.");
None
}
};
let module_loader_factory = StandaloneModuleLoaderFactory {
shared: Arc::new(SharedModuleLoaderState {
modules,
Expand Down Expand Up @@ -673,7 +684,7 @@ pub async fn run(data: StandaloneData) -> Result<i32, AnyError> {
let worker_factory = CliMainWorkerFactory::new(
Arc::new(BlobStore::default()),
cjs_resolutions,
Some(code_cache),
code_cache,
feature_checker,
fs,
None,
Expand Down
1 change: 1 addition & 0 deletions runtime/code_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use deno_core::ModuleSpecifier;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CodeCacheType {
EsModule,
Script,
Expand Down
22 changes: 22 additions & 0 deletions tests/specs/compile/no_code_cache/__test__.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
// Spec for `deno compile --no-code-cache`: build main.ts into a standalone
// binary with the code cache turned off, then run the binary and compare its
// output against main.out.
"tempDir": true,
"steps": [{
// unix: compile to ./main with debug logging; any compile output is accepted
"if": "unix",
"args": "compile --output main --no-code-cache --log-level=debug main.ts",
"output": "[WILDCARD]"
}, {
// unix: run the compiled binary; expected output is described by main.out
"if": "unix",
"commandName": "./main",
"args": [],
"output": "main.out"
}, {
// windows: same compile step, but the output file carries the .exe suffix
"if": "windows",
"args": "compile --output main.exe --no-code-cache --log-level=debug main.ts",
"output": "[WILDCARD]"
}, {
// windows: run the compiled binary; expected output is described by main.out
"if": "windows",
"commandName": "./main.exe",
"args": [],
"output": "main.out"
}]
}
1 change: 1 addition & 0 deletions tests/specs/compile/no_code_cache/main.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[WILDCARD]Code cache disabled.[WILDCARD]
3 changes: 3 additions & 0 deletions tests/specs/compile/no_code_cache/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Entry module for the no_code_cache compile spec: imports `join` from a JSR
// package and logs the imported binding.
import { join } from "jsr:@std/url@0.220/join";

console.log(join);