Skip to content

perf(lsp): Cache semantic tokens for open documents #23799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions cli/lsp/documents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,16 @@ impl AssetOrDocument {
}
}

/// Returns a clone of the semantic tokens cached for this open document,
/// if any have been stored by a previous full-document request.
/// Assets never carry cached tokens, so they always yield `None`.
pub fn maybe_semantic_tokens(&self) -> Option<lsp::SemanticTokens> {
  let AssetOrDocument::Document(doc) = self else {
    return None;
  };
  // Only documents open in the editor have `open_data` (and therefore a
  // token cache slot).
  let open_data = doc.open_data.as_ref()?;
  open_data.maybe_semantic_tokens.lock().clone()
}

pub fn text(&self) -> Arc<str> {
match self {
AssetOrDocument::Asset(a) => a.text(),
Expand Down Expand Up @@ -252,6 +262,7 @@ fn get_maybe_test_module_fut(
/// Per-document state that exists only while the document is open in the
/// editor (i.e. has an LSP version assigned by the client).
pub struct DocumentOpenData {
  // Version number reported by the client for this document.
  lsp_version: i32,
  maybe_parsed_source: Option<ParsedSourceResult>,
  // Cache of the last full-document semantic tokens result; reset whenever
  // the document's content changes. Wrapped in a mutex so it can be written
  // through `&self`.
  maybe_semantic_tokens: Arc<Mutex<Option<lsp::SemanticTokens>>>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -333,6 +344,7 @@ impl Document {
open_data: maybe_lsp_version.map(|v| DocumentOpenData {
lsp_version: v,
maybe_parsed_source,
maybe_semantic_tokens: Default::default(),
}),
resolver,
specifier,
Expand Down Expand Up @@ -424,6 +436,8 @@ impl Document {
open_data: self.open_data.as_ref().map(|d| DocumentOpenData {
lsp_version: d.lsp_version,
maybe_parsed_source,
// reset semantic tokens
maybe_semantic_tokens: Default::default(),
}),
resolver,
specifier: self.specifier.clone(),
Expand Down Expand Up @@ -502,6 +516,7 @@ impl Document {
open_data: self.open_data.is_some().then_some(DocumentOpenData {
lsp_version: version,
maybe_parsed_source,
maybe_semantic_tokens: Default::default(),
}),
resolver: self.resolver.clone(),
}))
Expand Down Expand Up @@ -655,6 +670,15 @@ impl Document {
) {
*self.maybe_navigation_tree.lock() = Some(navigation_tree);
}

/// Stores the result of a full-document semantic tokens request so that
/// later full/range requests can reuse it instead of re-querying tsc.
/// No-op for documents that are not open in the editor.
pub fn cache_semantic_tokens_full(
  &self,
  semantic_tokens: lsp::SemanticTokens,
) {
  let Some(open_data) = self.open_data.as_ref() else {
    return;
  };
  *open_data.maybe_semantic_tokens.lock() = Some(semantic_tokens);
}
}

fn resolve_media_type(
Expand Down
27 changes: 27 additions & 0 deletions cli/lsp/language_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2529,6 +2529,16 @@ impl Inner {
.performance
.mark_with_args("lsp.semantic_tokens_full", &params);
let asset_or_doc = self.get_asset_or_document(&specifier)?;
if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
let response = if !tokens.data.is_empty() {
Some(SemanticTokensResult::Tokens(tokens.clone()))
} else {
None
};
self.performance.measure(mark);
return Ok(response);
}

let line_index = asset_or_doc.line_index();

let semantic_classification = self
Expand All @@ -2542,6 +2552,11 @@ impl Inner {

let semantic_tokens =
semantic_classification.to_semantic_tokens(line_index)?;

if let Some(doc) = asset_or_doc.document() {
doc.cache_semantic_tokens_full(semantic_tokens.clone());
}

let response = if !semantic_tokens.data.is_empty() {
Some(SemanticTokensResult::Tokens(semantic_tokens))
} else {
Expand All @@ -2566,6 +2581,18 @@ impl Inner {
.performance
.mark_with_args("lsp.semantic_tokens_range", &params);
let asset_or_doc = self.get_asset_or_document(&specifier)?;
if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
let tokens =
super::semantic_tokens::tokens_within_range(&tokens, params.range);
let response = if !tokens.data.is_empty() {
Some(SemanticTokensRangeResult::Tokens(tokens))
} else {
None
};
self.performance.measure(mark);
return Ok(response);
}

let line_index = asset_or_doc.line_index();

let semantic_classification = self
Expand Down
174 changes: 174 additions & 0 deletions cli/lsp/semantic_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

use std::ops::Index;
use std::ops::IndexMut;
use tower_lsp::lsp_types as lsp;
use tower_lsp::lsp_types::SemanticToken;
use tower_lsp::lsp_types::SemanticTokenModifier;
use tower_lsp::lsp_types::SemanticTokenType;
Expand Down Expand Up @@ -247,6 +248,54 @@ impl SemanticTokensBuilder {
}
}

/// Extracts the subset of `tokens` whose start positions fall within
/// `range`, rebasing the first kept token's deltas so the result is a
/// valid standalone token stream.
///
/// A token is kept when its start position satisfies
/// `range.start <= start <= range.end`; a token that merely overlaps the
/// range from before its start is excluded.
pub fn tokens_within_range(
  tokens: &SemanticTokens,
  range: lsp::Range,
) -> SemanticTokens {
  let token_count = tokens.data.len();
  // Absolute position of the token currently being decoded from the
  // delta-encoded stream.
  let mut cur_line = 0;
  let mut cur_char = 0;
  // Slice bounds of the tokens to keep, plus the absolute position of the
  // first kept token (needed to rebase its deltas afterwards).
  let mut start_idx = token_count;
  let mut end_idx = token_count;
  let mut start_line = 0;
  let mut start_char = 0;
  for (i, token) in tokens.data.iter().enumerate() {
    // A nonzero line delta means `delta_start` is measured from column 0
    // of the new line rather than from the previous token.
    if token.delta_line != 0 {
      cur_char = 0;
    }
    cur_line += token.delta_line;
    cur_char += token.delta_start;
    let pos = lsp::Position::new(cur_line, cur_char);
    if start_idx > i && pos >= range.start {
      start_idx = i;
      start_line = cur_line;
      start_char = cur_char;
    }
    if pos > range.end {
      end_idx = i;
      break;
    }
  }
  if start_idx == end_idx {
    // Nothing starts inside the range.
    return SemanticTokens {
      result_id: None,
      data: Vec::new(),
    };
  }

  let mut data = tokens.data[start_idx..end_idx].to_vec();
  // The first kept token's deltas were relative to the token that preceded
  // it; rewrite them to be relative to the start of the document.
  data[0].delta_line = start_line;
  data[0].delta_start = start_char;

  SemanticTokens {
    result_id: None,
    data,
  }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -352,4 +401,129 @@ mod tests {
]
);
}

#[test]
fn test_tokens_within_range() {
  // Token stream spanning lines 1 through 5.
  let mut b = SemanticTokensBuilder::new();
  b.push(1, 0, 5, 0, 0);
  b.push(2, 1, 1, 1, 0);
  b.push(2, 2, 3, 2, 0);
  b.push(2, 5, 5, 3, 0);
  b.push(3, 0, 4, 4, 0);
  b.push(5, 2, 3, 5, 0);
  let tokens = b.build(None);
  // Select every token starting between (2, 2) and (4, 0) inclusive.
  let range = lsp::Range::new(
    lsp::Position::new(2, 2),
    lsp::Position::new(4, 0),
  );

  let result = tokens_within_range(&tokens, range);

  assert_eq!(
    result.data,
    vec![
      // line 2 char 2 — first kept token, rebased to absolute deltas
      SemanticToken {
        delta_line: 2,
        delta_start: 2,
        length: 3,
        token_type: 2,
        token_modifiers_bitset: 0
      },
      // line 2 char 5
      SemanticToken {
        delta_line: 0,
        delta_start: 3,
        length: 5,
        token_type: 3,
        token_modifiers_bitset: 0
      },
      // line 3 char 0
      SemanticToken {
        delta_line: 1,
        delta_start: 0,
        length: 4,
        token_type: 4,
        token_modifiers_bitset: 0
      }
    ]
  );
}

#[test]
fn test_tokens_within_range_include_end() {
  let mut b = SemanticTokensBuilder::new();
  b.push(1, 0, 1, 0, 0);
  b.push(2, 1, 2, 1, 0);
  b.push(2, 3, 3, 2, 0);
  b.push(3, 0, 4, 3, 0);
  let tokens = b.build(None);
  // The last token starts exactly at `range.end` — it must be kept.
  let range = lsp::Range::new(
    lsp::Position::new(2, 2),
    lsp::Position::new(3, 4),
  );
  let result = tokens_within_range(&tokens, range);

  assert_eq!(
    result.data,
    vec![
      // line 2 char 3
      SemanticToken {
        delta_line: 2,
        delta_start: 3,
        length: 3,
        token_type: 2,
        token_modifiers_bitset: 0
      },
      // line 3 char 0
      SemanticToken {
        delta_line: 1,
        delta_start: 0,
        length: 4,
        token_type: 3,
        token_modifiers_bitset: 0
      }
    ]
  );
}

#[test]
fn test_tokens_within_range_empty() {
  let mut b = SemanticTokensBuilder::new();
  b.push(1, 0, 1, 0, 0);
  b.push(2, 1, 2, 1, 0);
  b.push(2, 3, 3, 2, 0);
  b.push(3, 0, 4, 3, 0);
  let tokens = b.build(None);
  // No token starts inside (3, 2)..(3, 4).
  let range = lsp::Range::new(
    lsp::Position::new(3, 2),
    lsp::Position::new(3, 4),
  );
  let result = tokens_within_range(&tokens, range);

  assert!(result.data.is_empty());

  // An empty input stream also yields an empty result.
  assert!(tokens_within_range(&SemanticTokens::default(), range)
    .data
    .is_empty());
}
}
84 changes: 84 additions & 0 deletions tests/integration/lsp_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12591,3 +12591,87 @@ fn lsp_ts_code_fix_any_param() {

panic!("failed to find 'Infer parameter types from usage' fix in fixes: {fixes:#?}");
}

#[test]
fn lsp_semantic_token_caching() {
  let context = TestContextBuilder::new().use_temp_cwd().build();
  let temp_dir = context.temp_dir().path();

  let mut client: LspClient = context
    .new_lsp_command()
    .collect_perf()
    .set_root_dir(temp_dir.clone())
    .build();
  client.initialize_default();

  let a = source_file(
    temp_dir.join("a.ts"),
    r#"
export const a = 1;
export const b = 2;
export const bar = () => "bar";
function foo(fun: (number, number, number) => number, c: number) {
const double = (x) => x * 2;
return fun(double(a), b, c);
}"#,
  );

  client.did_open_file(&a);

  // A range request alone does not populate the cache, so the
  // classifications are computed from scratch here.
  let res = client.write_request(
    "textDocument/semanticTokens/range",
    json!({
      "textDocument": a.identifier(),
      "range": {
        "start": a.range_of("const bar").start,
        "end": a.range_of("}").end,
      }
    }),
  );

  assert_eq!(
    client
      .perf()
      .measure_count("tsc.request.getEncodedSemanticClassifications"),
    1,
  );

  // A full-document request computes the tokens and stores them.
  let _full = client.write_request(
    "textDocument/semanticTokens/full",
    json!({
      "textDocument": a.identifier(),
    }),
  );

  assert_eq!(
    client
      .perf()
      .measure_count("tsc.request.getEncodedSemanticClassifications"),
    2,
  );

  // The same range request again should now be served from the cache.
  let res_cached = client.write_request(
    "textDocument/semanticTokens/range",
    json!({
      "textDocument": a.identifier(),
      "range": {
        "start": a.range_of("const bar").start,
        "end": a.range_of("}").end,
      }
    }),
  );

  // The tsc call count is unchanged, proving the cache was used.
  assert_eq!(
    client
      .perf()
      .measure_count("tsc.request.getEncodedSemanticClassifications"),
    2,
  );

  // And the cached answer matches the freshly computed one.
  assert_eq!(res, res_cached);
}
Loading