-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstringDetector.ts
More file actions
141 lines (120 loc) · 3.88 KB
/
Copy pathstringDetector.ts
File metadata and controls
141 lines (120 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import * as vscode from 'vscode';
import { Logger } from './logger';
/**
 * One string literal found by `StringDetector` on a single line of a document.
 */
export interface DetectedString {
/** Contents of the literal as captured between its delimiters (delimiters excluded). */
text: string;
/** Zero-based index of the line (after splitting the document text on newlines) where the literal was found. */
line: number;
/** Offset within that line where the full match, including the opening delimiter, begins. */
startChar: number;
/** Offset within that line just past the end of the full match (exclusive; `startChar` + match length). */
endChar: number;
/** Name of the pattern that matched: 'double', 'single', 'template', 'triple-double', 'triple-single' or 'verbatim'. */
quoteType: string;
/** The `languageId` of the document that was scanned. */
languageId: string;
}
/**
 * Finds human-readable string literals in a text document.
 *
 * The document is scanned one line at a time with regular expressions, so a
 * literal that spans several lines (multi-line template literals, Python
 * docstrings, ...) is not detected. Comments are not recognised either: quote
 * characters inside a comment are treated like any other quote.
 */
export class StringDetector {
    /**
     * Heuristics for string contents that are most likely code or identifiers
     * rather than human-readable text. A string is rejected as soon as any
     * one of them matches.
     */
    private static readonly CODE_PATTERNS: readonly RegExp[] = [
        /^[a-zA-Z_][a-zA-Z0-9_]*$/, // Single identifier
        /^\d+$/, // Just numbers
        /^[\w\-\.]+\.[a-zA-Z]{2,4}$/, // File extensions or URLs
        /^[#@$][\w\-]+/, // CSS selectors, variables
        /[{}\[\]();,]/, // Code punctuation
        /^\s*$/, // Empty or whitespace only
        /^[\w\s]*:\s*[\w\s]*$/, // Key-value pairs
        /^\w+\(.*\)$/, // Function calls
        // Operators. Note: `^` binds only to the first alternative, so
        // `==`, `!=`, `&&` and `||` are matched anywhere in the text.
        /^[<>]=?|[!=]=|[&|]{2}/,
    ];

    /** Result of the most recent successful `detectStrings` call. */
    private detectedStrings: DetectedString[] = [];

    /** Not used by this class; left public for callers to store a pending timer handle. */
    public detectionTimeout: NodeJS.Timeout | undefined;

    constructor(private logger: Logger) {}

    /**
     * Scans every line of `document` and returns the string literals found,
     * ordered by line and then by position within the line.
     *
     * The result is also cached and available through `getDetectedStrings()`.
     * If anything throws while scanning, the error is logged, the cache is
     * left empty and an empty array is returned.
     *
     * @param document Document to scan; its `languageId` selects the quote styles.
     * @returns The detected strings (the same array that is cached).
     */
    async detectStrings(document: vscode.TextDocument): Promise<DetectedString[]> {
        this.detectedStrings = [];
        this.logger.log(`Detecting strings in ${document.fileName}`);

        try {
            const languageId = document.languageId;
            // Build the regexes once per document instead of once per line.
            const patterns = this.getStringPatterns(languageId);
            const found: DetectedString[] = [];

            // Accept both LF and CRLF so no stray '\r' is left on a line.
            const lines = document.getText().split(/\r?\n/);
            lines.forEach((line, lineIndex) => {
                found.push(...this.extractStringsFromLine(line, lineIndex, languageId, patterns));
            });

            // Publish only complete results so the cache never holds a partial scan.
            this.detectedStrings = found;
            this.logger.log(`Detected ${this.detectedStrings.length} strings`);
            return this.detectedStrings;
        } catch (error) {
            this.logger.error('Error detecting strings', error);
            return [];
        }
    }

    /**
     * Extracts the string literals of a single line in one left-to-right pass.
     *
     * At each step the literal that starts earliest (over all quote styles)
     * is consumed and scanning resumes right after it. Literals therefore
     * never overlap: an apostrophe inside a double-quoted string is not
     * reported as a single-quoted fragment, and a triple-quoted or verbatim
     * string is not reported a second time as a plain quoted string.
     *
     * @param line Text of the line, without its line terminator.
     * @param lineIndex Zero-based line number stored in each result.
     * @param languageId Language identifier stored in each result.
     * @param patterns Quote patterns in priority order (on equal start offset
     *   the earlier pattern wins). Each regex must have the global flag and
     *   capture the literal's contents in group 1. Defaults to the patterns
     *   for `languageId`.
     * @returns Accepted literals in order of appearance.
     */
    private extractStringsFromLine(
        line: string,
        lineIndex: number,
        languageId: string,
        patterns: Array<{ name: string; regex: RegExp }> = this.getStringPatterns(languageId)
    ): DetectedString[] {
        const strings: DetectedString[] = [];

        // Per-pattern scan state. `pending` caches a match that lies ahead of
        // the current position so each pattern is re-executed only when its
        // cached match has been overtaken; this keeps long lines linear-ish.
        const cursors = patterns.map(pattern => ({
            name: pattern.name,
            regex: pattern.regex,
            pending: null as RegExpExecArray | null,
            exhausted: false,
        }));

        let position = 0;
        while (position < line.length) {
            let winner: (typeof cursors)[number] | undefined;
            let winnerMatch: RegExpExecArray | undefined;

            for (const cursor of cursors) {
                if (cursor.exhausted) {
                    continue;
                }

                let candidate = cursor.pending;
                if (candidate === null || candidate.index < position) {
                    // lastIndex is always set explicitly, so regex state left
                    // over from a previous line or pass cannot leak in.
                    cursor.regex.lastIndex = position;
                    candidate = cursor.regex.exec(line);
                    // A match before `position` can only come from a regex
                    // without the global flag; treat it like "no match" so
                    // the loop is guaranteed to terminate.
                    if (candidate === null || candidate.index < position) {
                        cursor.pending = null;
                        cursor.exhausted = true;
                        continue;
                    }
                    cursor.pending = candidate;
                }

                // Strict '<' keeps the higher-priority pattern on ties.
                if (winnerMatch === undefined || candidate.index < winnerMatch.index) {
                    winner = cursor;
                    winnerMatch = candidate;
                }
            }

            if (winner === undefined || winnerMatch === undefined) {
                break;
            }

            winner.pending = null;
            const fullMatch = winnerMatch[0];
            // Always move forward, even for a (theoretical) empty match.
            position = winnerMatch.index + Math.max(fullMatch.length, 1);

            // Group 1 holds the contents for every pattern.
            const stringContent = winnerMatch[1];

            // Skip empty strings or very short strings
            if (!stringContent || stringContent.trim().length < 2) {
                continue;
            }

            // Skip strings that look like code (contain special characters)
            if (this.looksLikeCode(stringContent)) {
                continue;
            }

            strings.push({
                text: stringContent,
                line: lineIndex,
                startChar: winnerMatch.index,
                endChar: winnerMatch.index + fullMatch.length,
                quoteType: winner.name,
                languageId,
            });
        }

        return strings;
    }

    /**
     * Builds the quote patterns for a language.
     *
     * Language-specific delimiters come first so that, when two patterns
     * match at the same offset (e.g. `"""` vs `"`), the more specific one is
     * preferred by `extractStringsFromLine`. Fresh RegExp objects are
     * returned on every call because global regexes carry `lastIndex` state.
     *
     * @param languageId VS Code language identifier.
     * @returns Patterns in priority order; group 1 of each regex is the contents.
     */
    private getStringPatterns(languageId: string): Array<{ name: string; regex: RegExp }> {
        const languageSpecific: Array<{ name: string; regex: RegExp }> = [];

        switch (languageId) {
            case 'javascript':
            case 'typescript':
            case 'javascriptreact':
            case 'typescriptreact':
                // Template literals
                languageSpecific.push({ name: 'template', regex: /`([^`\\]*(\\.[^`\\]*)*)`/g });
                break;
            case 'python':
                // Triple quotes
                languageSpecific.push({ name: 'triple-double', regex: /"""([\s\S]*?)"""/g });
                languageSpecific.push({ name: 'triple-single', regex: /'''([\s\S]*?)'''/g });
                break;
            case 'csharp':
            case 'fsharp':
                // Verbatim strings
                languageSpecific.push({ name: 'verbatim', regex: /@"([^"]*(?:""[^"]*)*)"/g });
                break;
            case 'php':
                // Heredoc and Nowdoc patterns could be added here
                break;
        }

        return [
            ...languageSpecific,
            // Double quotes
            { name: 'double', regex: /"([^"\\]*(\\.[^"\\]*)*)"/g },
            // Single quotes
            { name: 'single', regex: /'([^'\\]*(\\.[^'\\]*)*)'/g },
        ];
    }

    /**
     * Reports whether `text` matches any of the "looks like code" heuristics
     * (identifier, number, file name, punctuation, key-value pair, call, operator).
     */
    private looksLikeCode(text: string): boolean {
        return StringDetector.CODE_PATTERNS.some(pattern => pattern.test(text));
    }

    /** Returns the cached result of the last successful `detectStrings` call. */
    getDetectedStrings(): DetectedString[] {
        return this.detectedStrings;
    }

    /** Empties the cached result. */
    clearDetectedStrings(): void {
        this.detectedStrings = [];
    }
}