Skip to content

Commit 48cd541

Browse files
committed
Python: Support template strings in rest of extractor
Adds three new AST nodes to the mix:

- `TemplateString` represents a t-string in Python 3.14.
- `TemplateStringPart` represents one of the string constituents of a t-string. (The interpolated expressions are represented as `Expr` nodes, just like in f-strings.)
- `JoinedTemplateString` represents an implicit concatenation of template strings.

Importantly, we _completely avoid_ the complicated construction we currently do for format strings (as well as the confusing nomenclature). In particular, we do not inject extra empty strings to force a template string into a strict alternation of strings and expressions. A `JoinedTemplateString` simply has a list of template string children, and a `TemplateString` has a list of "values" which may be either `Expr` or `TemplateStringPart` nodes.

If we ever find that we actually want the more complicated interface for these strings, then I would much rather we reconstruct this inside of QL rather than in the parser.
1 parent 5928d0f commit 48cd541

File tree

7 files changed

+323
-4
lines changed

7 files changed

+323
-4
lines changed

python/extractor/semmle/python/ast.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,15 @@ def __init__(self, prefix, text, s):
5656
self.text = text
5757
self.s = s
5858

59+
class TemplateStringPart(AstBase):
    """One literal string segment of a template string (t-string) literal.

    Interpolated expressions are not represented by this class; only the
    string pieces between them are.
    """

    # Both values are stored exactly as passed in.
    # NOTE(review): in the expected parser output `text` keeps the quotes and
    # `s` is the unquoted value; confirm where that conversion happens.
    __slots__ = ("text", "s")

    def __init__(self, text, s):
        self.s = s
        self.text = text
67+
5968
class alias(AstBase):
6069
__slots__ = "value", "asname",
6170

@@ -356,6 +365,19 @@ class JoinedStr(expr):
356365
def __init__(self, values):
357366
self.values = values
358367

368+
class TemplateString(expr):
    """A single template string literal (t-string).

    Attributes:
        prefix: the prefix of the literal as recorded by the parser.
        values: the constituents of the literal, in order.
    """

    # NOTE(review): per the commit description, `values` holds expression
    # nodes and `TemplateStringPart` nodes, with no padding by empty strings;
    # nothing in this class enforces that.
    __slots__ = ("prefix", "values")

    def __init__(self, prefix, values):
        self.values = values
        self.prefix = prefix
374+
375+
class JoinedTemplateString(expr):
    """An implicit concatenation of template string literals.

    Attributes:
        strings: the concatenated template strings, stored as given.
    """

    __slots__ = ("strings",)

    def __init__(self, strings):
        # The list is stored as-is; this class neither copies nor flattens it.
        self.strings = strings
380+
359381

360382
class Lambda(expr):
361383
__slots__ = "args", "inner_scope",

python/extractor/semmle/python/master.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,20 @@
186186

187187
FormattedValue = ClassNode("FormattedValue", expr, descriptive_name='formatted value')
188188

189+
189190
AnnAssign = ClassNode("AnnAssign", stmt, descriptive_name='annotated assignment')
190191

191192
AssignExpr = ClassNode('AssignExpr', expr, "assignment expression")
192193

193194
SpecialOperation = ClassNode('SpecialOperation', expr, "special operation")
194195

196+
# Schema nodes for template strings (t-strings).
TemplateString = ClassNode(
    "TemplateString",
    expr,
    "template string literal",
)

# List type for sequences of template strings.
template_string_list = ListNode(TemplateString)

JoinedTemplateString = ClassNode(
    "JoinedTemplateString",
    expr,
    descriptive_name="joined template string",
)
# Declared with `expr` as its base.
# NOTE(review): presumably so that parts fit in an `expr_list` alongside
# interpolated expressions; confirm against the `values` field declaration.
TemplateStringPart = ClassNode(
    "TemplateStringPart",
    expr,
    "string part of a template string",
)
202+
195203
type_parameter = ClassNode('type_parameter', descriptive_name='type parameter')
196204
type_parameter.field('location', location)
197205
type_parameter_list = ListNode(type_parameter)
@@ -435,6 +443,9 @@
435443
Subscript.field('index', expr)
436444
Subscript.field('ctx', expr_context, 'context')
437445

446+
TemplateString.field('prefix', string, 'prefix')
447+
TemplateString.field('values', expr_list, 'values')
448+
438449
Try.field('body', stmt_list)
439450
Try.field('orelse', stmt_list, 'else block')
440451
Try.field('handlers', stmt_list, 'exception handlers')
@@ -484,10 +495,15 @@
484495
StringPart.field('text', string)
485496
StringPart.field('location', location)
486497

498+
TemplateStringPart.field('text', string)
499+
500+
487501
Await.field('value', expr, 'expression waited upon')
488502

489503
FormattedStringLiteral.field('values', expr_list)
490504

505+
JoinedTemplateString.field('strings', template_string_list)
506+
491507
FormattedValue.field('value', expr, "expression to be formatted")
492508
FormattedValue.field('conversion', string, 'type conversion')
493509
FormattedValue.field('format_spec', FormattedStringLiteral, 'format specifier')

python/extractor/semmle/python/parser/tsg_parser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ def get_location_info(attrs):
273273
ast.Print: ("values",),
274274
ast.Set: ("elts",),
275275
ast.Str: ("implicitly_concatenated_parts",),
276+
ast.TemplateString: ("values",),
277+
ast.JoinedTemplateString: ("strings",),
276278
ast.TypeAlias: ("type_parameters",),
277279
ast.Try: ("body", "handlers", "orelse", "finalbody"),
278280
ast.Tuple: ("elts",),
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
Module: [1, 0] - [18, 0]
2+
body: [
3+
Assign: [1, 0] - [1, 14]
4+
targets: [
5+
Name: [1, 0] - [1, 4]
6+
variable: Variable('name', None)
7+
ctx: Store
8+
]
9+
value:
10+
Str: [1, 7] - [1, 14]
11+
s: 'World'
12+
prefix: '"'
13+
implicitly_concatenated_parts: None
14+
Assign: [2, 0] - [2, 15]
15+
targets: [
16+
Name: [2, 0] - [2, 5]
17+
variable: Variable('value', None)
18+
ctx: Store
19+
]
20+
value:
21+
Num: [2, 8] - [2, 15]
22+
n: 42.5678
23+
text: '42.5678'
24+
Assign: [3, 0] - [3, 15]
25+
targets: [
26+
Name: [3, 0] - [3, 5]
27+
variable: Variable('first', None)
28+
ctx: Store
29+
]
30+
value:
31+
Str: [3, 8] - [3, 15]
32+
s: 'first'
33+
prefix: '"'
34+
implicitly_concatenated_parts: None
35+
Assign: [4, 0] - [4, 17]
36+
targets: [
37+
Name: [4, 0] - [4, 6]
38+
variable: Variable('second', None)
39+
ctx: Store
40+
]
41+
value:
42+
Str: [4, 9] - [4, 17]
43+
s: 'second'
44+
prefix: '"'
45+
implicitly_concatenated_parts: None
46+
If: [6, 0] - [6, 5]
47+
test:
48+
Num: [6, 3] - [6, 4]
49+
n: 1
50+
text: '1'
51+
body: [
52+
Expr: [7, 4] - [7, 7]
53+
value:
54+
TemplateString: [7, 4] - [7, 7]
55+
prefix: 't"'
56+
values: []
57+
]
58+
orelse: None
59+
If: [8, 0] - [8, 5]
60+
test:
61+
Num: [8, 3] - [8, 4]
62+
n: 2
63+
text: '2'
64+
body: [
65+
Expr: [9, 4] - [9, 21]
66+
value:
67+
TemplateString: [9, 4] - [9, 21]
68+
prefix: 't"'
69+
values: [
70+
TemplateStringPart: [9, 6] - [9, 13]
71+
text: '"Hello, "'
72+
s: 'Hello, '
73+
Name: [9, 14] - [9, 18]
74+
variable: Variable('name', None)
75+
ctx: Load
76+
TemplateStringPart: [9, 19] - [9, 20]
77+
text: '"!"'
78+
s: '!'
79+
]
80+
]
81+
orelse: None
82+
If: [10, 0] - [10, 5]
83+
test:
84+
Num: [10, 3] - [10, 4]
85+
n: 3
86+
text: '3'
87+
body: [
88+
Expr: [11, 4] - [11, 42]
89+
value:
90+
TemplateString: [11, 4] - [11, 42]
91+
prefix: 't"'
92+
values: [
93+
TemplateStringPart: [11, 6] - [11, 13]
94+
text: '"Value: "'
95+
s: 'Value: '
96+
Name: [11, 14] - [11, 19]
97+
variable: Variable('value', None)
98+
ctx: Load
99+
TemplateStringPart: [11, 24] - [11, 31]
100+
text: '", Hex: "'
101+
s: ', Hex: '
102+
Name: [11, 32] - [11, 37]
103+
variable: Variable('value', None)
104+
ctx: Load
105+
]
106+
]
107+
orelse: None
108+
If: [12, 0] - [12, 5]
109+
test:
110+
Num: [12, 3] - [12, 4]
111+
n: 4
112+
text: '4'
113+
body: [
114+
Expr: [13, 4] - [13, 29]
115+
value:
116+
TemplateString: [13, 4] - [13, 29]
117+
prefix: 't"'
118+
values: [
119+
TemplateStringPart: [13, 6] - [13, 28]
120+
text: '"Just a regular string."'
121+
s: 'Just a regular string.'
122+
]
123+
]
124+
orelse: None
125+
If: [14, 0] - [14, 5]
126+
test:
127+
Num: [14, 3] - [14, 4]
128+
n: 5
129+
text: '5'
130+
body: [
131+
Expr: [15, 4] - [15, 50]
132+
value:
133+
TemplateString: [15, 4] - [15, 50]
134+
prefix: 't"'
135+
values: [
136+
TemplateStringPart: [15, 6] - [15, 15]
137+
text: '"Multiple "'
138+
s: 'Multiple '
139+
Name: [15, 16] - [15, 21]
140+
variable: Variable('first', None)
141+
ctx: Load
142+
TemplateStringPart: [15, 22] - [15, 27]
143+
text: '" and "'
144+
s: ' and '
145+
Name: [15, 28] - [15, 34]
146+
variable: Variable('second', None)
147+
ctx: Load
148+
TemplateStringPart: [15, 35] - [15, 49]
149+
text: '" placeholders."'
150+
s: ' placeholders.'
151+
]
152+
]
153+
orelse: None
154+
If: [16, 0] - [16, 5]
155+
test:
156+
Num: [16, 3] - [16, 4]
157+
n: 6
158+
text: '6'
159+
body: [
160+
Expr: [17, 4] - [17, 66]
161+
value:
162+
JoinedTemplateString: [17, 4] - [17, 66]
163+
strings: [
164+
TemplateString: [17, 4] - [17, 31]
165+
prefix: 't"'
166+
values: [
167+
TemplateStringPart: [17, 6] - [17, 30]
168+
text: '"Implicit concatenation: "'
169+
s: 'Implicit concatenation: '
170+
]
171+
TemplateString: [17, 32] - [17, 49]
172+
prefix: 't"'
173+
values: [
174+
TemplateStringPart: [17, 34] - [17, 41]
175+
text: '"Hello, "'
176+
s: 'Hello, '
177+
Name: [17, 42] - [17, 46]
178+
variable: Variable('name', None)
179+
ctx: Load
180+
TemplateStringPart: [17, 47] - [17, 48]
181+
text: '"!"'
182+
s: '!'
183+
]
184+
TemplateString: [17, 50] - [17, 66]
185+
prefix: 't"'
186+
values: [
187+
TemplateStringPart: [17, 52] - [17, 65]
188+
text: '" How are you?"'
189+
s: ' How are you?'
190+
]
191+
]
192+
]
193+
orelse: None
194+
]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name = "World"
2+
value = 42.5678
3+
first = "first"
4+
second = "second"
5+
6+
if 1:
7+
t""
8+
if 2:
9+
t"Hello, {name}!"
10+
if 3:
11+
t"Value: {value:.2f}, Hex: {value:#x}"
12+
if 4:
13+
t"Just a regular string."
14+
if 5:
15+
t"Multiple {first} and {second} placeholders."
16+
if 6:
17+
t"Implicit concatenation: " t"Hello, {name}!" t" How are you?"

python/extractor/tsg-python/python.tsg

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@
117117
(string string_content: (_) @part)
118118
{ let @part.node = (ast-node @part "StringPart") }
119119

120+
; Each `string_content` child of a template string gets a `TemplateStringPart` AST node.
(template_string string_content: (_) @part)
{ let @part.node = (ast-node @part "TemplateStringPart") }
122+
120123
; A string concatenation that contains no interpolated expressions is just a `Str` (and its children
121124
; will be `StringPart`s). A string concatenation that contains interpolated expressions is a
122125
; `JoinedStr`, however.
@@ -142,6 +145,12 @@
142145
}
143146
}
144147

148+
; Every template string (`t"..."`) gets a `TemplateString` AST node.
(template_string) @tstring
{ let @tstring.node = (ast-node @tstring "TemplateString") }

; An implicit concatenation of template strings gets a `JoinedTemplateString` AST node.
(concatenated_template_string) @tstrings
{ let @tstrings.node = (ast-node @tstrings "JoinedTemplateString") }
153+
145154
(pair) @kvpair
146155
{ let @kvpair.node = (ast-node @kvpair "KeyValuePair") }
147156

@@ -2052,6 +2061,44 @@
20522061

20532062
;;;;;; End of JoinedStr (`f"foo"`)
20542063

2064+
;;;;;; JoinedTemplateString / TemplateString (`t"foo"`)

; Record the prefix of the template string.
(template_string) @tstring
{
  attr (@tstring.node) prefix = (string-prefix @tstring)
}

; Attach the literal string parts to the template string node.
; Each part is linked through the `values` field, keyed by the part's own
; named-child index within the template string.
(template_string (string_content) @part) @tmpl_any
{
  edge @tmpl_any.node -> @part.node
  attr (@tmpl_any.node -> @part.node) values = (named-child-index @part)
  ; NOTE(review): the Python `TemplateStringPart` class declares only the
  ; `text` and `s` slots; confirm that setting `ctx` here is intended.
  attr (@part.node) ctx = "load"
  ; The enclosing string's safe prefix and quotes are wrapped around the raw
  ; source text of the part; the same value is used for both `s` and `text`.
  let safe_string = (concatenate-strings (string-safe-prefix @tmpl_any) (source-text @part) (string-quotes @tmpl_any))
  attr (@part.node) s = safe_string
  attr (@part.node) text = safe_string
}

; Attach interpolated expressions to the template string node.
; Only the `expression` field of the interpolation is attached here; no other
; child of the interpolation is matched by this stanza.
; The index comes from the enclosing `interpolation` (@interp), i.e. its
; position among the template string's named children, not from the expression.
(template_string (interpolation expression: (_) @part) @interp) @tmpl_any
{
  edge @tmpl_any.node -> @part.node
  attr (@tmpl_any.node -> @part.node) values = (named-child-index @interp)
  attr (@part.node) ctx = "load"
}


; Concatenated template strings simply have a list-like field containing the template strings that
; are concatenated together.
(concatenated_template_string (template_string) @tstring) @tmpl_concat
{
  edge @tmpl_concat.node -> @tstring.node
  attr (@tmpl_concat.node -> @tstring.node) strings = (named-child-index @tstring)
  ; NOTE(review): the Python `TemplateString` class declares only the `prefix`
  ; and `values` slots; confirm that setting `ctx` here is intended.
  attr (@tstring.node) ctx = "load"
}

;;;;;; End of JoinedTemplateString / TemplateString (`t"foo"`)
2101+
20552102

20562103

20572104
;;;;;; List (`[...]`)

0 commit comments

Comments
 (0)