-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.s
405 lines (376 loc) · 9.52 KB
/
parse.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
.include "valueh.s"
.include "asth.s"
.include "functionh.s"
.include "debugh.s"
/* stream_reg names the register (%r12) that holds the parser's read cursor. */
.equ stream_reg, %r12
/* peek: load the byte under the cursor, zero-extended, into the given
 * register. The cursor itself is not moved. */
.macro peek where:req
movzb (stream_reg), \where
.endm
/* advance: step the cursor forward by one byte. */
.macro advance
inc stream_reg
.endm
/* unadvance: step the cursor back by one byte. */
.macro unadvance
dec stream_reg
.endm
/*
 * kn_parse -- parse a single value starting at the given source position.
 *
 * In:    %rdi = address of the next unread source byte.
 * Out:   %rax = value left there by the token handler that ran.
 *        %rdi = cursor position after the parsed token (the argument loop
 *               in `function` reads it back after each recursive call).
 * Saves: stream_reg (%r12) is pushed on entry and popped in done_parsing.
 *
 * Handlers are reached by an indirect jump, not a call. On entry to a
 * handler: %rax = the byte just consumed (zero-extended), the cursor is one
 * past that byte, and exactly one quadword (the saved stream_reg) sits
 * between %rsp and the return address. Handlers finish by jumping to
 * done_parsing, or back to handle_stream to skip the byte.
 */
.globl kn_parse
kn_parse:
    push stream_reg
    mov %rdi, stream_reg
handle_stream:
    peek %eax
    advance
    /* parse_table has one slot per 7-bit code only; a byte >= 0x80 would
     * index past its end, so report it as an unknown character instead. */
    cmp $0x7f, %eax
    ja invalid
    lea (,%rax,8), %rcx             /* byte * 8 = offset of its table slot */
    add parse_table(%rip), %rcx     /* first quad of the table holds the address of slot 0 */
    jmp *(%rcx)
done_parsing:
    mov stream_reg, %rdi            /* hand the updated cursor back to the caller */
    pop stream_reg
    ret
/* Whitespace bytes are skipped by simply dispatching on the next byte. */
.equ whitespace, handle_stream
/* TODO: consume a whole run of whitespace here before dispatching again, as consecutive whitespace is likely. */
/*
 * comment -- skip the rest of a comment line, then resume dispatching.
 *
 * Bytes are consumed up to and including the terminating '\n'. A NUL byte
 * also ends the comment but is left unconsumed, so handle_stream sees it
 * and dispatches it like any other end-of-input NUL. Without the NUL check
 * the scan would run past the end of the source buffer.
 */
comment:
    peek %eax
    test %al, %al
    jz handle_stream                /* end of input inside the comment */
    advance
    cmp $'\n', %al
    jne comment                     /* still inside the comment */
    jmp handle_stream               /* newline consumed: comment is over */
/*
 * integer -- parse a run of decimal digits into a number value.
 *
 * In:  %rax = the first digit character (already consumed by the dispatcher).
 * Out: %rax = result of KN_NEW_NUMBER applied to the accumulated value;
 *      the cursor rests on the first non-digit byte.
 * No overflow check is made on the accumulated value.
 *
 * Leaves through a plain jump to done_parsing. Pushing that address and
 * executing `ret` reached the same place, but unbalanced the call/return
 * pairing and left %rsp 8 bytes off 16-byte alignment while the macro ran.
 */
integer:
    lea -'0'(%rax), %rax            /* value of the first digit */
0:
    peek %ecx
    sub $'0', %ecx                  /* non-digits wrap to a large unsigned value */
    cmp $9, %rcx
    ja 1f                           /* if it is not a digit, then stop */
    advance
    imul $10, %rax
    add %rcx, %rax
    jmp 0b
1:
    KN_NEW_NUMBER %rax
    jmp done_parsing
# identifier -- scan a name made of [0-9_a-z] and look it up.
#
# The dispatcher has already consumed the first character, so the name
# starts one byte behind the cursor. On exit the cursor rests on the first
# byte that is not part of the name, and kn_env_fetch has been called with
# %rdi = start of the name and %rsi = its length in bytes. Whatever it
# leaves in %rax is passed on to done_parsing unchanged.
identifier:
    lea -1(stream_reg), %rdi        # %rdi = first character of the name
    jmp 1f
0:  # the byte under the cursor belongs to the name: step over it
    advance
1:  # classify the byte under the cursor
    peek %eax
    sub $'0', %al
    cmp $9, %al
    jbe 0b                          # digit
    cmp $('_' - '0'), %al
    je 0b                           # underscore
    sub $('a' - '0'), %al
    cmp $('z' - 'a'), %al
    jbe 0b                          # lowercase letter
    # cursor now sits on the terminating byte, so cursor - start = length
    mov stream_reg, %rsi
    sub %rdi, %rsi
    call kn_env_fetch
    jmp done_parsing
# string -- parse a quoted string literal.
#
# In:  %al = the opening quote character (the dispatcher sends both
#      double and single quotes here); the cursor is just past it.
# Out: %rax = KN_NEW_STRING applied to the result of
#      kn_string_new_borrowed(start, length); the cursor is just past the
#      closing quote.
# A NUL before the matching quote is reported through abort.
string:
    mov stream_reg, %rdi            # keep string start
0:  # scan for the matching quote
    peek %ecx
    advance
    test %cl, %cl
    jz string_missing_quote
    cmp %al, %cl
    jne 0b
    # length = (cursor - start) minus the closing quote that was consumed
    mov stream_reg, %rsi
    sub %rdi, %rsi
    dec %rsi
    # build the string and return
    call kn_string_new_borrowed
    KN_NEW_STRING %rax
    jmp done_parsing
string_missing_quote:
    lea -1(%rdi), %rsi              # %s argument: the text from the opening quote on
    lea unterminated_quote_msg(%rip), %rdi
    # abort is handed a printf-style format here; if it is variadic, the
    # System V convention wants %al = number of vector registers used.
    xor %eax, %eax
    call abort
    ud2                             # abort is not expected to return; never fall into the next handler
# sub $32, %rsp
# mov stream_reg, (%rsp) /* store quote start */
# 0:
# peek %ecx
# advance
# cmp $0, %ecx
# je 1f
# cmp %al, %cl
# jne 0b
#
# /* find the length of the string */
# mov stream_reg, %rdi
# sub (%rsp), %rdi
# dec %rdi
# mov %rdi, 8(%rsp) /* preserve length */
#
# /* allocate it and dereference it */
# call kn_str_alloc
# mov %rax, 16(%rsp)
# mov %rax, %rdi
# call kn_str_deref
#
# /* populate the string */
# mov %rax, %rdi /* the string we just allocated */
# mov (%rsp), %rsi /* quote start */
# mov 8(%rsp), %rdx /* length */
#
# /* set trailing NUL */
# mov %rdi, %rax
# add %rdx, %rax
# movb $0, (%rax)
#
# call _memcpy
#
# /* return */
# mov 16(%rsp), %rax /* load the allocated string */
# add $32, %rsp
#
# kn_vl_new_string %rax
# jmp done_parsing
# literal_null / literal_true / literal_false -- keyword literals.
#
# Each entry point puts the literal value into %eax (zero for false) and
# then strip_literal discards the remaining uppercase letters of the
# keyword, so only the first letter decides which literal it is.
literal_null:
    mov $KN_NULL, %eax
    jmp strip_literal
literal_true:
    mov $KN_TRUE, %eax
    jmp strip_literal
literal_false:
    xor %eax, %eax
    # fallthrough
strip_literal:
    # consume bytes while they are in A-Z; the first byte that is not gets
    # consumed too, so step back over it before leaving
    peek %ecx
    advance
    sub $'A', %cl
    cmp $('Z' - 'A'), %cl
    jbe strip_literal
    unadvance
    jmp done_parsing
/*
 * Dispatch stubs, one per function token.
 *
 * Every stub is named function_<label>, loads the address of
 * kn_func_<label> into %rdi and jumps to the shared argument parser.
 * Symbol stubs go straight to `function`; keyword stubs go through
 * `keyword_function` first, which skips the rest of the keyword.
 */
.macro decl_sym_function label:req
function_\label:
    lea kn_func_\label(%rip), %rdi
    jmp function
.endm

.macro decl_kw_function label:req
function_\label:
    lea kn_func_\label(%rip), %rdi
    jmp keyword_function
.endm

/* functions spelled with a single symbol character */
.irp op, not, mod, and, mul, add, sub, div, then, lth, assign, gth, eql, pow, system, or
    decl_sym_function \op
.endr

/* functions spelled as an uppercase keyword */
.irp kw, block, dump, call, eval, get, length, output, prompt, quit, random, set, if, while
    decl_kw_function \kw
.endr
/*
 * keyword_function / function -- build an AST node for a function token.
 *
 * In:  %rdi = address of the function descriptor (set by the function_*
 *      stub that jumped here).
 * Out: %rax = KN_NEW_AST applied to the node returned by kn_ast_alloc,
 *      with one parsed argument stored per unit of the arity that
 *      KN_FN_ARITY reports.
 *
 * keyword_function first skips the remaining uppercase letters of the
 * keyword and then falls into function.
 *
 * Register roles inside function (all callee-saved, restored on exit):
 *   %r13 = the AST node, %r14 = arguments still to parse,
 *   %r15 = address of the next argument slot.
 * Four pushes keep %rsp 16-byte aligned at each call, given the single
 * quadword kn_parse pushed on entry.
 */
keyword_function:
    peek %eax
    advance
    sub $'A', %al
    cmp $('Z' - 'A'), %al           /* unsigned byte test, as in strip_literal */
    jbe keyword_function
    unadvance                       /* give back the first non-letter byte */
function:
    push %rbx                       /* callee-saved: must survive the write below */
    push %r13
    push %r14
    push %r15
    /* NOTE(review): nothing visible in this file reads %rbx after this
     * load. It is kept in case kn_ast_alloc or a KN_* macro depends on it;
     * confirm and delete if it is a leftover. */
    lea kn_func_prompt(%rip), %rbx
    call kn_ast_alloc
    mov %rax, %r13
    mov KN_AST_OFF_FN(%r13), %rax
    KN_FN_ARITY %rax, %r14
    lea KN_AST_OFF_ARGS(%r13), %r15
0:
    test %r14, %r14
    jz 1f                           /* every argument has been parsed */
    mov stream_reg, %rdi
    call kn_parse
    mov %rdi, stream_reg            /* kn_parse hands the advanced cursor back in %rdi */
    mov %rax, (%r15)
    add $KN_VALUE_SIZE, %r15
    dec %r14
    jmp 0b
1:
    KN_NEW_AST %r13, %rax
    pop %r15
    pop %r14
    pop %r13
    pop %rbx
    jmp done_parsing
/*
 * Error exits. Both hand a printf-style format string to abort in %rdi.
 *
 * They are reached with one quadword (the saved stream_reg) pushed since
 * kn_parse was entered, so %rsp is 16-byte aligned here. abort is therefore
 * entered with `call`, as string_missing_quote already does; a bare `jmp`
 * would start it with a misaligned stack. %al is cleared because the
 * System V convention for variadic callees reads it as the number of
 * vector registers in use. abort is not expected to return, so each call
 * is followed by a trap rather than a fall-through.
 */

/* A token was expected, but the end of the input was reached instead. */
expected_token:
    lea expected_token_fmt(%rip), %rdi
    xor %eax, %eax
    call abort
    ud2

/* An unknown character was given; %rax holds that character. */
invalid:
    lea invalid_token_fmt(%rip), %rdi
    mov %rax, %rsi                  /* the one format argument: the offending byte */
    xor %eax, %eax
    call abort
    ud2
.data
/* Format string loaded into %rdi by expected_token; takes no arguments. */
expected_token_fmt:
.asciz "expected a token.\n"
/* Format string loaded into %rdi by invalid. Both conversions use the
 * positional form %1$, so the single argument is printed twice: once as a
 * character and once in hex. */
invalid_token_fmt:
.asciz "unknown token character '%1$c' (0x%1$x).\n"
/* Format string loaded into %rdi by string_missing_quote; %s receives the
 * address of the opening quote. */
unterminated_quote_msg:
.asciz "unterminated quote encountered: %s\n"
/*
 * parse_table -- handler dispatch table used by handle_stream.
 *
 * The first quadword holds the address of the first handler slot
 * (parse_table+8). handle_stream loads that quadword, adds 8 times the
 * byte it just read, and jumps through the slot it lands on.
 * One slot follows per byte value 0x00-0x7f, in ASCII order; there are no
 * slots for bytes 0x80 and above.
 */
parse_table:
.quad parse_table+8
.quad expected_token /* \x00 */
.quad invalid /* \x01 */
.quad invalid /* \x02 */
.quad invalid /* \x03 */
.quad invalid /* \x04 */
.quad invalid /* \x05 */
.quad invalid /* \x06 */
.quad invalid /* \x07 */
.quad invalid /* \x08 */
.quad whitespace /* \t */
.quad whitespace /* \n */
.quad whitespace /* \v */
.quad whitespace /* \f */
.quad whitespace /* \r */
.quad invalid /* \x0E */
.quad invalid /* \x0F */
.quad invalid /* \x10 */
.quad invalid /* \x11 */
.quad invalid /* \x12 */
.quad invalid /* \x13 */
.quad invalid /* \x14 */
.quad invalid /* \x15 */
.quad invalid /* \x16 */
.quad invalid /* \x17 */
.quad invalid /* \x18 */
.quad invalid /* \x19 */
.quad invalid /* \x1A */
.quad invalid /* \x1B */
.quad invalid /* \x1C */
.quad invalid /* \x1D */
.quad invalid /* \x1E */
.quad invalid /* \x1F */
.quad whitespace /* <space> */
.quad function_not /* ! */
.quad string /* " */
.quad comment /* # */
.quad invalid /* $ */
.quad function_mod /* % */
.quad function_and /* & */
.quad string /* ' */
.quad whitespace /* ( */
.quad whitespace /* ) */
.quad function_mul /* * */
.quad function_add /* + */
.quad invalid /* , */
.quad function_sub /* - */
.quad invalid /* . */
.quad function_div /* / */
.quad integer /* 0 */
.quad integer /* 1 */
.quad integer /* 2 */
.quad integer /* 3 */
.quad integer /* 4 */
.quad integer /* 5 */
.quad integer /* 6 */
.quad integer /* 7 */
.quad integer /* 8 */
.quad integer /* 9 */
.quad whitespace /* : */
.quad function_then /* ; */
.quad function_lth /* < */
.quad function_assign /* = */
.quad function_gth /* > */
.quad function_eql /* ? */
.quad invalid /* @ */
.quad invalid /* A */
.quad function_block /* B */
.quad function_call /* C */
.quad function_dump /* D */
.quad function_eval /* E */
.quad literal_false /* F */
.quad function_get /* G */
.quad invalid /* H */
.quad function_if /* I */
.quad invalid /* J */
.quad invalid /* K */
.quad function_length /* L */
.quad invalid /* M */
.quad literal_null /* N */
.quad function_output /* O */
.quad function_prompt /* P */
.quad function_quit /* Q */
.quad function_random /* R */
.quad function_set /* S */
.quad literal_true /* T */
.quad invalid /* U */
.quad invalid /* V */
.quad function_while /* W */
.quad invalid /* X */
.quad invalid /* Y */
.quad invalid /* Z */
.quad whitespace /* [ */
.quad invalid /* \ */
.quad whitespace /* ] */
.quad function_pow /* ^ */
.quad identifier /* _ */
.quad function_system /* ` */
.quad identifier /* a */
.quad identifier /* b */
.quad identifier /* c */
.quad identifier /* d */
.quad identifier /* e */
.quad identifier /* f */
.quad identifier /* g */
.quad identifier /* h */
.quad identifier /* i */
.quad identifier /* j */
.quad identifier /* k */
.quad identifier /* l */
.quad identifier /* m */
.quad identifier /* n */
.quad identifier /* o */
.quad identifier /* p */
.quad identifier /* q */
.quad identifier /* r */
.quad identifier /* s */
.quad identifier /* t */
.quad identifier /* u */
.quad identifier /* v */
.quad identifier /* w */
.quad identifier /* x */
.quad identifier /* y */
.quad identifier /* z */
.quad whitespace /* { */
.quad function_or /* | */
.quad whitespace /* } */
.quad invalid /* ~ */
.quad invalid /* 0x7f */