Skip to content

Instantly share code, notes, and snippets.

@zehnpaard
Last active January 1, 2025 03:26
Show Gist options
  • Save zehnpaard/fea8c7f804ae96e64ca608d7d01efe8b to your computer and use it in GitHub Desktop.
Save zehnpaard/fea8c7f804ae96e64ca608d7d01efe8b to your computer and use it in GitHub Desktop.
Python Implementation of Backtracking Recursive Descent Parser based loosely on Language Implementation Patterns
list : '[' elements ']';
elements : element (',' element)*;
element : NAME | assign | list | list_assign;
assign : NAME '=' NAME;
list_assign : list '=' list;
NAME : ('a'..'z' | 'A'..'Z')+;
from contextlib import contextmanager
class Peekable:
def __init__(self, input_, k, sentinel=None):
self.sentinel = sentinel
self._k = k
self._stream = iter(input_)
self._peek = [next(self._stream, sentinel) for _ in range(k)]
def __getitem__(self, n):
if isinstance(n, int) and n >= self._k:
raise IndexError(f"Invalid lookahead index {n} on Peekable with k={self._k}")
return self._peek[n]
def __iter__(self):
return self
def __next__(self):
if self._peek[0] == self.sentinel:
raise StopIteration
res = self._peek[0]
self._peek = self._peek[1:]
self._peek.append(next(self._stream, self.sentinel))
return res
class Backtrackable:
def __init__(self, input_, sentinel=None):
self._stream = iter(input_)
self.sentinel = sentinel
self.i = 0
self.buffer = []
self.backtrack_points = []
def __getitem__(self, n):
if isinstance(n, int):
self._fill(n+1)
return self.buffer[self.i + n]
else:
raise IndexError(f"Unsupported operation: Indexing into Backtrackable with {n}")
def __iter__(self):
return self
def __next__(self):
self._fill(1)
res = self.buffer[self.i]
self.i += 1
return res
def _fill(self, n):
for _ in range(self.i + n - len(self.buffer)):
self.buffer.append(next(self._stream, self.sentinel))
@contextmanager
def backtrack_always(self):
self.backtrack_points.append(self.i)
try:
yield
finally:
self.i = self.backtrack_points.pop()
import string
from utils import Peekable
import xtokens as t
def lex(char_iterable):
stream = Peekable(char_iterable, 1)
return _lex(stream)
def _lex(stream):
while True:
match stream[0]:
case stream.sentinel:
yield t.Eof()
break
case '[':
next(stream)
yield t.Lbrack()
case ']':
next(stream)
yield t.Rbrack()
case '=':
next(stream)
yield t.Equal()
case ',':
next(stream)
yield t.Comma()
case c if _is_letter(c):
yield _lex_ident(stream)
case c if c in string.whitespace:
next(stream)
case c:
raise ValueError(f"Invalid character {c}")
def _lex_ident(stream):
cs = []
while _is_letter(stream[0]):
cs.append(next(stream))
return t.Ident(''.join(cs))
def _is_letter(c):
return c in string.ascii_letters
import xtokens as t
import xlexer as xl
from utils import Backtrackable
def parse(char_stream):
tokens = Backtrackable(xl.lex(char_stream))
_list(tokens)
_match(tokens, t.Eof)
def _test(tokens, f):
with tokens.backtrack_always():
try:
f(tokens)
except ValueError:
return False
return True
def _element(tokens):
if _test(tokens, _assign):
_assign(tokens)
elif _test(tokens, _name):
_name(tokens)
elif _test(tokens, _assign_list):
_assign_list(tokens)
elif _test(tokens, _list):
_list(tokens)
else:
raise ValueError(f"Invalid token {tokens[0]}")
def _list(tokens):
_match(tokens, t.Lbrack)
_elements(tokens)
_match(tokens, t.Rbrack)
def _elements(tokens):
_element(tokens)
while tokens[0] == t.Comma():
next(tokens)
_element(tokens)
def _assign(tokens):
_match(tokens, t.Ident)
_match(tokens, t.Equal)
_match(tokens, t.Ident)
def _assign_list(tokens):
_list(tokens)
_match(tokens, t.Equal)
_list(tokens)
def _name(tokens):
_match(tokens, t.Ident)
def _match(tokens, token_type):
if isinstance(tokens[0], token_type):
next(tokens)
else:
raise ValueError(f"Failed to match {tokens[0]} to type {token_type}")
from dataclasses import dataclass
@dataclass
class Lbrack:
pass
@dataclass
class Rbrack:
pass
@dataclass
class Equal:
pass
@dataclass
class Comma:
pass
@dataclass
class Eof:
pass
@dataclass
class Ident:
s : str
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment