@zehnpaard
Last active January 1, 2025 03:26

Revisions

  1. zehnpaard revised this gist Sep 18, 2021. 1 changed file with 23 additions and 0 deletions.
    23 changes: 23 additions & 0 deletions utils.py
    @@ -1,5 +1,28 @@
    from contextlib import contextmanager

    class Peekable:
        def __init__(self, input_, k, sentinel=None):
            self.sentinel = sentinel
            self._k = k
            self._stream = iter(input_)
            self._peek = [next(self._stream, sentinel) for _ in range(k)]

        def __getitem__(self, n):
            if isinstance(n, int) and n >= self._k:
                raise IndexError(f"Invalid lookahead index {n} on Peekable with k={self._k}")
            return self._peek[n]

        def __iter__(self):
            return self

        def __next__(self):
            if self._peek[0] == self.sentinel:
                raise StopIteration
            res = self._peek[0]
            self._peek = self._peek[1:]
            self._peek.append(next(self._stream, self.sentinel))
            return res

    class Backtrackable:
        def __init__(self, input_, sentinel=None):
            self._stream = iter(input_)
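
    A minimal usage sketch of the Peekable added in this revision (an illustration, not part of the gist): it wraps any iterable with a fixed lookahead window of k items, indexable as stream[0] .. stream[k-1], padded with the sentinel once the underlying iterator runs out.

    # Sketch only: assumes the Peekable class above is available from utils.
    from utils import Peekable

    stream = Peekable("ab", 2)
    print(stream[0], stream[1])  # a b    -- peek without consuming
    print(next(stream))          # a      -- consume one item
    print(stream[0], stream[1])  # b None -- window slides; the sentinel pads the end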
  2. zehnpaard revised this gist Sep 12, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion xparser.py
    @@ -4,7 +4,7 @@
    from utils import Backtrackable

    def parse(char_stream):
    -   tokens = Backtrackable(xl.lex(char_stream), 2)
    +   tokens = Backtrackable(xl.lex(char_stream))
        _list(tokens)
        _match(tokens, t.Eof)

    Backtrackable.__init__ takes only the input iterable and an optional sentinel, so this revision drops the stray second argument 2 passed in the initial commit.

  3. zehnpaard created this gist Sep 9, 2021.
    6 changes: 6 additions & 0 deletions grammar
    @@ -0,0 +1,6 @@
    list : '[' elements ']';
    elements : element (',' element)*;
    element : NAME | assign | list | list_assign;
    assign : NAME '=' NAME;
    list_assign : list '=' list;
    NAME : ('a'..'z' | 'A'..'Z')+;
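
    A few inputs this grammar accepts, worked out by hand from the rules above (illustrative examples, not part of the gist); whitespace between tokens is skipped by the lexer below.

    # Hand-checked against the grammar; each comment names the rule exercised.
    valid_inputs = [
        "[a, b, c]",          # list of plain NAME elements
        "[a=b, c]",           # assign as an element
        "[[x, y], z]",        # nested list as an element
        "[[a, b] = [c, d]]",  # list_assign as an element
    ]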
    37 changes: 37 additions & 0 deletions utils.py
    @@ -0,0 +1,37 @@
    from contextlib import contextmanager

    class Backtrackable:
        def __init__(self, input_, sentinel=None):
            self._stream = iter(input_)
            self.sentinel = sentinel
            self.i = 0
            self.buffer = []
            self.backtrack_points = []

        def __getitem__(self, n):
            if isinstance(n, int):
                self._fill(n+1)
                return self.buffer[self.i + n]
            else:
                raise IndexError(f"Unsupported operation: Indexing into Backtrackable with {n}")

        def __iter__(self):
            return self

        def __next__(self):
            self._fill(1)
            res = self.buffer[self.i]
            self.i += 1
            return res

        def _fill(self, n):
            for _ in range(self.i + n - len(self.buffer)):
                self.buffer.append(next(self._stream, self.sentinel))

        @contextmanager
        def backtrack_always(self):
            self.backtrack_points.append(self.i)
            try:
                yield
            finally:
                self.i = self.backtrack_points.pop()
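
    A small sketch of how backtrack_always behaves (an illustration, not part of the gist): whatever is consumed inside the with block, the read position is restored on exit, so the same items can be consumed again afterwards.

    # Sketch only: assumes utils.Backtrackable as defined above.
    from utils import Backtrackable

    stream = Backtrackable("abc")
    with stream.backtrack_always():
        print(next(stream), next(stream))  # a b -- consumed speculatively
    print(next(stream))                    # a   -- position rewound on exit
    print(stream[0])                       # b   -- buffered lookahead still works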
    42 changes: 42 additions & 0 deletions xlexer.py
    @@ -0,0 +1,42 @@
    import string

    from utils import Peekable
    import xtokens as t

    def lex(char_iterable):
        stream = Peekable(char_iterable, 1)
        return _lex(stream)

    def _lex(stream):
        while True:
            match stream[0]:
                case stream.sentinel:
                    yield t.Eof()
                    break
                case '[':
                    next(stream)
                    yield t.Lbrack()
                case ']':
                    next(stream)
                    yield t.Rbrack()
                case '=':
                    next(stream)
                    yield t.Equal()
                case ',':
                    next(stream)
                    yield t.Comma()
                case c if _is_letter(c):
                    yield _lex_ident(stream)
                case c if c in string.whitespace:
                    next(stream)
                case c:
                    raise ValueError(f"Invalid character {c}")

    def _lex_ident(stream):
        cs = []
        while _is_letter(stream[0]):
            cs.append(next(stream))
        return t.Ident(''.join(cs))

    def _is_letter(c):
        return c in string.ascii_letters
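
    A quick sketch of driving the lexer directly (an illustration, not part of the gist): lex returns a generator of token dataclasses, with whitespace skipped rather than tokenized and an explicit Eof at the end.

    # Sketch only: assumes xlexer.py and xtokens.py as defined above.
    import xlexer as xl

    for token in xl.lex("[ab, c=d]"):
        print(token)
    # Lbrack(), Ident(s='ab'), Comma(), Ident(s='c'), Equal(), Ident(s='d'), Rbrack(), Eof()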
    59 changes: 59 additions & 0 deletions xparser.py
    @@ -0,0 +1,59 @@
    import xtokens as t
    import xlexer as xl

    from utils import Backtrackable

    def parse(char_stream):
        tokens = Backtrackable(xl.lex(char_stream), 2)
        _list(tokens)
        _match(tokens, t.Eof)

    def _test(tokens, f):
        with tokens.backtrack_always():
            try:
                f(tokens)
            except ValueError:
                return False
        return True

    def _element(tokens):
        if _test(tokens, _assign):
            _assign(tokens)
        elif _test(tokens, _name):
            _name(tokens)
        elif _test(tokens, _assign_list):
            _assign_list(tokens)
        elif _test(tokens, _list):
            _list(tokens)
        else:
            raise ValueError(f"Invalid token {tokens[0]}")

    def _list(tokens):
        _match(tokens, t.Lbrack)
        _elements(tokens)
        _match(tokens, t.Rbrack)

    def _elements(tokens):
        _element(tokens)
        while tokens[0] == t.Comma():
            next(tokens)
            _element(tokens)

    def _assign(tokens):
        _match(tokens, t.Ident)
        _match(tokens, t.Equal)
        _match(tokens, t.Ident)

    def _assign_list(tokens):
        _list(tokens)
        _match(tokens, t.Equal)
        _list(tokens)

    def _name(tokens):
        _match(tokens, t.Ident)

    def _match(tokens, token_type):
        if isinstance(tokens[0], token_type):
            next(tokens)
        else:
            raise ValueError(f"Failed to match {tokens[0]} to type {token_type}")
    25 changes: 25 additions & 0 deletions xtokens.py
    @@ -0,0 +1,25 @@
    from dataclasses import dataclass

    @dataclass
    class Lbrack:
        pass

    @dataclass
    class Rbrack:
        pass

    @dataclass
    class Equal:
        pass

    @dataclass
    class Comma:
        pass

    @dataclass
    class Eof:
        pass

    @dataclass
    class Ident:
        s: str
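
    A short note on why the tokens are dataclasses (an illustration, not part of the gist): the generated __eq__ gives value equality, which is what lets the parser compare tokens directly, as in tokens[0] == t.Comma() inside _elements.

    # Sketch only: dataclass-generated equality and repr in action.
    import xtokens as t

    print(t.Comma() == t.Comma())        # True  -- value equality, not identity
    print(t.Ident('a') == t.Ident('b'))  # False -- the payload participates in __eq__
    print(t.Ident('a'))                  # Ident(s='a')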