Skip to content

Instantly share code, notes, and snippets.

@Chubek
Created October 12, 2024 11:40
Show Gist options
  • Save Chubek/bf29e6671cdc9637c401becca90c7bdf to your computer and use it in GitHub Desktop.
Save Chubek/bf29e6671cdc9637c401becca90c7bdf to your computer and use it in GitHub Desktop.
GourmetParsec.ml: OCaml Parser Combinator

GourmetParsec

The file GourmetParsec.ml contains a very simple Parser Combinator in OCaml. It also has parsers for several lexical structures.

This is not a good parser combinator by any means.

Papers:

  • Pure Functional Parsing: an Advanced Tutorial by Peter Ljunglof
  • A Typed, Algebraic Approach to Parsing by Neelakantan R. Krishnaswami and Jeremy Yallop

There's loads more papers on parser combinators.

module ParseFuncs = struct
type 'a t = string -> ('a * string) option
let success (p: 'a t) : 'a t =
fun input ->
match p input with
| Some (result, rest) as x -> x
| None -> None
let failure (_: 'a t) : 'a t =
fun _ -> None
let void (p: 'a t) : unit t =
fun input ->
match p input with
| Some (_, rest) -> Some ((), rest)
| None -> None
let satisfy (pred: char -> bool) : char t =
fun input ->
if String.length input > 0 && pred (String.get input 0) then
Some (String.get input 0, String.sub input 1 (String.length input - 1))
else
None
let char (c : char) : char t =
satisfy (fun x -> x = c)
let string (s : string) : string t =
fun input ->
if String.starts_with ~prefix:s input then
let offs = s |> String.length in
let len = input |> String.length in
Some (s, String.sub input offs (len - offs))
else
None
let icase (p: 'a t) : 'a t =
fun input ->
match input |> String.lowercase_ascii |> p with
| Some (result, rest) as x -> x
| None -> None
let return (x: 'a) : 'a t =
fun input -> Some (x, input)
let const (p: 'a t) : unit t =
fun input ->
match p input with
| Some (_, rest) -> Some ((), rest)
| None -> None
let (<<^) (p: 'a t) (_: 'b t) : 'a t =
fun input ->
match p input with
| Some (result, rest) as x -> x
| None -> None
let (^>>) (_: 'a t) (p: 'b t) : 'b t =
fun input ->
match p input with
| Some (result, rest) as x -> x
| None -> None
let (>>=) (p: 'a t) (f: 'a -> 'b t) : 'b t =
fun input ->
match p input with
| Some (result, rest) -> f result rest
| None -> None
let (>>-) (p: 'a t) (p': 'b t) : 'b t =
p >>= fun _ -> p'
let (<|>) (p1: 'a t) (p2: 'a t) : 'a t =
fun input ->
match p1 input with
| Some result -> Some result
| None -> p2 input
let (<$>) (f: 'a -> 'b) (p: 'a t) : 'b t =
fun input ->
match p input with
| Some (result, rest) -> Some (f result, rest)
| None -> None
let optional (p: 'a t) : 'a option t =
fun input ->
match p input with
| Some (result, rest) -> Some (Some result, rest)
| None -> Some (None, input)
let optional_fallback (p: 'a t) (defl: 'a) : 'a t =
fun input ->
match p input with
| Some (result, rest) as x -> x
| None -> Some (defl, input)
let rec many (p: 'a t) : 'a list t =
fun input ->
match p input with
| Some (result, rest) ->
(match many p rest with
| Some (results, final_rest) -> Some (result :: results, final_rest)
| None -> Some ([result], rest))
| None -> Some ([], input)
let many1 (p: 'a t) : 'a list t =
fun input ->
match p input with
| Some (result, rest) ->
(match many p rest with
| Some (results, final_rest) -> Some (result :: results, final_rest)
| None -> Some ([result], rest))
| None -> None
let sep_by (sep: 'b t) (p: 'a t) : 'a list t =
fun input ->
match p input with
| Some (result, rest) ->
let rec aux acc input =
match sep input with
| Some (_, sep_rest) ->
(match p sep_rest with
| Some (next_result, next_rest) -> aux (next_result :: acc) next_rest
| None -> Some (List.rev acc, input))
| None -> Some (List.rev acc, input)
in aux [result] rest
| None -> Some ([], input)
let sep_by1 (sep: 'b t) (p: 'a t) : 'a list t =
fun input ->
match p input with
| Some (result, rest) ->
let rec aux acc input =
match sep input with
| Some (_, sep_rest) ->
(match p sep_rest with
| Some (next_result, next_rest) -> aux (next_result :: acc) next_rest
| None -> Some (List.rev acc, input))
| None -> Some (List.rev acc, input)
in aux [result] rest
| None -> None
let end_by (end_p: 'b t) (p: 'a t) : 'a list t =
fun input ->
let rec aux acc input =
match p input with
| Some (result, rest) ->
(match end_p rest with
| Some (_, end_rest) -> aux (result :: acc) end_rest
| None -> Some (List.rev acc, input))
| None -> Some (List.rev acc, input)
in aux [] input
let end_by1 (end_p: 'b t) (p: 'a t) : 'a list t =
fun input ->
match p input with
| Some (result, rest) ->
let rec aux acc input =
match end_p input with
| Some (_, end_rest) ->
(match p end_rest with
| Some (next_result, next_rest) -> aux (next_result :: acc) next_rest
| None -> Some (List.rev (result :: acc), input))
| None -> Some (List.rev (result :: acc), input)
in aux [] rest
| None -> None
let end_till (end_p: 'b t) (p: 'a t) : 'a list t =
fun input ->
let rec aux acc input =
match end_p input with
| Some (_, rest) -> Some (List.rev acc, rest)
| None ->
match p input with
| Some (result, rest) -> aux (result :: acc) rest
| None -> Some (List.rev acc, input)
in aux [] input
let skip_many (p: 'a t) : unit t =
fun input ->
let rec aux input =
match p input with
| Some (_, rest) -> aux rest
| None -> Some ((), input)
in aux input
let skip_many1 (p: 'a t) : unit t =
fun input ->
match p input with
| Some (_, rest) ->
let rec aux input =
match p input with
| Some (_, rest) -> aux rest
| None -> Some ((), input)
in aux rest
| None -> None
let chainl1 (p: 'a t) (op: ('a -> 'a -> 'a) t) : 'a list t =
fun input ->
let rec aux acc input =
match op input with
| Some (f, op_rest) ->
(match p op_rest with
| Some (next_result, next_rest) -> aux (f acc next_result) next_rest
| None -> Some ([acc], input))
| None -> Some ([acc], input)
in
match p input with
| Some (result, rest) -> aux result rest
| None -> None
let chainr1 (p: 'a t) (op: ('a -> 'a -> 'a) t) : 'a list t =
fun input ->
let rec aux input =
match p input with
| Some (result, rest) ->
(match op rest with
| Some (f, op_rest) ->
(match aux op_rest with
| Some (next_results, final_rest) ->
let combined =
List.fold_left (fun acc x -> f x acc) result next_results in
Some ([combined], final_rest)
| None -> Some ([result], rest))
| None -> Some ([result], rest))
| None -> None
in aux input
let prefix (pre: 'a t) (p: 'b t) : 'b t =
fun input ->
match pre input with
| Some (_, rest) -> p rest
| None -> None
let postfix (p: 'a t) (post: 'b t) : 'a t =
fun input ->
match p input with
| Some (result, rest) ->
(match post rest with
| Some (_, final_rest) -> Some (result, final_rest)
| None -> None)
| None -> None
let between (open_p: 'a t) (close_p: 'b t) (p: 'c t) : 'c t =
fun input ->
match open_p input with
| Some (_, open_rest) ->
(match p open_rest with
| Some (result, p_rest) ->
(match close_p p_rest with
| Some (_, final_rest) -> Some (result, final_rest)
| None -> None)
| None -> None)
| None -> None
let wrap (p_wrap : 'b t) (p: 'a t) : 'a t =
fun input ->
match p_wrap input with
| Some (_, after_leading_wrap) ->
(match p after_leading_wrap with
| Some (result, after_p) ->
(match p_wrap after_p with
| Some (_, final_rest) -> Some (result, final_rest)
| None -> Some (result, after_p))
| None -> None)
| None -> None
let wrap_either_side (p_wrap : 'b t) (p : 'a t) : 'a t =
between (optional p_wrap) (optional p_wrap) p
let sequence (parsers: 'a t list) : 'a list t =
fun input ->
let rec aux acc remaining_parsers current_input =
match remaining_parsers with
| [] -> Some (List.rev acc, current_input)
| p :: ps ->
match p current_input with
| Some (result, rest) -> aux (result :: acc) ps rest
| None -> None
in
aux [] parsers input
let count (n: int) (p: 'a t) : 'a list t =
fun input ->
let rec aux acc n current_input =
if n = 0 then
Some (List.rev acc, current_input)
else
match p current_input with
| Some (result, rest) -> aux (result :: acc) (n - 1) rest
| None -> None
in
aux [] n input
let range (min: int) (max: int) (p: 'a t) : 'a list t =
fun input ->
let rec aux acc count current_input =
if count >= max then
Some (List.rev acc, current_input)
else
match p current_input with
| Some (result, rest) -> aux (result :: acc) (count + 1) rest
| None ->
if count >= min then Some (List.rev acc, current_input)
else None
in
aux [] 0 input
let with_input (p: 'a t) : ('a * string) t =
fun input ->
match p input with
| Some (result, rest) ->
let consumed = String.sub input 0 (String.length input - String.length rest) in
Some ((result, consumed), rest)
| None -> None
end
module ParseCharacter = struct
open ParseFuncs
let p_any = satisfy (fun _ -> true)
let p_some = many p_any
let p_not_apostrophe = satisfy (fun c -> c != '\'')
let p_not_quote = satisfy (fun c -> c != '"')
let p_bdigit = satisfy (fun c -> c == '0' || c == '1')
let p_zdigit = satisfy (fun c -> c >= '0' && c <= '9')
let p_odigit = satisfy (fun c -> c >= '0' && c <= '7')
let p_ndigit = satisfy (fun c -> c >= '1' && c < '9')
let p_xdigit = satisfy (fun c -> (c >= '0' && c <= '9')
|| (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F'))
let p_upper = satisfy (fun c -> c >= 'A' && c <= 'Z')
let p_lower = satisfy (fun c -> c >= 'a' && c <= 'z')
let p_letter = satisfy (fun c -> (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z'))
let p_blank = satisfy (fun c -> c == '\t' || c == ' ')
let p_newline = satisfy (fun c -> c == '\n' || c == '\r')
let p_symbols = satisfy (function
| '~' | '@' | '$' | '?' | '!' -> true
| _ -> false)
end
module ParseLexeme = struct
open ParseFuncs
open ParseCharacter
let p_lexeme lexeme = string lexeme
let p_some_blank_opt = skip_many1 p_blank
let p_some_newline_opt = skip_many1 p_newline
let p_some_whitespace_opt = skip_many1 (p_blank <|> p_newline)
let p_wrap_space p = wrap p_some_whitespace_opt p
let p_ninteger = prefix p_ndigit (many1 p_zdigit)
let p_binteger = prefix (char '#') (many1 p_bdigit)
let p_ointeger = prefix (char '$') (many1 p_odigit)
let p_xinteger = prefix (char '"') (many1 p_xdigit)
let p_integer = p_ninteger
<|> p_binteger
<|> p_ointeger
<|> p_xinteger
let p_real = (many1 p_zdigit)
>>- (char '.')
>>- (many1 p_zdigit)
let p_identifier =
prefix (p_letter
<|> (char '_'))
(many1 (p_letter
<|> char '_'
<|> p_zdigit))
let p_upper_identifier =
p_upper >>- p_identifier
let p_char_lit = between (char '\'')
(char '\'')
(many1 p_not_apostrophe)
let p_string_lit = between (char '"')
(char '"')
(many p_not_quote)
let p_op_add = p_lexeme "+"
let p_op_sub = p_lexeme "-"
let p_op_mul = p_lexeme "*"
let p_op_mod = p_lexeme "%"
let p_op_pow = p_lexeme "**"
let p_op_fdiv = p_lexeme "/"
let p_op_idiv = p_lexeme "//"
let p_op_eq = p_lexeme "=="
let p_op_ne = p_lexeme "<>"
let p_op_gt = p_lexeme ">"
let p_op_ge = p_lexeme ">="
let p_op_lt = p_lexeme "<"
let p_op_le = p_lexeme "<="
let p_op_shr = p_lexeme ">>"
let p_op_shl = p_lexeme "<<"
let p_op_ror = p_lexeme ">>>"
let p_op_conj = p_lexeme "&&"
let p_op_disj = p_lexeme "||"
let p_op_and = p_lexeme "&"
let p_op_or = p_lexeme "|"
let p_op_xor = p_lexeme "^"
let p_op_cat = p_lexeme ".."
let p_op_lexg_eq = icase (p_lexeme "eq?")
let p_op_lexg_ne = icase (p_lexeme "ne?")
let p_op_lexg_gt = icase (p_lexeme "gt?")
let p_op_lexg_ge = icase (p_lexeme "ge?")
let p_op_lexg_gt = icase (p_lexeme "gt?")
let p_op_lexg_lt = icase (p_lexeme "lt?")
let p_op_lexg_le = icase (p_lexeme "le?")
let p_assign_simple_op = p_lexeme "="
let p_assign_deepcopy_op = p_lexeme ":="
let p_assign_immutable_op = p_lexeme "=>"
let p_assign_move_op = p_lexeme "<-"
let p_assign_lazy_op = p_lexeme "~="
let p_assign_async_op = p_lexeme "@="
let p_assign_cow_op = p_lexeme "=^"
let p_assign_mon_op = p_lexeme "<=>"
let p_assign_casual_op = p_lexeme "<?="
let p_assign_weakref_op = p_lexeme "=>="
let p_assign_add_op = p_lexeme "+="
let p_assign_sub_op = p_lexeme "-="
let p_assign_mul_op = p_lexeme "*="
let p_assign_mod_op = p_lexeme "%="
let p_assign_pow_op = p_lexeme "**="
let p_assign_fdiv_op = p_lexeme "/="
let p_assign_idiv_op = p_lexeme "//="
let p_assign_shr_op = p_lexeme ">>="
let p_assign_shl_op = p_lexeme "<<="
let p_assign_ror_op = p_lexeme ">>>="
let p_assign_conj_op = p_lexeme "&&="
let p_assign_disj_op = p_lexeme "||="
let p_assign_and_op = p_lexeme "&="
let p_assign_or_op = p_lexeme "|="
let p_assign_xor_op = p_lexeme "^="
let p_delim_lparen = p_lexeme "("
let p_delim_rparen = p_lexeme ")"
let p_delim_lbrack = p_lexeme "["
let p_delim_rbrack = p_lexeme "]"
let p_delim_lbrace = p_lexeme "{"
let p_delim_rbrace = p_lexeme "}"
let p_sep_comma = p_lexeme ","
let p_sep_semi = p_lexeme ";"
let p_sep_colon = p_lexeme ":"
let p_sep_pipe = p_lexeme "|"
end
module ParseExpression = struct
open ParseCharacter
open ParseLexeme
open ParseFuncs
end
module ContrlflowGraph = struct
type t =
{ entry : basic_block
; exit : basic_block
; dominators : LabelMap.t list
; immediate_dominators : LabelMap.t
; dominance_frontiers : LabelMap.t list
; loop_depth : IntMap.t
; loops : LoopSet.t
}
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment