Chubek · May 4, 2024 17:22
diff --git a/README.md b/README.md
diff --git a/ebnf-bootstrap.ebnf b/ebnf-bootstrap.ebnf
 # Bootstrap character classes.

 # The ten decimal digits.
 digit ::=
     | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
     ;

 # A digit with '0' excluded.
 digit-natural ::=
     digit - '0'
     ;

 # Upper-case letters A to Z.
 upper-case ::=
     | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J'
     | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T'
     | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
     ;

 # Lower-case letters a to z.
 lower-case ::=
     | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j'
     | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't'
     | 'u' | 'v' | 'w' | 'x' | 'y' | 'z'
     ;

 # Punctuation marks, each written as a quoted terminal.
 punctuation ::=
     | '.' | ',' | ';' | ':' | '!' | '?' | '-' | '_' | '(' | ')'
     | '{' | '}' | '[' | ']' | '"' | "'" | '`' | '@' | '#'
     | '$' | '%' | '^' | '&' | '*' | '+' | '=' | '<' | '>'
     | '/' | '|' | '~' | '\'
     ;

 # The three rules below are described in words between question marks
 # instead of being spelled out as terminals.
 space ::=
     ? Sequential white space ?
     ;

 newline ::=
     ? The newline character on native system ?
     ;

 tabulator ::=
     ? The tab character on native system ?
     ;

 # Composite classes, each one built from the rules above it.
 alphabetic ::=
     upper-case | lower-case
     ;

 alphanumeric ::=
     alphabetic | digit
     ;

 symbolic ::=
     punctuation | alphanumeric | space
     ;

 printable ::=
     symbolic | tabulator
     ;

 character ::=
     printable | newline
     ;

diff --git a/ebnf.vim b/ebnf.vim
 " ebnf.vim - Syntax highlighting for EBNF
 " Improved by: Your Name Here
 " Original Author: Chubak Bidpaa ([email protected])

 " Do nothing when a syntax has already been set up for this buffer.
 if exists("b:current_syntax")
   finish
 endif

 " Comments run from '#' to the end of the line (adjust according to EBNF variant)
 syntax region ebnfComment start=/\v\#.*$/ end=/$/ keepend
 " Quoted terminals are regions, so operators inside the quotes are not matched
 syntax region ebnfMultiCharTerminal start=/\v"/ end=/\v"/
 syntax region ebnfSingleCharTerminal start=/\v'/ end=/\v'/

 " Capture patterns: '? ... ?' closed by a '?' at end of line
 " (optional, depends on EBNF variant)
 syntax region ebnfCapture start=/\v\s*\?/ end=/\v\s*\?$/ keepend

 " Non-terminal identifiers. The tail class includes A-Z so that a mixed-case
 " name such as fooBar is matched as one identifier instead of being split.
 syntax match ebnfNonTermIdent /\v[-_a-zA-Z][-_a-zA-Z0-9]*/
 " LHS identifiers: the same pattern anchored at the start of the line. It is
 " defined after ebnfNonTermIdent so that it wins when both match there.
 syntax match ebnfLhsIdent /\v^[-_a-zA-Z][-_a-zA-Z0-9]*/

 " Operators, the range marker and quantifiers. These come after the
 " identifier matches so that a lone '-' is highlighted as an operator.
 syntax match ebnfOperator "::="
 syntax match ebnfOperator "[{}()\[\]|/,]"
 syntax match ebnfOperator ";"
 syntax match ebnfOperator "-"
 syntax match ebnfSpecial "\.\.\."
 syntax match ebnfQuantifier "[?*+]"

 " Linking highlights. 'default' keeps any link the user or a colorscheme has
 " already set for these groups.
 highlight default link ebnfComment Comment
 highlight default link ebnfMultiCharTerminal String
 highlight default link ebnfSingleCharTerminal Character
 highlight default link ebnfNonTermIdent Identifier
 highlight default link ebnfLhsIdent Underlined
 highlight default link ebnfOperator Operator
 highlight default link ebnfSpecial Special
 highlight default link ebnfQuantifier SpecialChar
 highlight default link ebnfCapture Statement

 let b:current_syntax = "ebnf"
diff --git a/tokenize-ebnf.scm b/tokenize-ebnf.scm
 ; tokenize-ebnf.scm
 ; A tokenizer for EBNF
 ; This works with any Scheme interpreter or compiler compliant with R7RS-Small
 ; Released under Public Domain License - 2024 (C) Chubak Bidpaa


 ; consume-while: take the longest prefix of the character list REST whose
 ; elements all satisfy PREDICATE.
 ; Returns two values: the prefix as a string, and the list of characters
 ; left over (starting at the first rejected character, or empty).
 (define (consume-while predicate rest)
   (let scan ((remaining rest)
              (taken '()))
     (cond
       ; Input exhausted: everything seen so far is the prefix.
       ((null? remaining)
        (values (list->string (reverse taken)) remaining))
       ; Accepted character: keep it (newest first) and move on.
       ((predicate (car remaining))
        (scan (cdr remaining) (cons (car remaining) taken)))
       ; First rejected character ends the prefix and stays in the remainder.
       (else
        (values (list->string (reverse taken)) remaining)))))

 ; consume-until: take characters from the list REST up to, but not
 ; including, the first one that satisfies PREDICATE.
 ; Returns two values: the characters taken, as a string, and the remaining
 ; list (starting at the character that satisfied PREDICATE, or empty).
 (define (consume-until predicate rest)
   (let scan ((remaining rest)
              (taken '()))
     ; Keep going only while there is input and the stop condition is unmet.
     (if (and (not (null? remaining))
              (not (predicate (car remaining))))
         (scan (cdr remaining) (cons (car remaining) taken))
         ; TAKEN was built newest-first, so reverse it before converting.
         (values (list->string (reverse taken)) remaining))))


 ; tokenize: scan the string INPUT and return its tokens as a list in source
 ; order, bracketed by (INIT StartOfStream) and (END EndOfStream).
 ;
 ; Tokens that carry text are pairs (KIND . "text") with KIND one of TERMINAL,
 ; NON-TEMRINAL-ID, SPECIAL, REGEX or COMMENT; every other token is a fixed
 ; two-symbol list such as (CTRL SEMI). Whitespace is skipped. A character
 ; that no clause recognises raises an error with the message "Loose token".
 ;
 ; NOTE(review): the tags NON-TEMRINAL-ID (sic) and (DECORE COLON) for ','
 ; look like misnomers; they are kept unchanged because code outside this
 ; view may match on them.
 (define (tokenize input)
   (let loop ((rest (string->list input))
              ; The accumulator is a list of tokens, newest first, so the
              ; start marker has to be wrapped in a list of its own.
              (tokens (list '(INIT StartOfStream))))
     (cond
       ; End of input: push the end marker, then restore source order.
       ((null? rest)
        (reverse (cons '(END EndOfStream) tokens)))
       ; Whitespace only separates tokens.
       ((member (car rest) '(#\newline #\space #\tab))
        (loop (cdr rest) tokens))
       ; Quoted terminal: everything up to the next occurrence of the same
       ; quote character that opened it.
       ((member (car rest) '(#\" #\' #\`))
        (let-values
            (((text rest-after)
              (consume-while (lambda (ch) (not (char=? ch (car rest))))
                             (cdr rest))))
          ; rest-after starts at the closing quote (or is empty when the
          ; terminal is unterminated). Step over the quote so it is not
          ; scanned again as the opening of another terminal.
          (loop (if (null? rest-after) rest-after (cdr rest-after))
                (cons (cons 'TERMINAL text) tokens))))
       ; Identifier: a letter followed by letters, digits or '-'.
       ((char-alphabetic? (car rest))
        (let-values
            (((text rest-after)
              (consume-while (lambda (ch)
                               (or (char-alphabetic? ch)
                                   (char-numeric? ch)
                                   (char=? ch #\-)))
                             rest)))
          (loop rest-after (cons (cons 'NON-TEMRINAL-ID text) tokens))))
       ; Assignment: ':' plus any run of ':' or '=' after it; the text itself
       ; is discarded.
       ((char=? (car rest) #\:)
        (let-values
            (((ignored rest-after)
              (consume-while (lambda (ch)
                               (or (char=? ch #\:) (char=? ch #\=)))
                             (cdr rest))))
          (loop rest-after (cons '(ASSIGN ASSIGN) tokens))))
       ; Range: '.' plus any run of '.' after it; the text is discarded.
       ((char=? (car rest) #\.)
        (let-values
            (((ignored rest-after)
              (consume-while (lambda (ch) (char=? ch #\.)) (cdr rest))))
          (loop rest-after (cons '(RANGE RANGE) tokens))))
       ; Backslash sequence: everything after '\' up to the next whitespace.
       ((char=? (car rest) #\\)
        (let-values
            (((text rest-after)
              (consume-while (lambda (ch)
                               (not (member ch '(#\newline #\space #\tab))))
                             (cdr rest))))
          (loop rest-after (cons (cons 'SPECIAL text) tokens))))
       ; Regex: everything between a pair of '/' characters.
       ((char=? (car rest) #\/)
        (let-values
            (((text rest-after)
              (consume-until (lambda (ch) (char=? ch #\/)) (cdr rest))))
          ; As with terminals, step over the closing '/' when there is one;
          ; otherwise it would be scanned as the start of another regex.
          (loop (if (null? rest-after) rest-after (cdr rest-after))
                (cons (cons 'REGEX text) tokens))))
       ; Single-character punctuation maps to a fixed token.
       ((member (car rest) '(#\( #\[ #\{ #\) #\] #\} #\; #\, #\|))
        (loop (cdr rest)
              (cons (case (car rest)
                      ((#\;) '(CTRL SEMI))
                      ((#\() '(SUB-START LPAREN))
                      ((#\[) '(SUB-START LBRACK))
                      ((#\{) '(SUB-START LCURLY))
                      ((#\)) '(SUB-END RPAREN))
                      ((#\]) '(SUB-END RBRACK))
                      ((#\}) '(SUB-END RCURLY))
                      ((#\|) '(CTRL ALTCHR))
                      ((#\,) '(DECORE COLON)))
                    tokens)))
       ; Comment: from '#' to the end of the line. The newline is left in
       ; place and skipped by the whitespace clause on the next iteration.
       ((char=? (car rest) #\#)
        (let-values
            (((text rest-after)
              (consume-until (lambda (ch) (char=? ch #\newline)) (cdr rest))))
          (loop rest-after (cons (cons 'COMMENT text) tokens))))
       (else
        (error "Loose token" (car rest))))))
	# Bootstrap character classes.

	# The ten decimal digits.
	digit ::=
	    | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
	    ;

	# A digit with '0' excluded.
	digit-natural ::=
	    digit - '0'
	    ;

	# Upper-case letters A to Z.
	upper-case ::=
	    | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J'
	    | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T'
	    | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
	    ;

	# Lower-case letters a to z.
	lower-case ::=
	    | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j'
	    | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't'
	    | 'u' | 'v' | 'w' | 'x' | 'y' | 'z'
	    ;

	# Punctuation marks, each written as a quoted terminal.
	punctuation ::=
	    | '.' | ',' | ';' | ':' | '!' | '?' | '-' | '_' | '(' | ')'
	    | '{' | '}' | '[' | ']' | '"' | "'" | '`' | '@' | '#'
	    | '$' | '%' | '^' | '&' | '*' | '+' | '=' | '<' | '>'
	    | '/' | '|' | '~' | '\'
	    ;

	# The three rules below are described in words between question marks
	# instead of being spelled out as terminals.
	space ::=
	    ? Sequential white space ?
	    ;

	newline ::=
	    ? The newline character on native system ?
	    ;

	tabulator ::=
	    ? The tab character on native system ?
	    ;

	# Composite classes, each one built from the rules above it.
	alphabetic ::=
	    upper-case | lower-case
	    ;

	alphanumeric ::=
	    alphabetic | digit
	    ;

	symbolic ::=
	    punctuation | alphanumeric | space
	    ;

	printable ::=
	    symbolic | tabulator
	    ;

	character ::=
	    printable | newline
	    ;
	" ebnf.vim - Syntax highlighting for EBNF
	" Improved by: Your Name Here
	" Original Author: Chubak Bidpaa ([email protected])

	" Do nothing when a syntax has already been set up for this buffer.
	if exists("b:current_syntax")
	  finish
	endif

	" Comments run from '#' to the end of the line (adjust according to EBNF variant)
	syntax region ebnfComment start=/\v\#.*$/ end=/$/ keepend
	" Quoted terminals are regions, so operators inside the quotes are not matched
	syntax region ebnfMultiCharTerminal start=/\v"/ end=/\v"/
	syntax region ebnfSingleCharTerminal start=/\v'/ end=/\v'/

	" Capture patterns: '? ... ?' closed by a '?' at end of line
	" (optional, depends on EBNF variant). The whitespace before each '?' is
	" optional, so a '?' directly at the start of a line also opens a capture.
	syntax region ebnfCapture start=/\v\s*\?/ end=/\v\s*\?$/ keepend

	" Non-terminal identifiers. The tail class includes A-Z so that a mixed-case
	" name such as fooBar is matched as one identifier instead of being split.
	syntax match ebnfNonTermIdent /\v[-_a-zA-Z][-_a-zA-Z0-9]*/
	" LHS identifiers: the same pattern anchored at the start of the line. It is
	" defined after ebnfNonTermIdent so that it wins when both match there.
	syntax match ebnfLhsIdent /\v^[-_a-zA-Z][-_a-zA-Z0-9]*/

	" Operators, the range marker and quantifiers. These come after the
	" identifier matches so that a lone '-' is highlighted as an operator.
	syntax match ebnfOperator "::="
	syntax match ebnfOperator "[{}()\[\]|/,]"
	syntax match ebnfOperator ";"
	syntax match ebnfOperator "-"
	syntax match ebnfSpecial "\.\.\."
	syntax match ebnfQuantifier "[?*+]"

	" Linking highlights. 'default' keeps any link the user or a colorscheme has
	" already set for these groups.
	highlight default link ebnfComment Comment
	highlight default link ebnfMultiCharTerminal String
	highlight default link ebnfSingleCharTerminal Character
	highlight default link ebnfNonTermIdent Identifier
	highlight default link ebnfLhsIdent Underlined
	highlight default link ebnfOperator Operator
	highlight default link ebnfSpecial Special
	highlight default link ebnfQuantifier SpecialChar
	highlight default link ebnfCapture Statement

	let b:current_syntax = "ebnf"
	; tokenize-ebnf.scm
	; A tokenizer for EBNF
	; This works with any Scheme interpreter or compiler compliant with R7RS-Small
	; Released under Public Domain License - 2024 (C) Chubak Bidpaa


	; consume-while: take the longest prefix of the character list REST whose
	; elements all satisfy PREDICATE.
	; Returns two values: the prefix as a string, and the list of characters
	; left over (starting at the first rejected character, or empty).
	(define (consume-while predicate rest)
	  (let scan ((remaining rest)
	             (taken '()))
	    (cond
	      ; Input exhausted: everything seen so far is the prefix.
	      ((null? remaining)
	       (values (list->string (reverse taken)) remaining))
	      ; Accepted character: keep it (newest first) and move on.
	      ((predicate (car remaining))
	       (scan (cdr remaining) (cons (car remaining) taken)))
	      ; First rejected character ends the prefix and stays in the remainder.
	      (else
	       (values (list->string (reverse taken)) remaining)))))

	; consume-until: take characters from the list REST up to, but not
	; including, the first one that satisfies PREDICATE.
	; Returns two values: the characters taken, as a string, and the remaining
	; list (starting at the character that satisfied PREDICATE, or empty).
	(define (consume-until predicate rest)
	  (let scan ((remaining rest)
	             (taken '()))
	    ; Keep going only while there is input and the stop condition is unmet.
	    (if (and (not (null? remaining))
	             (not (predicate (car remaining))))
	        (scan (cdr remaining) (cons (car remaining) taken))
	        ; TAKEN was built newest-first, so reverse it before converting.
	        (values (list->string (reverse taken)) remaining))))


	; tokenize: scan the string INPUT and return its tokens as a list in source
	; order, bracketed by (INIT StartOfStream) and (END EndOfStream).
	;
	; Tokens that carry text are pairs (KIND . "text") with KIND one of TERMINAL,
	; NON-TEMRINAL-ID, SPECIAL, REGEX or COMMENT; every other token is a fixed
	; two-symbol list such as (CTRL SEMI). Whitespace is skipped. A character
	; that no clause recognises raises an error with the message "Loose token".
	;
	; NOTE(review): the tags NON-TEMRINAL-ID (sic) and (DECORE COLON) for ','
	; look like misnomers; they are kept unchanged because code outside this
	; view may match on them.
	(define (tokenize input)
	  (let loop ((rest (string->list input))
	             ; The accumulator is a list of tokens, newest first, so the
	             ; start marker has to be wrapped in a list of its own.
	             (tokens (list '(INIT StartOfStream))))
	    (cond
	      ; End of input: push the end marker, then restore source order.
	      ((null? rest)
	       (reverse (cons '(END EndOfStream) tokens)))
	      ; Whitespace only separates tokens.
	      ((member (car rest) '(#\newline #\space #\tab))
	       (loop (cdr rest) tokens))
	      ; Quoted terminal: everything up to the next occurrence of the same
	      ; quote character that opened it.
	      ((member (car rest) '(#\" #\' #\`))
	       (let-values
	           (((text rest-after)
	             (consume-while (lambda (ch) (not (char=? ch (car rest))))
	                            (cdr rest))))
	         ; rest-after starts at the closing quote (or is empty when the
	         ; terminal is unterminated). Step over the quote so it is not
	         ; scanned again as the opening of another terminal.
	         (loop (if (null? rest-after) rest-after (cdr rest-after))
	               (cons (cons 'TERMINAL text) tokens))))
	      ; Identifier: a letter followed by letters, digits or '-'.
	      ((char-alphabetic? (car rest))
	       (let-values
	           (((text rest-after)
	             (consume-while (lambda (ch)
	                              (or (char-alphabetic? ch)
	                                  (char-numeric? ch)
	                                  (char=? ch #\-)))
	                            rest)))
	         (loop rest-after (cons (cons 'NON-TEMRINAL-ID text) tokens))))
	      ; Assignment: ':' plus any run of ':' or '=' after it; the text itself
	      ; is discarded.
	      ((char=? (car rest) #\:)
	       (let-values
	           (((ignored rest-after)
	             (consume-while (lambda (ch)
	                              (or (char=? ch #\:) (char=? ch #\=)))
	                            (cdr rest))))
	         (loop rest-after (cons '(ASSIGN ASSIGN) tokens))))
	      ; Range: '.' plus any run of '.' after it; the text is discarded.
	      ((char=? (car rest) #\.)
	       (let-values
	           (((ignored rest-after)
	             (consume-while (lambda (ch) (char=? ch #\.)) (cdr rest))))
	         (loop rest-after (cons '(RANGE RANGE) tokens))))
	      ; Backslash sequence: everything after '\' up to the next whitespace.
	      ((char=? (car rest) #\\)
	       (let-values
	           (((text rest-after)
	             (consume-while (lambda (ch)
	                              (not (member ch '(#\newline #\space #\tab))))
	                            (cdr rest))))
	         (loop rest-after (cons (cons 'SPECIAL text) tokens))))
	      ; Regex: everything between a pair of '/' characters.
	      ((char=? (car rest) #\/)
	       (let-values
	           (((text rest-after)
	             (consume-until (lambda (ch) (char=? ch #\/)) (cdr rest))))
	         ; As with terminals, step over the closing '/' when there is one;
	         ; otherwise it would be scanned as the start of another regex.
	         (loop (if (null? rest-after) rest-after (cdr rest-after))
	               (cons (cons 'REGEX text) tokens))))
	      ; Single-character punctuation maps to a fixed token.
	      ((member (car rest) '(#\( #\[ #\{ #\) #\] #\} #\; #\, #\|))
	       (loop (cdr rest)
	             (cons (case (car rest)
	                     ((#\;) '(CTRL SEMI))
	                     ((#\() '(SUB-START LPAREN))
	                     ((#\[) '(SUB-START LBRACK))
	                     ((#\{) '(SUB-START LCURLY))
	                     ((#\)) '(SUB-END RPAREN))
	                     ((#\]) '(SUB-END RBRACK))
	                     ((#\}) '(SUB-END RCURLY))
	                     ((#\|) '(CTRL ALTCHR))
	                     ((#\,) '(DECORE COLON)))
	                   tokens)))
	      ; Comment: from '#' to the end of the line. The newline is left in
	      ; place and skipped by the whitespace clause on the next iteration.
	      ((char=? (car rest) #\#)
	       (let-values
	           (((text rest-after)
	             (consume-until (lambda (ch) (char=? ch #\newline)) (cdr rest))))
	         (loop rest-after (cons (cons 'COMMENT text) tokens))))
	      (else
	       (error "Loose token" (car rest))))))