Fix dc bug

This bug fix also made things much simpler: the lexers can now ask for more data from stdin when they need it, such as when a string or comment has been started but not yet ended. The dc bug was that a left bracket used as a register name was treated as the start of a string, so dc would not execute until a balancing right bracket was given. Signed-off-by: Gavin Howard <[email protected]>
2021-06-27 01:40:15 -06:00 · 2021-06-27 01:40:15 -06:00 · 8a1d001dcf
parent 1c9d222b46
commit 8a1d001dcf
10 changed files with 176 additions and 124 deletions
--- a/include/lex.h
+++ b/include/lex.h
@ -433,6 +433,8 @@ typedef struct BcLex {
 	/// the string.
 	BcVec str;

+	bool is_stdin;
+
 } BcLex;

 /**
@ -458,10 +460,11 @@ void bc_lex_file(BcLex *l, const char *file);

 /**
 * Sets the text the lexer will lex.
- * @param l     The lexer.
- * @param text  The text to lex.
+ * @param l         The lexer.
+ * @param text      The text to lex.
+ * @param is_stdin  True if the text is from stdin, false otherwise.
 */
-void bc_lex_text(BcLex *l, const char *text);
+void bc_lex_text(BcLex *l, const char *text, bool is_stdin);

 /**
 * Generic next function for the parser to call. It takes care of calling the
@ -518,4 +521,10 @@ void bc_lex_commonTokens(BcLex *l, char c);
 */
 void bc_lex_invalidChar(BcLex *l, char c);

+/**
+ * Reads a line from stdin and puts it into the lexer's buffer.
+ * @param l         The lexer.
+ */
+bool bc_lex_readLine(BcLex *l);
+
 #endif // BC_LEX_H
--- a/include/parse.h
+++ b/include/parse.h
@ -218,10 +218,11 @@ void bc_parse_pushName(const BcParse* p, char *name, bool var);

 /**
 * Sets the text that the parser will parse.
- * @param p     The parser.
- * @param text  The text that will be parsed.
+ * @param p         The parser.
+ * @param text      The text to lex.
+ * @param is_stdin  True if the text is from stdin, false otherwise.
 */
-void bc_parse_text(BcParse *p, const char *text);
+void bc_parse_text(BcParse *p, const char *text, bool is_stdin);

 // References to const 0 and 1 strings for special cases. bc and dc have
 // specific instructions for 0 and 1 because they pop up so often and (in the
--- a/include/vm.h
+++ b/include/vm.h
@ -334,6 +334,9 @@ typedef struct BcVm {
 #if !BC_ENABLE_LIBRARY
 	BcParse prs;
 	BcProgram prog;
+
+	BcVec line_buf;
+	BcVec buffer;
 #endif // !BC_ENABLE_LIBRARY

 	BcVec jmp_bufs;
@ -374,6 +377,7 @@ typedef struct BcVm {
 	bool no_exit_exprs;
 	bool exit_exprs;
 	bool eof;
+	bool is_stdin;
 #endif // !BC_ENABLE_LIBRARY

 	BcBigDig maxes[BC_PROG_GLOBALS_LEN + BC_ENABLE_EXTRA_MATH];
@ -450,6 +454,8 @@ void* bc_vm_malloc(size_t n);
 void* bc_vm_realloc(void *ptr, size_t n);
 char* bc_vm_strdup(const char *str);

+bool bc_vm_readLine(bool clear);
+
 void bc_pledge(const char *promises, const char *execpromises);

 char* bc_vm_getenv(const char* var);
--- a/manuals/dc.1.md.in
+++ b/manuals/dc.1.md.in
@ -139,19 +139,12 @@ If no files are given on the command-line and no files or expressions are given
 by the **-f**, **-\-file**, **-e**, or **-\-expression** options, then dc(1)
 reads from **stdin**.

-However, there are a few caveats to this.
+However, there is a caveat to this.

 First, **stdin** is evaluated a line at a time. The only exception to this is if
 a string has been started, but not ended. This means that, except for escaped
 brackets, all brackets must be balanced before dc(1) parses and executes.

-Second, as a consequence of the above, if the user attempts to use the left
-bracket character, **[**, as a register name, dc(1) will not execute until a
-balancing right bracket, **]** is given. Then it will give an error since it is
-an error to use the left bracket as register name (see the **REGISTERS**
-section). In fact, this is why it is an error.
-
-
 # STDOUT

 Any non-error output is written to **stdout**. In addition, if history (see the
--- a/src/bc_lex.c
+++ b/src/bc_lex.c
@ -90,17 +90,31 @@ static void bc_lex_identifier(BcLex *l) {
 static void bc_lex_string(BcLex *l) {

 	// We need to keep track of newlines to increment them properly.
-	size_t len, nlines = 0, i = l->i;
-	const char *buf = l->buf;
+	size_t len, nlines, i;
+	const char *buf;
 	char c;
+	bool got_more;

 	l->t = BC_LEX_STR;

-	// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent is
-	// '\q', which makes this loop simpler.
-	for (; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
+	do {

-	if (BC_ERR(c == '\0')) {
+		nlines = 0;
+		buf = l->buf;
+		got_more = false;
+
+		assert(!vm.is_stdin || buf == vm.buffer.v);
+
+		// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent
+		// is '\q', which makes this loop simpler.
+		for (i = l->i; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
+
+		if (BC_ERR(c == '\0') && !vm.eof && l->is_stdin)
+			got_more = bc_lex_readLine(l);
+
+	} while (got_more && c != '"');
+
+	if (c != '"') {
 		l->i = i;
 		bc_lex_err(l, BC_ERR_PARSE_STRING);
 	}
--- a/src/dc_lex.c
+++ b/src/dc_lex.c
@ -72,10 +72,8 @@ static void dc_lex_register(BcLex *l) {
 	else {

 		// I don't allow newlines because newlines are used for controlling when
-		// execution happens, and allowing newlines would just be complex. For
-		// the same reason, I don't allow the '[' character; it would just be
-		// too complex.
-		if (BC_ERR(l->buf[l->i - 1] == '\n' || l->buf[l->i - 1] == '['))
+		// execution happens, and allowing newlines would just be complex.
+		if (BC_ERR(l->buf[l->i - 1] == '\n'))
 			bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);

 		// Set the lexer string and token.
@ -94,33 +92,49 @@ static void dc_lex_register(BcLex *l) {
 */
 static void dc_lex_string(BcLex *l) {

-	size_t depth = 1, nls = 0, i = l->i;
+	size_t depth, nls, i;
 	char c;
+	bool got_more;

 	// Set the token and clear the string.
 	l->t = BC_LEX_STR;
 	bc_vec_popAll(&l->str);

-	// This is the meat. As long as we don't run into the NUL byte, and we have
-	// "depth", which means we haven't completely balanced brackets yet, we
-	// continue eating the string.
-	for (; (c = l->buf[i]) && depth; ++i) {
+	do {

-		// Check for escaped brackets and set the depths as appropriate.
-		if (c == '\\') {
-			c = l->buf[++i];
-			if (!c) break;
-		}
-		else {
-			depth += (c == '[');
-			depth -= (c == ']');
+		depth = 1;
+		nls = 0;
+		got_more = false;
+
+		assert(!l->is_stdin || l->buf == vm.buffer.v);
+
+		// This is the meat. As long as we don't run into the NUL byte, and we
+		// have "depth", which means we haven't completely balanced brackets
+		// yet, we continue eating the string.
+		for (i = l->i; (c = l->buf[i]) && depth; ++i) {
+
+			// Check for escaped brackets and set the depths as appropriate.
+			if (c == '\\') {
+				c = l->buf[++i];
+				if (!c) break;
+			}
+			else {
+				depth += (c == '[');
+				depth -= (c == ']');
+			}
+
+			// We want to adjust the line in the lexer as necessary.
+			nls += (c == '\n');
+
+			if (depth) bc_vec_push(&l->str, &c);
 		}

-		// We want to adjust the line in the lexer as necessary.
-		nls += (c == '\n');
+		if (BC_ERR(c == '\0' && depth)) {
+			if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
+			if (got_more) bc_vec_popAll(&l->str);
+		}

-		if (depth) bc_vec_push(&l->str, &c);
-	}
+	} while (got_more && depth);

 	// Obviously, if we didn't balance, that's an error.
 	if (BC_ERR(c == '\0' && depth)) {
--- a/src/lex.c
+++ b/src/lex.c
@ -55,23 +55,38 @@ void bc_lex_lineComment(BcLex *l) {
 void bc_lex_comment(BcLex *l) {

 	size_t i, nlines = 0;
-	const char *buf = l->buf;
-	bool end = false;
+	const char *buf;
+	bool end = false, got_more;
 	char c;

 	l->i += 1;
 	l->t = BC_LEX_WHITESPACE;

-	for (i = l->i; !end; i += !end) {
+	do {

-		for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
+		buf = l->buf;
+		got_more = false;

-		if (BC_ERR(!c || buf[i + 1] == '\0')) {
-			l->i = i;
-			bc_lex_err(l, BC_ERR_PARSE_COMMENT);
+		assert(!vm.is_stdin || buf == vm.buffer.v);
+
+		for (i = l->i; !end; i += !end) {
+
+			for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
+
+			if (BC_ERR(!c || buf[i + 1] == '\0')) {
+
+				if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
+
+				break;
+			}
+
+			end = (buf[i + 1] == '/');
 		}
+	} while (got_more && !end);

-		end = buf[i + 1] == '/';
+	if (!end) {
+		l->i = i;
+		bc_lex_err(l, BC_ERR_PARSE_COMMENT);
 	}

 	l->i = i + 2;
@ -220,11 +235,25 @@ void bc_lex_next(BcLex *l) {
 	} while (l->t == BC_LEX_WHITESPACE);
 }

-void bc_lex_text(BcLex *l, const char *text) {
-	assert(l != NULL && text != NULL);
+static void bc_lex_fixText(BcLex *l, const char *text, size_t len) {
 	l->buf = text;
+	l->len = len;
+}
+
+bool bc_lex_readLine(BcLex *l) {
+
+	bool good = bc_vm_readLine(false);
+
+	bc_lex_fixText(l, vm.buffer.v, vm.buffer.len - 1);
+
+	return good;
+}
+
+void bc_lex_text(BcLex *l, const char *text, bool is_stdin) {
+	assert(l != NULL && text != NULL);
+	bc_lex_fixText(l, text, strlen(text));
 	l->i = 0;
-	l->len = strlen(text);
 	l->t = l->last = BC_LEX_INVALID;
+	l->is_stdin = is_stdin;
 	bc_lex_next(l);
 }
--- a/src/parse.c
+++ b/src/parse.c
@ -137,10 +137,10 @@ void bc_parse_number(BcParse *p) {
 #endif // BC_ENABLE_EXTRA_MATH
 }

-void bc_parse_text(BcParse *p, const char *text) {
+void bc_parse_text(BcParse *p, const char *text, bool is_stdin) {
 	// Make sure the pointer isn't invalidated.
 	p->func = bc_vec_item(&p->prog->fns, p->fidx);
-	bc_lex_text(&p->l, text);
+	bc_lex_text(&p->l, text, is_stdin);
 }

 void bc_parse_reset(BcParse *p) {
--- a/src/program.c
+++ b/src/program.c
@ -461,7 +461,7 @@ static void bc_program_read(BcProgram *p) {

 	if (s == BC_STATUS_EOF) bc_vm_err(BC_ERR_EXEC_READ_EXPR);

-	bc_parse_text(&parse, buf.v);
+	bc_parse_text(&parse, buf.v, false);
 	vm.expr(&parse, BC_PARSE_NOREAD | BC_PARSE_NEEDVAL);

 	if (BC_ERR(parse.l.t != BC_LEX_NLINE && parse.l.t != BC_LEX_EOF))
@ -1564,7 +1564,7 @@ static void bc_program_execStr(BcProgram *p, const char *restrict code,

 		BC_SIG_UNLOCK;

-		bc_parse_text(&prs, str);
+		bc_parse_text(&prs, str, false);
 		vm.expr(&prs, BC_PARSE_NOCALL);

 		BC_SIG_LOCK;
--- a/src/vm.c
+++ b/src/vm.c
@ -690,9 +690,9 @@ static void bc_vm_clean(void) {
 	}
 }

-static void bc_vm_process(const char *text) {
+static void bc_vm_process(const char *text, bool is_stdin) {

-	bc_parse_text(&vm.prs, text);
+	bc_parse_text(&vm.prs, text, is_stdin);

 	do {

@ -727,7 +727,14 @@ static void bc_vm_endif(void) {
 	}

 	if (good) {
-		while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}");
+
+		bool is_stdin = vm.is_stdin;
+
+		vm.is_stdin = false;
+
+		while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}", false);
+
+		vm.is_stdin = is_stdin;
 	}
 	else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK);
 }
@ -751,7 +758,7 @@ static void bc_vm_file(const char *file) {

 	BC_SIG_UNLOCK;

-	bc_vm_process(data);
+	bc_vm_process(data, false);

 #if BC_ENABLED
 	if (BC_IS_BC) bc_vm_endif();
@ -770,87 +777,66 @@ err:
 	BC_LONGJMP_CONT;
 }

+bool bc_vm_readLine(bool clear) {
+
+	BcStatus s;
+	bool good;
+
+	if (clear) bc_vec_empty(&vm.buffer);
+
+	bc_vec_empty(&vm.line_buf);
+
+	if (vm.eof) return false;
+
+	do {
+		s = bc_read_line(&vm.line_buf, ">>> ");
+		vm.eof = (s == BC_STATUS_EOF);
+	} while (!(s) && !vm.eof && vm.line_buf.len < 1);
+
+	good = (vm.line_buf.len > 1);
+
+	if (good) bc_vec_concat(&vm.buffer, vm.line_buf.v);
+
+	return good;
+}
+
 static void bc_vm_stdin(void) {

 	BcStatus s;
-	BcVec buf, buffer;
-	size_t string = 0;
-	bool comment = false, hash = false;
+	bool clear = true;
+
+	vm.is_stdin = true;

 	bc_lex_file(&vm.prs.l, bc_program_stdin_name);

 	BC_SIG_LOCK;
-	bc_vec_init(&buffer, sizeof(uchar), NULL);
-	bc_vec_init(&buf, sizeof(uchar), NULL);
-	bc_vec_pushByte(&buffer, '\0');
+	bc_vec_init(&vm.buffer, sizeof(uchar), NULL);
+	bc_vec_init(&vm.line_buf, sizeof(uchar), NULL);
 	BC_SETJMP_LOCKED(err);
 	BC_SIG_UNLOCK;

+// This label exists because errors can cause jumps to the err label below.
+// If that happens, and the error is cleared so that execution should
+// continue, then we need to jump back here.
 restart:

-	// This loop is complex because the vm tries not to send any lines that end
-	// with a backslash to the parser. The reason for that is because the parser
-	// treats a backslash+newline combo as whitespace, per the bc spec. In that
-	// case, and for strings and comments, the parser will expect more stuff.
-	while ((!(s = bc_read_line(&buf, ">>> ")) ||
-	        (vm.eof = (s == BC_STATUS_EOF))) && buf.len > 1)
-	{
-		char c2, *str = buf.v;
-		size_t i, len = buf.len - 1;
+	while (bc_vm_readLine(clear)) {

-		for (i = 0; i < len; ++i) {
+		size_t len = vm.buffer.len - 1;
+		const char *str = vm.buffer.v;

-			bool notend = len > i + 1;
-			uchar c = (uchar) str[i];
+		clear = (len < 2 || str[len - 2] != '\\' || str[len - 1] != '\n');
+		if (!clear) continue;

-			hash = (!comment && !string && ((hash && c != '\n') ||
-			                                (!hash && c == '#')));
-
-			if (!hash && !comment && (i - 1 > len || str[i - 1] != '\\')) {
-				if (BC_IS_BC) string ^= (c == '"');
-				else if (c == ']') string -= 1;
-				else if (c == '[') string += 1;
-			}
-
-			if (BC_IS_BC && !hash && !string && notend) {
-
-				c2 = str[i + 1];
-
-				if (c == '/' && !comment && c2 == '*') {
-					comment = true;
-					i += 1;
-				}
-				else if (c == '*' && comment && c2 == '/') {
-					comment = false;
-					i += 1;
-				}
-			}
-		}
-
-		bc_vec_concat(&buffer, buf.v);
-
-		if (string || comment) continue;
-		if (len >= 2 && str[len - 2] == '\\' && str[len - 1] == '\n') continue;
-#if BC_ENABLE_HISTORY
-		if (vm.history.stdin_has_data) continue;
-#endif // BC_ENABLE_HISTORY
-
-		bc_vm_process(buffer.v);
-		bc_vec_empty(&buffer);
+		bc_vm_process(vm.buffer.v, true);

 		if (vm.eof) break;
 		else bc_vm_clean();
 	}

-	if (!BC_STATUS_IS_ERROR(s)) {
-		if (BC_ERR(comment))
-			bc_parse_err(&vm.prs, BC_ERR_PARSE_COMMENT);
-		else if (BC_ERR(string))
-			bc_parse_err(&vm.prs, BC_ERR_PARSE_STRING);
 #if BC_ENABLED
-		else if (BC_IS_BC) bc_vm_endif();
+	if (!BC_STATUS_IS_ERROR(s) && BC_IS_BC) bc_vm_endif();
 #endif // BC_ENABLED
-	}

 err:
 	BC_SIG_MAYLOCK;
@ -869,14 +855,14 @@ err:
 #endif // !BC_ENABLE_MEMCHECK

 	if (!vm.status && !vm.eof) {
-		bc_vec_empty(&buffer);
+		bc_vec_empty(&vm.buffer);
 		BC_LONGJMP_STOP;
 		BC_SIG_UNLOCK;
 		goto restart;
 	}

-	bc_vec_free(&buf);
-	bc_vec_free(&buffer);
+	bc_vec_free(&vm.line_buf);
+	bc_vec_free(&vm.buffer);

 	BC_LONGJMP_CONT;
 }
@ -885,7 +871,7 @@ err:
 static void bc_vm_load(const char *name, const char *text) {

 	bc_lex_file(&vm.prs.l, name);
-	bc_parse_text(&vm.prs, text);
+	bc_parse_text(&vm.prs, text, false);

 	while (vm.prs.l.t != BC_LEX_EOF) vm.parse(&vm.prs);
 }
@ -984,7 +970,7 @@ static void bc_vm_exec(void) {

 			more = bc_read_buf(&buf, vm.exprs.v, &len);
 			bc_vec_pushByte(&buf, '\0');
-			bc_vm_process(buf.v);
+			bc_vm_process(buf.v, false);

 			bc_vec_popAll(&buf);