Fix dc bug

This bug fix also made things much simpler. What it does is allow the
lexers to ask for more data from stdin when needed, like when a string
or comment is not ended properly. The dc bug was that it just didn't
work well with register commands.

Signed-off-by: Gavin Howard <[email protected]>
This commit is contained in:
Gavin Howard 2021-06-27 01:40:15 -06:00
parent 1c9d222b46
commit 8a1d001dcf
Signed by: gavin
GPG Key ID: C08038BDF280D33E
10 changed files with 176 additions and 124 deletions

View File

@ -433,6 +433,8 @@ typedef struct BcLex {
/// the string.
BcVec str;
bool is_stdin;
} BcLex;
/**
@ -458,10 +460,11 @@ void bc_lex_file(BcLex *l, const char *file);
/**
* Sets the text the lexer will lex.
* @param l The lexer.
* @param text The text to lex.
* @param l The lexer.
* @param text The text to lex.
* @param is_stdin True if the text is from stdin, false otherwise.
*/
void bc_lex_text(BcLex *l, const char *text);
void bc_lex_text(BcLex *l, const char *text, bool is_stdin);
/**
* Generic next function for the parser to call. It takes care of calling the
@ -518,4 +521,10 @@ void bc_lex_commonTokens(BcLex *l, char c);
*/
void bc_lex_invalidChar(BcLex *l, char c);
/**
* Reads a line from stdin and puts it into the lexer's buffer.
* The line is requested from the VM without clearing the text that is already
* buffered, and the lexer is then pointed at the VM's buffer again.
* @param l The lexer.
* @return True if the VM reported that it read more text, false otherwise.
*/
bool bc_lex_readLine(BcLex *l);
#endif // BC_LEX_H

View File

@ -218,10 +218,11 @@ void bc_parse_pushName(const BcParse* p, char *name, bool var);
/**
* Sets the text that the parser will parse.
* @param p The parser.
* @param text The text that will be parsed.
* @param p The parser.
* @param text The text to lex.
* @param is_stdin True if the text is from stdin, false otherwise.
*/
void bc_parse_text(BcParse *p, const char *text);
void bc_parse_text(BcParse *p, const char *text, bool is_stdin);
// References to const 0 and 1 strings for special cases. bc and dc have
// specific instructions for 0 and 1 because they pop up so often and (in the

View File

@ -334,6 +334,9 @@ typedef struct BcVm {
#if !BC_ENABLE_LIBRARY
BcParse prs;
BcProgram prog;
BcVec line_buf;
BcVec buffer;
#endif // !BC_ENABLE_LIBRARY
BcVec jmp_bufs;
@ -374,6 +377,7 @@ typedef struct BcVm {
bool no_exit_exprs;
bool exit_exprs;
bool eof;
bool is_stdin;
#endif // !BC_ENABLE_LIBRARY
BcBigDig maxes[BC_PROG_GLOBALS_LEN + BC_ENABLE_EXTRA_MATH];
@ -450,6 +454,8 @@ void* bc_vm_malloc(size_t n);
void* bc_vm_realloc(void *ptr, size_t n);
char* bc_vm_strdup(const char *str);
bool bc_vm_readLine(bool clear);
void bc_pledge(const char *promises, const char *execpromises);
char* bc_vm_getenv(const char* var);

View File

@ -139,19 +139,12 @@ If no files are given on the command-line and no files or expressions are given
by the **-f**, **-\-file**, **-e**, or **-\-expression** options, then dc(1)
reads from **stdin**.
However, there are a few caveats to this.
However, there is a caveat to this.
First, **stdin** is evaluated a line at a time. The only exception to this is if
a string has been started, but not ended. This means that, except for escaped
brackets, all brackets must be balanced before dc(1) parses and executes.
Second, as a consequence of the above, if the user attempts to use the left
bracket character, **[**, as a register name, dc(1) will not execute until a
balancing right bracket, **]**, is given. Then it will give an error since it is
an error to use the left bracket as a register name (see the **REGISTERS**
section). In fact, this is why it is an error.
# STDOUT
Any non-error output is written to **stdout**. In addition, if history (see the

View File

@ -90,17 +90,31 @@ static void bc_lex_identifier(BcLex *l) {
static void bc_lex_string(BcLex *l) {
// We need to keep track of newlines to increment them properly.
size_t len, nlines = 0, i = l->i;
const char *buf = l->buf;
size_t len, nlines, i;
const char *buf;
char c;
bool got_more;
l->t = BC_LEX_STR;
// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent is
// '\q', which makes this loop simpler.
for (; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
do {
if (BC_ERR(c == '\0')) {
nlines = 0;
buf = l->buf;
got_more = false;
assert(!vm.is_stdin || buf == vm.buffer.v);
// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent
// is '\q', which makes this loop simpler.
for (i = l->i; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
if (BC_ERR(c == '\0') && !vm.eof && l->is_stdin)
got_more = bc_lex_readLine(l);
} while (got_more && c != '"');
if (c != '"') {
l->i = i;
bc_lex_err(l, BC_ERR_PARSE_STRING);
}

View File

@ -72,10 +72,8 @@ static void dc_lex_register(BcLex *l) {
else {
// I don't allow newlines because newlines are used for controlling when
// execution happens, and allowing newlines would just be complex. For
// the same reason, I don't allow the '[' character; it would just be
// too complex.
if (BC_ERR(l->buf[l->i - 1] == '\n' || l->buf[l->i - 1] == '['))
// execution happens, and allowing newlines would just be complex.
if (BC_ERR(l->buf[l->i - 1] == '\n'))
bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
// Set the lexer string and token.
@ -94,33 +92,49 @@ static void dc_lex_register(BcLex *l) {
*/
static void dc_lex_string(BcLex *l) {
size_t depth = 1, nls = 0, i = l->i;
size_t depth, nls, i;
char c;
bool got_more;
// Set the token and clear the string.
l->t = BC_LEX_STR;
bc_vec_popAll(&l->str);
// This is the meat. As long as we don't run into the NUL byte, and we have
// "depth", which means we haven't completely balanced brackets yet, we
// continue eating the string.
for (; (c = l->buf[i]) && depth; ++i) {
do {
// Check for escaped brackets and set the depths as appropriate.
if (c == '\\') {
c = l->buf[++i];
if (!c) break;
}
else {
depth += (c == '[');
depth -= (c == ']');
depth = 1;
nls = 0;
got_more = false;
assert(!l->is_stdin || l->buf == vm.buffer.v);
// This is the meat. As long as we don't run into the NUL byte, and we
// have "depth", which means we haven't completely balanced brackets
// yet, we continue eating the string.
for (i = l->i; (c = l->buf[i]) && depth; ++i) {
// Check for escaped brackets and set the depths as appropriate.
if (c == '\\') {
c = l->buf[++i];
if (!c) break;
}
else {
depth += (c == '[');
depth -= (c == ']');
}
// We want to adjust the line in the lexer as necessary.
nls += (c == '\n');
if (depth) bc_vec_push(&l->str, &c);
}
// We want to adjust the line in the lexer as necessary.
nls += (c == '\n');
if (BC_ERR(c == '\0' && depth)) {
if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
if (got_more) bc_vec_popAll(&l->str);
}
if (depth) bc_vec_push(&l->str, &c);
}
} while (got_more && depth);
// Obviously, if we didn't balance, that's an error.
if (BC_ERR(c == '\0' && depth)) {

View File

@ -55,23 +55,38 @@ void bc_lex_lineComment(BcLex *l) {
void bc_lex_comment(BcLex *l) {
size_t i, nlines = 0;
const char *buf = l->buf;
bool end = false;
const char *buf;
bool end = false, got_more;
char c;
l->i += 1;
l->t = BC_LEX_WHITESPACE;
for (i = l->i; !end; i += !end) {
do {
for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
buf = l->buf;
got_more = false;
if (BC_ERR(!c || buf[i + 1] == '\0')) {
l->i = i;
bc_lex_err(l, BC_ERR_PARSE_COMMENT);
assert(!vm.is_stdin || buf == vm.buffer.v);
for (i = l->i; !end; i += !end) {
for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
if (BC_ERR(!c || buf[i + 1] == '\0')) {
if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
break;
}
end = (buf[i + 1] == '/');
}
} while (got_more && !end);
end = buf[i + 1] == '/';
if (!end) {
l->i = i;
bc_lex_err(l, BC_ERR_PARSE_COMMENT);
}
l->i = i + 2;
@ -220,11 +235,25 @@ void bc_lex_next(BcLex *l) {
} while (l->t == BC_LEX_WHITESPACE);
}
void bc_lex_text(BcLex *l, const char *text) {
assert(l != NULL && text != NULL);
/**
* Points the lexer at a text buffer and records its length. Only the buffer
* pointer and length are written; the lexer's index and token fields are left
* alone, so this can be used to swap in a grown buffer mid-lex.
* @param l The lexer.
* @param text The text the lexer should read from.
* @param len The length to record for @a text. The callers visible here pass
* either strlen(text) or the vm.buffer length minus one.
*/
static void bc_lex_fixText(BcLex *l, const char *text, size_t len) {
l->buf = text;
l->len = len;
}
/**
* Asks the VM for one more line of input and re-targets the lexer at the VM's
* buffer. The lexer's index is not changed.
* @param l The lexer.
* @return The result of bc_vm_readLine(): true if more text was added to
* vm.buffer, false otherwise.
*/
bool bc_lex_readLine(BcLex *l) {
// Pass false so the text already in vm.buffer is kept and the new line is
// appended after it.
bool good = bc_vm_readLine(false);
// Re-point the lexer even when nothing was read. Presumably the vector's
// storage can move when it grows, which would leave l->buf stale -- TODO
// confirm against bc_vec_concat().
// NOTE(review): vm.buffer.len - 1 assumes vm.buffer is never empty here;
// verify against the callers.
bc_lex_fixText(l, vm.buffer.v, vm.buffer.len - 1);
return good;
}
void bc_lex_text(BcLex *l, const char *text, bool is_stdin) {
assert(l != NULL && text != NULL);
bc_lex_fixText(l, text, strlen(text));
l->i = 0;
l->len = strlen(text);
l->t = l->last = BC_LEX_INVALID;
l->is_stdin = is_stdin;
bc_lex_next(l);
}

View File

@ -137,10 +137,10 @@ void bc_parse_number(BcParse *p) {
#endif // BC_ENABLE_EXTRA_MATH
}
void bc_parse_text(BcParse *p, const char *text) {
void bc_parse_text(BcParse *p, const char *text, bool is_stdin) {
// Make sure the pointer isn't invalidated.
p->func = bc_vec_item(&p->prog->fns, p->fidx);
bc_lex_text(&p->l, text);
bc_lex_text(&p->l, text, is_stdin);
}
void bc_parse_reset(BcParse *p) {

View File

@ -461,7 +461,7 @@ static void bc_program_read(BcProgram *p) {
if (s == BC_STATUS_EOF) bc_vm_err(BC_ERR_EXEC_READ_EXPR);
bc_parse_text(&parse, buf.v);
bc_parse_text(&parse, buf.v, false);
vm.expr(&parse, BC_PARSE_NOREAD | BC_PARSE_NEEDVAL);
if (BC_ERR(parse.l.t != BC_LEX_NLINE && parse.l.t != BC_LEX_EOF))
@ -1564,7 +1564,7 @@ static void bc_program_execStr(BcProgram *p, const char *restrict code,
BC_SIG_UNLOCK;
bc_parse_text(&prs, str);
bc_parse_text(&prs, str, false);
vm.expr(&prs, BC_PARSE_NOCALL);
BC_SIG_LOCK;

122
src/vm.c
View File

@ -690,9 +690,9 @@ static void bc_vm_clean(void) {
}
}
static void bc_vm_process(const char *text) {
static void bc_vm_process(const char *text, bool is_stdin) {
bc_parse_text(&vm.prs, text);
bc_parse_text(&vm.prs, text, is_stdin);
do {
@ -727,7 +727,14 @@ static void bc_vm_endif(void) {
}
if (good) {
while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}");
bool is_stdin = vm.is_stdin;
vm.is_stdin = false;
while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}", false);
vm.is_stdin = is_stdin;
}
else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK);
}
@ -751,7 +758,7 @@ static void bc_vm_file(const char *file) {
BC_SIG_UNLOCK;
bc_vm_process(data);
bc_vm_process(data, false);
#if BC_ENABLED
if (BC_IS_BC) bc_vm_endif();
@ -770,87 +777,66 @@ err:
BC_LONGJMP_CONT;
}
/**
* Reads a line into vm.line_buf (prompting with ">>> ") and, if it has
* content, appends it to vm.buffer.
* @param clear True to empty vm.buffer before reading, false to keep what is
* already there so the new line is appended to it.
* @return True if a line was read and appended to vm.buffer, false otherwise
* (including when EOF had already been seen).
*/
bool bc_vm_readLine(bool clear) {
BcStatus s;
bool good;
// The buffers are emptied before the EOF check, so they are reset even when
// this returns early.
if (clear) bc_vec_empty(&vm.buffer);
bc_vec_empty(&vm.line_buf);
if (vm.eof) return false;
// Keep reading while the status is zero, EOF has not been hit, and
// vm.line_buf.len is still less than 1.
do {
s = bc_read_line(&vm.line_buf, ">>> ");
vm.eof = (s == BC_STATUS_EOF);
} while (!(s) && !vm.eof && vm.line_buf.len < 1);
// Presumably the length counts a trailing NUL byte, so len > 1 means at
// least one real character was read -- TODO confirm against bc_read_line().
good = (vm.line_buf.len > 1);
if (good) bc_vec_concat(&vm.buffer, vm.line_buf.v);
return good;
}
static void bc_vm_stdin(void) {
BcStatus s;
BcVec buf, buffer;
size_t string = 0;
bool comment = false, hash = false;
bool clear = true;
vm.is_stdin = true;
bc_lex_file(&vm.prs.l, bc_program_stdin_name);
BC_SIG_LOCK;
bc_vec_init(&buffer, sizeof(uchar), NULL);
bc_vec_init(&buf, sizeof(uchar), NULL);
bc_vec_pushByte(&buffer, '\0');
bc_vec_init(&vm.buffer, sizeof(uchar), NULL);
bc_vec_init(&vm.line_buf, sizeof(uchar), NULL);
BC_SETJMP_LOCKED(err);
BC_SIG_UNLOCK;
// This label is because errors can cause jumps to end up at the err label
// below. If that happens, and the error should be cleared and execution
// continue, then we need to jump back.
restart:
// This loop is complex because the vm tries not to send any lines that end
// with a backslash to the parser. The reason for that is because the parser
// treats a backslash+newline combo as whitespace, per the bc spec. In that
// case, and for strings and comments, the parser will expect more stuff.
while ((!(s = bc_read_line(&buf, ">>> ")) ||
(vm.eof = (s == BC_STATUS_EOF))) && buf.len > 1)
{
char c2, *str = buf.v;
size_t i, len = buf.len - 1;
while (bc_vm_readLine(clear)) {
for (i = 0; i < len; ++i) {
size_t len = vm.buffer.len - 1;
const char *str = vm.buffer.v;
bool notend = len > i + 1;
uchar c = (uchar) str[i];
clear = (len < 2 || str[len - 2] != '\\' || str[len - 1] != '\n');
if (!clear) continue;
hash = (!comment && !string && ((hash && c != '\n') ||
(!hash && c == '#')));
if (!hash && !comment && (i - 1 > len || str[i - 1] != '\\')) {
if (BC_IS_BC) string ^= (c == '"');
else if (c == ']') string -= 1;
else if (c == '[') string += 1;
}
if (BC_IS_BC && !hash && !string && notend) {
c2 = str[i + 1];
if (c == '/' && !comment && c2 == '*') {
comment = true;
i += 1;
}
else if (c == '*' && comment && c2 == '/') {
comment = false;
i += 1;
}
}
}
bc_vec_concat(&buffer, buf.v);
if (string || comment) continue;
if (len >= 2 && str[len - 2] == '\\' && str[len - 1] == '\n') continue;
#if BC_ENABLE_HISTORY
if (vm.history.stdin_has_data) continue;
#endif // BC_ENABLE_HISTORY
bc_vm_process(buffer.v);
bc_vec_empty(&buffer);
bc_vm_process(vm.buffer.v, true);
if (vm.eof) break;
else bc_vm_clean();
}
if (!BC_STATUS_IS_ERROR(s)) {
if (BC_ERR(comment))
bc_parse_err(&vm.prs, BC_ERR_PARSE_COMMENT);
else if (BC_ERR(string))
bc_parse_err(&vm.prs, BC_ERR_PARSE_STRING);
#if BC_ENABLED
else if (BC_IS_BC) bc_vm_endif();
if (!BC_STATUS_IS_ERROR(s) && BC_IS_BC) bc_vm_endif();
#endif // BC_ENABLED
}
err:
BC_SIG_MAYLOCK;
@ -869,14 +855,14 @@ err:
#endif // !BC_ENABLE_MEMCHECK
if (!vm.status && !vm.eof) {
bc_vec_empty(&buffer);
bc_vec_empty(&vm.buffer);
BC_LONGJMP_STOP;
BC_SIG_UNLOCK;
goto restart;
}
bc_vec_free(&buf);
bc_vec_free(&buffer);
bc_vec_free(&vm.line_buf);
bc_vec_free(&vm.buffer);
BC_LONGJMP_CONT;
}
@ -885,7 +871,7 @@ err:
static void bc_vm_load(const char *name, const char *text) {
bc_lex_file(&vm.prs.l, name);
bc_parse_text(&vm.prs, text);
bc_parse_text(&vm.prs, text, false);
while (vm.prs.l.t != BC_LEX_EOF) vm.parse(&vm.prs);
}
@ -984,7 +970,7 @@ static void bc_vm_exec(void) {
more = bc_read_buf(&buf, vm.exprs.v, &len);
bc_vec_pushByte(&buf, '\0');
bc_vm_process(buf.v);
bc_vm_process(buf.v, false);
bc_vec_popAll(&buf);