Fix dc bug
This bug fix also made things much simpler. What it does is allow the lexers to ask for more data from stdin when needed, like when a string or comment is not ended properly. The dc bug was that it just didn't work well with register commands. Signed-off-by: Gavin Howard <[email protected]>
This commit is contained in:
parent
1c9d222b46
commit
8a1d001dcf
|
@ -433,6 +433,8 @@ typedef struct BcLex {
|
|||
/// the string.
|
||||
BcVec str;
|
||||
|
||||
bool is_stdin;
|
||||
|
||||
} BcLex;
|
||||
|
||||
/**
|
||||
|
@ -458,10 +460,11 @@ void bc_lex_file(BcLex *l, const char *file);
|
|||
|
||||
/**
|
||||
* Sets the text the lexer will lex.
|
||||
* @param l The lexer.
|
||||
* @param text The text to lex.
|
||||
* @param l The lexer.
|
||||
* @param text The text to lex.
|
||||
* @param is_stdin True if the text is from stdin, false otherwise.
|
||||
*/
|
||||
void bc_lex_text(BcLex *l, const char *text);
|
||||
void bc_lex_text(BcLex *l, const char *text, bool is_stdin);
|
||||
|
||||
/**
|
||||
* Generic next function for the parser to call. It takes care of calling the
|
||||
|
@ -518,4 +521,10 @@ void bc_lex_commonTokens(BcLex *l, char c);
|
|||
*/
|
||||
void bc_lex_invalidChar(BcLex *l, char c);
|
||||
|
||||
/**
|
||||
* Reads a line from stdin and puts it into the lexer's buffer.
|
||||
* @param l The lexer.
|
||||
*/
|
||||
bool bc_lex_readLine(BcLex *l);
|
||||
|
||||
#endif // BC_LEX_H
|
||||
|
|
|
@ -218,10 +218,11 @@ void bc_parse_pushName(const BcParse* p, char *name, bool var);
|
|||
|
||||
/**
|
||||
* Sets the text that the parser will parse.
|
||||
* @param p The parser.
|
||||
* @param text The text that will be parsed.
|
||||
* @param p The parser.
|
||||
* @param text The text to lex.
|
||||
* @param is_stdin True if the text is from stdin, false otherwise.
|
||||
*/
|
||||
void bc_parse_text(BcParse *p, const char *text);
|
||||
void bc_parse_text(BcParse *p, const char *text, bool is_stdin);
|
||||
|
||||
// References to const 0 and 1 strings for special cases. bc and dc have
|
||||
// specific instructions for 0 and 1 because they pop up so often and (in the
|
||||
|
|
|
@ -334,6 +334,9 @@ typedef struct BcVm {
|
|||
#if !BC_ENABLE_LIBRARY
|
||||
BcParse prs;
|
||||
BcProgram prog;
|
||||
|
||||
BcVec line_buf;
|
||||
BcVec buffer;
|
||||
#endif // !BC_ENABLE_LIBRARY
|
||||
|
||||
BcVec jmp_bufs;
|
||||
|
@ -374,6 +377,7 @@ typedef struct BcVm {
|
|||
bool no_exit_exprs;
|
||||
bool exit_exprs;
|
||||
bool eof;
|
||||
bool is_stdin;
|
||||
#endif // !BC_ENABLE_LIBRARY
|
||||
|
||||
BcBigDig maxes[BC_PROG_GLOBALS_LEN + BC_ENABLE_EXTRA_MATH];
|
||||
|
@ -450,6 +454,8 @@ void* bc_vm_malloc(size_t n);
|
|||
void* bc_vm_realloc(void *ptr, size_t n);
|
||||
char* bc_vm_strdup(const char *str);
|
||||
|
||||
bool bc_vm_readLine(bool clear);
|
||||
|
||||
void bc_pledge(const char *promises, const char *execpromises);
|
||||
|
||||
char* bc_vm_getenv(const char* var);
|
||||
|
|
|
@ -139,19 +139,12 @@ If no files are given on the command-line and no files or expressions are given
|
|||
by the **-f**, **-\-file**, **-e**, or **-\-expression** options, then dc(1)
|
||||
reads from **stdin**.
|
||||
|
||||
However, there are a few caveats to this.
|
||||
However, there is a caveat to this.
|
||||
|
||||
First, **stdin** is evaluated a line at a time. The only exception to this is if
|
||||
a string has been started, but not ended. This means that, except for escaped
|
||||
brackets, all brackets must be balanced before dc(1) parses and executes.
|
||||
|
||||
Second, as a consequence of the above, if the user attempts to use the left
|
||||
bracket character, **[**, as a register name, dc(1) will not execute until a
|
||||
balancing right bracket, **]** is given. Then it will give an error since it is
|
||||
an error to use the left bracket as register name (see the **REGISTERS**
|
||||
section). In fact, this is why it is an error.
|
||||
|
||||
|
||||
# STDOUT
|
||||
|
||||
Any non-error output is written to **stdout**. In addition, if history (see the
|
||||
|
|
26
src/bc_lex.c
26
src/bc_lex.c
|
@ -90,17 +90,31 @@ static void bc_lex_identifier(BcLex *l) {
|
|||
static void bc_lex_string(BcLex *l) {
|
||||
|
||||
// We need to keep track of newlines to increment them properly.
|
||||
size_t len, nlines = 0, i = l->i;
|
||||
const char *buf = l->buf;
|
||||
size_t len, nlines, i;
|
||||
const char *buf;
|
||||
char c;
|
||||
bool got_more;
|
||||
|
||||
l->t = BC_LEX_STR;
|
||||
|
||||
// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent is
|
||||
// '\q', which makes this loop simpler.
|
||||
for (; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
|
||||
do {
|
||||
|
||||
if (BC_ERR(c == '\0')) {
|
||||
nlines = 0;
|
||||
buf = l->buf;
|
||||
got_more = false;
|
||||
|
||||
assert(!vm.is_stdin || buf == vm.buffer.v);
|
||||
|
||||
// Fortunately for us, bc doesn't escape quotes. Instead, the equivalent
|
||||
// is '\q', which makes this loop simpler.
|
||||
for (i = l->i; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n');
|
||||
|
||||
if (BC_ERR(c == '\0') && !vm.eof && l->is_stdin)
|
||||
got_more = bc_lex_readLine(l);
|
||||
|
||||
} while (got_more && c != '"');
|
||||
|
||||
if (c != '"') {
|
||||
l->i = i;
|
||||
bc_lex_err(l, BC_ERR_PARSE_STRING);
|
||||
}
|
||||
|
|
56
src/dc_lex.c
56
src/dc_lex.c
|
@ -72,10 +72,8 @@ static void dc_lex_register(BcLex *l) {
|
|||
else {
|
||||
|
||||
// I don't allow newlines because newlines are used for controlling when
|
||||
// execution happens, and allowing newlines would just be complex. For
|
||||
// the same reason, I don't allow the '[' character; it would just be
|
||||
// too complex.
|
||||
if (BC_ERR(l->buf[l->i - 1] == '\n' || l->buf[l->i - 1] == '['))
|
||||
// execution happens, and allowing newlines would just be complex.
|
||||
if (BC_ERR(l->buf[l->i - 1] == '\n'))
|
||||
bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
|
||||
|
||||
// Set the lexer string and token.
|
||||
|
@ -94,33 +92,49 @@ static void dc_lex_register(BcLex *l) {
|
|||
*/
|
||||
static void dc_lex_string(BcLex *l) {
|
||||
|
||||
size_t depth = 1, nls = 0, i = l->i;
|
||||
size_t depth, nls, i;
|
||||
char c;
|
||||
bool got_more;
|
||||
|
||||
// Set the token and clear the string.
|
||||
l->t = BC_LEX_STR;
|
||||
bc_vec_popAll(&l->str);
|
||||
|
||||
// This is the meat. As long as we don't run into the NUL byte, and we have
|
||||
// "depth", which means we haven't completely balanced brackets yet, we
|
||||
// continue eating the string.
|
||||
for (; (c = l->buf[i]) && depth; ++i) {
|
||||
do {
|
||||
|
||||
// Check for escaped brackets and set the depths as appropriate.
|
||||
if (c == '\\') {
|
||||
c = l->buf[++i];
|
||||
if (!c) break;
|
||||
}
|
||||
else {
|
||||
depth += (c == '[');
|
||||
depth -= (c == ']');
|
||||
depth = 1;
|
||||
nls = 0;
|
||||
got_more = false;
|
||||
|
||||
assert(!l->is_stdin || l->buf == vm.buffer.v);
|
||||
|
||||
// This is the meat. As long as we don't run into the NUL byte, and we
|
||||
// have "depth", which means we haven't completely balanced brackets
|
||||
// yet, we continue eating the string.
|
||||
for (i = l->i; (c = l->buf[i]) && depth; ++i) {
|
||||
|
||||
// Check for escaped brackets and set the depths as appropriate.
|
||||
if (c == '\\') {
|
||||
c = l->buf[++i];
|
||||
if (!c) break;
|
||||
}
|
||||
else {
|
||||
depth += (c == '[');
|
||||
depth -= (c == ']');
|
||||
}
|
||||
|
||||
// We want to adjust the line in the lexer as necessary.
|
||||
nls += (c == '\n');
|
||||
|
||||
if (depth) bc_vec_push(&l->str, &c);
|
||||
}
|
||||
|
||||
// We want to adjust the line in the lexer as necessary.
|
||||
nls += (c == '\n');
|
||||
if (BC_ERR(c == '\0' && depth)) {
|
||||
if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
|
||||
if (got_more) bc_vec_popAll(&l->str);
|
||||
}
|
||||
|
||||
if (depth) bc_vec_push(&l->str, &c);
|
||||
}
|
||||
} while (got_more && depth);
|
||||
|
||||
// Obviously, if we didn't balance, that's an error.
|
||||
if (BC_ERR(c == '\0' && depth)) {
|
||||
|
|
51
src/lex.c
51
src/lex.c
|
@ -55,23 +55,38 @@ void bc_lex_lineComment(BcLex *l) {
|
|||
void bc_lex_comment(BcLex *l) {
|
||||
|
||||
size_t i, nlines = 0;
|
||||
const char *buf = l->buf;
|
||||
bool end = false;
|
||||
const char *buf;
|
||||
bool end = false, got_more;
|
||||
char c;
|
||||
|
||||
l->i += 1;
|
||||
l->t = BC_LEX_WHITESPACE;
|
||||
|
||||
for (i = l->i; !end; i += !end) {
|
||||
do {
|
||||
|
||||
for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
|
||||
buf = l->buf;
|
||||
got_more = false;
|
||||
|
||||
if (BC_ERR(!c || buf[i + 1] == '\0')) {
|
||||
l->i = i;
|
||||
bc_lex_err(l, BC_ERR_PARSE_COMMENT);
|
||||
assert(!vm.is_stdin || buf == vm.buffer.v);
|
||||
|
||||
for (i = l->i; !end; i += !end) {
|
||||
|
||||
for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
|
||||
|
||||
if (BC_ERR(!c || buf[i + 1] == '\0')) {
|
||||
|
||||
if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
end = (buf[i + 1] == '/');
|
||||
}
|
||||
} while (got_more && !end);
|
||||
|
||||
end = buf[i + 1] == '/';
|
||||
if (!end) {
|
||||
l->i = i;
|
||||
bc_lex_err(l, BC_ERR_PARSE_COMMENT);
|
||||
}
|
||||
|
||||
l->i = i + 2;
|
||||
|
@ -220,11 +235,25 @@ void bc_lex_next(BcLex *l) {
|
|||
} while (l->t == BC_LEX_WHITESPACE);
|
||||
}
|
||||
|
||||
void bc_lex_text(BcLex *l, const char *text) {
|
||||
assert(l != NULL && text != NULL);
|
||||
static void bc_lex_fixText(BcLex *l, const char *text, size_t len) {
|
||||
l->buf = text;
|
||||
l->len = len;
|
||||
}
|
||||
|
||||
bool bc_lex_readLine(BcLex *l) {
|
||||
|
||||
bool good = bc_vm_readLine(false);
|
||||
|
||||
bc_lex_fixText(l, vm.buffer.v, vm.buffer.len - 1);
|
||||
|
||||
return good;
|
||||
}
|
||||
|
||||
void bc_lex_text(BcLex *l, const char *text, bool is_stdin) {
|
||||
assert(l != NULL && text != NULL);
|
||||
bc_lex_fixText(l, text, strlen(text));
|
||||
l->i = 0;
|
||||
l->len = strlen(text);
|
||||
l->t = l->last = BC_LEX_INVALID;
|
||||
l->is_stdin = is_stdin;
|
||||
bc_lex_next(l);
|
||||
}
|
||||
|
|
|
@ -137,10 +137,10 @@ void bc_parse_number(BcParse *p) {
|
|||
#endif // BC_ENABLE_EXTRA_MATH
|
||||
}
|
||||
|
||||
void bc_parse_text(BcParse *p, const char *text) {
|
||||
void bc_parse_text(BcParse *p, const char *text, bool is_stdin) {
|
||||
// Make sure the pointer isn't invalidated.
|
||||
p->func = bc_vec_item(&p->prog->fns, p->fidx);
|
||||
bc_lex_text(&p->l, text);
|
||||
bc_lex_text(&p->l, text, is_stdin);
|
||||
}
|
||||
|
||||
void bc_parse_reset(BcParse *p) {
|
||||
|
|
|
@ -461,7 +461,7 @@ static void bc_program_read(BcProgram *p) {
|
|||
|
||||
if (s == BC_STATUS_EOF) bc_vm_err(BC_ERR_EXEC_READ_EXPR);
|
||||
|
||||
bc_parse_text(&parse, buf.v);
|
||||
bc_parse_text(&parse, buf.v, false);
|
||||
vm.expr(&parse, BC_PARSE_NOREAD | BC_PARSE_NEEDVAL);
|
||||
|
||||
if (BC_ERR(parse.l.t != BC_LEX_NLINE && parse.l.t != BC_LEX_EOF))
|
||||
|
@ -1564,7 +1564,7 @@ static void bc_program_execStr(BcProgram *p, const char *restrict code,
|
|||
|
||||
BC_SIG_UNLOCK;
|
||||
|
||||
bc_parse_text(&prs, str);
|
||||
bc_parse_text(&prs, str, false);
|
||||
vm.expr(&prs, BC_PARSE_NOCALL);
|
||||
|
||||
BC_SIG_LOCK;
|
||||
|
|
122
src/vm.c
122
src/vm.c
|
@ -690,9 +690,9 @@ static void bc_vm_clean(void) {
|
|||
}
|
||||
}
|
||||
|
||||
static void bc_vm_process(const char *text) {
|
||||
static void bc_vm_process(const char *text, bool is_stdin) {
|
||||
|
||||
bc_parse_text(&vm.prs, text);
|
||||
bc_parse_text(&vm.prs, text, is_stdin);
|
||||
|
||||
do {
|
||||
|
||||
|
@ -727,7 +727,14 @@ static void bc_vm_endif(void) {
|
|||
}
|
||||
|
||||
if (good) {
|
||||
while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}");
|
||||
|
||||
bool is_stdin = vm.is_stdin;
|
||||
|
||||
vm.is_stdin = false;
|
||||
|
||||
while (BC_PARSE_IF_END(&vm.prs)) bc_vm_process("else {}", false);
|
||||
|
||||
vm.is_stdin = is_stdin;
|
||||
}
|
||||
else bc_parse_err(&vm.prs, BC_ERR_PARSE_BLOCK);
|
||||
}
|
||||
|
@ -751,7 +758,7 @@ static void bc_vm_file(const char *file) {
|
|||
|
||||
BC_SIG_UNLOCK;
|
||||
|
||||
bc_vm_process(data);
|
||||
bc_vm_process(data, false);
|
||||
|
||||
#if BC_ENABLED
|
||||
if (BC_IS_BC) bc_vm_endif();
|
||||
|
@ -770,87 +777,66 @@ err:
|
|||
BC_LONGJMP_CONT;
|
||||
}
|
||||
|
||||
bool bc_vm_readLine(bool clear) {
|
||||
|
||||
BcStatus s;
|
||||
bool good;
|
||||
|
||||
if (clear) bc_vec_empty(&vm.buffer);
|
||||
|
||||
bc_vec_empty(&vm.line_buf);
|
||||
|
||||
if (vm.eof) return false;
|
||||
|
||||
do {
|
||||
s = bc_read_line(&vm.line_buf, ">>> ");
|
||||
vm.eof = (s == BC_STATUS_EOF);
|
||||
} while (!(s) && !vm.eof && vm.line_buf.len < 1);
|
||||
|
||||
good = (vm.line_buf.len > 1);
|
||||
|
||||
if (good) bc_vec_concat(&vm.buffer, vm.line_buf.v);
|
||||
|
||||
return good;
|
||||
}
|
||||
|
||||
static void bc_vm_stdin(void) {
|
||||
|
||||
BcStatus s;
|
||||
BcVec buf, buffer;
|
||||
size_t string = 0;
|
||||
bool comment = false, hash = false;
|
||||
bool clear = true;
|
||||
|
||||
vm.is_stdin = true;
|
||||
|
||||
bc_lex_file(&vm.prs.l, bc_program_stdin_name);
|
||||
|
||||
BC_SIG_LOCK;
|
||||
bc_vec_init(&buffer, sizeof(uchar), NULL);
|
||||
bc_vec_init(&buf, sizeof(uchar), NULL);
|
||||
bc_vec_pushByte(&buffer, '\0');
|
||||
bc_vec_init(&vm.buffer, sizeof(uchar), NULL);
|
||||
bc_vec_init(&vm.line_buf, sizeof(uchar), NULL);
|
||||
BC_SETJMP_LOCKED(err);
|
||||
BC_SIG_UNLOCK;
|
||||
|
||||
// This label is because errors can cause jumps to end up at the err label
|
||||
// below. If that happens, and the error should be cleared and execution
|
||||
// continue, then we need to jump back.
|
||||
restart:
|
||||
|
||||
// This loop is complex because the vm tries not to send any lines that end
|
||||
// with a backslash to the parser. The reason for that is because the parser
|
||||
// treats a backslash+newline combo as whitespace, per the bc spec. In that
|
||||
// case, and for strings and comments, the parser will expect more stuff.
|
||||
while ((!(s = bc_read_line(&buf, ">>> ")) ||
|
||||
(vm.eof = (s == BC_STATUS_EOF))) && buf.len > 1)
|
||||
{
|
||||
char c2, *str = buf.v;
|
||||
size_t i, len = buf.len - 1;
|
||||
while (bc_vm_readLine(clear)) {
|
||||
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t len = vm.buffer.len - 1;
|
||||
const char *str = vm.buffer.v;
|
||||
|
||||
bool notend = len > i + 1;
|
||||
uchar c = (uchar) str[i];
|
||||
clear = (len < 2 || str[len - 2] != '\\' || str[len - 1] != '\n');
|
||||
if (!clear) continue;
|
||||
|
||||
hash = (!comment && !string && ((hash && c != '\n') ||
|
||||
(!hash && c == '#')));
|
||||
|
||||
if (!hash && !comment && (i - 1 > len || str[i - 1] != '\\')) {
|
||||
if (BC_IS_BC) string ^= (c == '"');
|
||||
else if (c == ']') string -= 1;
|
||||
else if (c == '[') string += 1;
|
||||
}
|
||||
|
||||
if (BC_IS_BC && !hash && !string && notend) {
|
||||
|
||||
c2 = str[i + 1];
|
||||
|
||||
if (c == '/' && !comment && c2 == '*') {
|
||||
comment = true;
|
||||
i += 1;
|
||||
}
|
||||
else if (c == '*' && comment && c2 == '/') {
|
||||
comment = false;
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bc_vec_concat(&buffer, buf.v);
|
||||
|
||||
if (string || comment) continue;
|
||||
if (len >= 2 && str[len - 2] == '\\' && str[len - 1] == '\n') continue;
|
||||
#if BC_ENABLE_HISTORY
|
||||
if (vm.history.stdin_has_data) continue;
|
||||
#endif // BC_ENABLE_HISTORY
|
||||
|
||||
bc_vm_process(buffer.v);
|
||||
bc_vec_empty(&buffer);
|
||||
bc_vm_process(vm.buffer.v, true);
|
||||
|
||||
if (vm.eof) break;
|
||||
else bc_vm_clean();
|
||||
}
|
||||
|
||||
if (!BC_STATUS_IS_ERROR(s)) {
|
||||
if (BC_ERR(comment))
|
||||
bc_parse_err(&vm.prs, BC_ERR_PARSE_COMMENT);
|
||||
else if (BC_ERR(string))
|
||||
bc_parse_err(&vm.prs, BC_ERR_PARSE_STRING);
|
||||
#if BC_ENABLED
|
||||
else if (BC_IS_BC) bc_vm_endif();
|
||||
if (!BC_STATUS_IS_ERROR(s) && BC_IS_BC) bc_vm_endif();
|
||||
#endif // BC_ENABLED
|
||||
}
|
||||
|
||||
err:
|
||||
BC_SIG_MAYLOCK;
|
||||
|
@ -869,14 +855,14 @@ err:
|
|||
#endif // !BC_ENABLE_MEMCHECK
|
||||
|
||||
if (!vm.status && !vm.eof) {
|
||||
bc_vec_empty(&buffer);
|
||||
bc_vec_empty(&vm.buffer);
|
||||
BC_LONGJMP_STOP;
|
||||
BC_SIG_UNLOCK;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
bc_vec_free(&buf);
|
||||
bc_vec_free(&buffer);
|
||||
bc_vec_free(&vm.line_buf);
|
||||
bc_vec_free(&vm.buffer);
|
||||
|
||||
BC_LONGJMP_CONT;
|
||||
}
|
||||
|
@ -885,7 +871,7 @@ err:
|
|||
static void bc_vm_load(const char *name, const char *text) {
|
||||
|
||||
bc_lex_file(&vm.prs.l, name);
|
||||
bc_parse_text(&vm.prs, text);
|
||||
bc_parse_text(&vm.prs, text, false);
|
||||
|
||||
while (vm.prs.l.t != BC_LEX_EOF) vm.parse(&vm.prs);
|
||||
}
|
||||
|
@ -984,7 +970,7 @@ static void bc_vm_exec(void) {
|
|||
|
||||
more = bc_read_buf(&buf, vm.exprs.v, &len);
|
||||
bc_vec_pushByte(&buf, '\0');
|
||||
bc_vm_process(buf.v);
|
||||
bc_vm_process(buf.v, false);
|
||||
|
||||
bc_vec_popAll(&buf);
|
||||
|
||||
|
|
Loading…
Reference in New Issue