Skip to content

Commit 1353882

Browse files
Parsing Error Detection (#31)
* add error detection in parsing table, parser * added more parsing tests * minor bug fixes * fixed linting bug * removed unnecessary declarations * updates Co-authored-by: noob77777 <[email protected]>
1 parent 61d53d9 commit 1353882

File tree

7 files changed

+206
-17
lines changed

7 files changed

+206
-17
lines changed

src/include/parser/parser.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <stack>
55
#include <string>
6+
#include <utility>
67
#include <vector>
78

89
#include "parser/parsing_table.h"
@@ -49,6 +50,11 @@ class Parser {
4950
*/
5051
std::vector<std::string> current_string_;
5152

53+
/**
54+
* Errors incurred during the parsing of the given input file.
55+
*/
56+
std::vector<std::string> parser_errors_;
57+
5258
public:
5359
/**
5460
* Constructor for initializing stack and other members.
@@ -79,7 +85,8 @@ class Parser {
7985
void SetInputString(std::vector<std::string> inps);
8086
void SetParsingTable(ParsingTable table);
8187
void SetStartSymbol(std::string start);
82-
[[nodiscard]] const std::vector<int> &GetProductionHistory();
88+
[[nodiscard]] const std::vector<int> &GetProductionHistory() { return production_history_; }
89+
[[nodiscard]] const std::vector<std::string> &GetParserErrors() { return parser_errors_; }
8390
};
8491
} // namespace parser
8592

src/include/parser/parsing_table.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,17 @@ class ParsingTable {
5252
*/
5353
std::vector<std::string> non_terminals_;
5454

55+
/**
56+
* Stores the errors if any, in case of a non - LL(1)
57+
* grammar during the construction of the parsing table.
58+
*/
59+
std::vector<std::string> errors_;
60+
61+
/**
62+
* A helper function to generate error message.
63+
*/
64+
static std::string GenerateErrorMessage(const std::string &production, const std::string &symbol);
65+
5566
public:
5667
/**
5768
* Default constructor
@@ -88,6 +99,7 @@ class ParsingTable {
8899
[[nodiscard]] const std::vector<std::string> &GetNonTerminals() { return non_terminals_; }
89100
[[nodiscard]] const std::vector<std::string> &GetTerminals() { return terminals_; }
90101
[[nodiscard]] const Table &GetTable() { return table_; }
102+
[[nodiscard]] const std::vector<std::string> &GetErrors() { return errors_; }
91103
};
92104

93105
} // namespace parser

src/main/main.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,14 @@
2525
* jucc begins execution here.
2626
*/
2727
auto main(int argc, char *argv[]) -> int {
28-
// print a hello world message
28+
/* print a hello world message */
2929
std::cout << jucc::Hello();
30-
jucc::InputParser input_parser = jucc::InputParser(argc, argv);
3130

31+
/**
32+
* InputParser parses the cmd line arguments and returns
33+
* input file path and grammar file path
34+
*/
35+
jucc::InputParser input_parser = jucc::InputParser(argc, argv);
3236
std::string file_grammar = input_parser.GetArgument("-g");
3337
if (file_grammar.empty()) {
3438
std::cout << "jucc: usage: jucc -g <grammar_file> -f <input_file>\n";
@@ -40,25 +44,33 @@ auto main(int argc, char *argv[]) -> int {
4044
return 0;
4145
}
4246

47+
/* Parse the grammar file and check for formatting errors */
4348
jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str());
4449
if (!grammar_parser.Parse()) {
4550
std::cout << "jucc: " << grammar_parser.GetError() << '\n';
4651
return 0;
4752
}
4853

54+
/* Check if the input file path is good */
4955
std::ifstream ifs(file_input);
5056
if (!ifs.good()) {
5157
std::cout << "jucc: cannot read input file, bad file!\n";
5258
return 0;
5359
}
5460

61+
/**
62+
* Get the parsed grammar production and process it
63+
* Steps include:
64+
* 1. Left recursion removal
65+
* 2. Left factoring
66+
*/
5567
jucc::grammar::Productions raw_productions = grammar_parser.GetProductions();
5668
jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions);
5769

70+
/* Calculate first and follows to build the LL(1) parsing table */
5871
auto nullables = jucc::utils::CalcNullables(productions);
5972
auto firsts = jucc::utils::CalcFirsts(productions, nullables);
6073
auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol());
61-
6274
auto terminals = grammar_parser.GetTerminals();
6375
auto non_terminals = jucc::utils::GetAllNonTerminals(productions);
6476

@@ -68,14 +80,25 @@ auto main(int argc, char *argv[]) -> int {
6880
parsing_table.SetProductions(productions);
6981
parsing_table.BuildTable();
7082

71-
// use lexer to get input tokens
83+
/* Check for errors in grammar and exit if errors exist */
84+
auto err = parsing_table.GetErrors();
85+
if (!err.empty()) {
86+
std::cout << "jucc: ";
87+
for (auto &e : err) {
88+
std::cout << e << '\n';
89+
}
90+
return 0;
91+
}
92+
93+
/* Use Lexer to get input tokens */
7294
std::vector<std::string> input_tokens;
7395
jucc::lexer::Lexer lexer = jucc::lexer::Lexer();
7496
int token;
7597
while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) {
7698
input_tokens.emplace_back(jucc::lexer::Lexer::GetTokenType(token));
7799
}
78100

101+
/* Parse the input file using the parsing table and report errors */
79102
jucc::parser::Parser parser = jucc::parser::Parser();
80103
parser.SetInputString(input_tokens);
81104
parser.SetStartSymbol(grammar_parser.GetStartSymbol());
@@ -85,5 +108,13 @@ auto main(int argc, char *argv[]) -> int {
85108
parser.ParseNextStep();
86109
}
87110

111+
err = parser.GetParserErrors();
112+
if (!err.empty()) {
113+
std::cout << "jucc: ";
114+
for (auto &e : err) {
115+
std::cout << e << '\n';
116+
}
117+
}
118+
88119
return 0;
89120
}

src/parser/parser.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ void Parser::ParseNextStep() {
5555
ParsingTable::Table table = table_.GetTable();
5656
// skip tokens until it is in the first or is a synch token
5757
while (!IsComplete() && table[top_symbol][current_token] == std::string(ERROR_TOKEN)) {
58+
std::string ret;
59+
ret += "parser error: at symbol: " + current_token;
60+
parser_errors_.push_back(ret);
5861
DoNextStep();
5962
if (current_step_ < static_cast<int>(current_string_.size())) {
6063
current_token = current_string_[current_step_];
@@ -63,6 +66,9 @@ void Parser::ParseNextStep() {
6366
if (!IsComplete()) {
6467
// if SYNCH TOKEN - We skip the current symbol on stack top
6568
if (table[top_symbol][current_token] == std::string(SYNCH_TOKEN)) {
69+
std::string ret;
70+
ret += "parser error: at symbol: " + current_token;
71+
parser_errors_.push_back(ret);
6672
stack_.pop();
6773
} else {
6874
// check if current stack top matches the current token
@@ -90,6 +96,4 @@ void Parser::ParseNextStep() {
9096
}
9197
}
9298

93-
const std::vector<int> &Parser::GetProductionHistory() { return production_history_; }
94-
9599
} // namespace jucc::parser

src/parser/parsing_table.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@
55

66
namespace jucc::parser {
77

8+
std::string ParsingTable::GenerateErrorMessage(const std::string &production, const std::string &symbol) {
9+
std::string ret;
10+
ret += "parsing table error: duplicate entry in parsing table, ";
11+
ret += "production: ";
12+
ret = ret.append(production);
13+
ret += " symbol: ";
14+
ret = ret.append(symbol);
15+
return ret;
16+
}
17+
818
void ParsingTable::BuildTable() {
919
// fill initially all errors
1020
for (auto &nt : non_terminals_) {
@@ -17,6 +27,9 @@ void ParsingTable::BuildTable() {
1727
for (auto &nt : non_terminals_) {
1828
if (follows_.count(nt) != 0U) {
1929
for (const auto &symbol : follows_[nt]) {
30+
if (table_[nt][symbol] != std::string(ERROR_TOKEN)) {
31+
errors_.push_back(GenerateErrorMessage(nt, symbol));
32+
}
2033
table_[nt][symbol] = std::string(SYNCH_TOKEN);
2134
}
2235
}
@@ -27,32 +40,38 @@ void ParsingTable::BuildTable() {
2740
auto rules = productions_[prod_no].GetRules();
2841
for (int rule_no = 0; rule_no < static_cast<int>(rules.size()); rule_no++) {
2942
std::string first_entity = rules[rule_no].GetEntities()[0];
30-
3143
// check if first_entity is terminal
3244
if (std::find(terminals_.begin(), terminals_.end(), first_entity) != terminals_.end()) {
45+
std::string entry = table_[productions_[prod_no].GetParent()][first_entity];
46+
if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) {
47+
errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), first_entity));
48+
}
49+
3350
table_[productions_[prod_no].GetParent()][first_entity] = std::to_string(prod_no * 100 + rule_no);
3451
} else if (std::find(non_terminals_.begin(), non_terminals_.end(), first_entity) != non_terminals_.end()) {
3552
// first entity is a non-terminal
3653
if (firsts_.count(first_entity) != 0U) {
3754
for (auto &symbol : firsts_[first_entity]) {
3855
if (symbol != std::string(grammar::EPSILON)) {
39-
table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no);
40-
}
41-
}
42-
// if epsilon present add production under follow(A) symbols
43-
if (std::find(firsts_[first_entity].begin(), firsts_[first_entity].end(), std::string(grammar::EPSILON)) !=
44-
firsts_[first_entity].end()) {
45-
if (follows_.count(productions_[prod_no].GetParent()) != 0U) {
46-
for (auto &symbol : follows_[productions_[prod_no].GetParent()]) {
47-
table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no);
56+
std::string entry = table_[productions_[prod_no].GetParent()][symbol];
57+
if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) {
58+
errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), symbol));
4859
}
60+
61+
table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no);
4962
}
5063
}
5164
}
65+
5266
} else if (first_entity == std::string(grammar::EPSILON)) {
5367
// first entity is epsilon
5468
if (follows_.count(productions_[prod_no].GetParent()) != 0U) {
5569
for (auto &symbol : follows_[productions_[prod_no].GetParent()]) {
70+
std::string entry = table_[productions_[prod_no].GetParent()][symbol];
71+
if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) {
72+
errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), symbol));
73+
}
74+
5675
table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no);
5776
}
5877
}

test/parser/parser_test.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ TEST(parser, Parser1) {
6262
pars.ParseNextStep();
6363

6464
std::vector<int> history;
65+
std::vector<std::string> parser_errors;
6566
history = pars.GetProductionHistory();
67+
parser_errors = pars.GetParserErrors();
68+
ASSERT_EQ(parser_errors.size(), 1);
6669
// Stack - T E' $
6770
ASSERT_EQ(history.size(), 1);
6871
ASSERT_EQ(history[0], 0);
@@ -99,6 +102,9 @@ TEST(parser, Parser1) {
99102
pars.ParseNextStep();
100103
history = pars.GetProductionHistory();
101104

105+
parser_errors = pars.GetParserErrors();
106+
ASSERT_EQ(parser_errors.size(), 2);
107+
102108
// Stack - T' E' $
103109
pars.ParseNextStep();
104110
history = pars.GetProductionHistory();

0 commit comments

Comments
 (0)