Skip to content

Commit 3a236ef

Browse files
LL1 parser fix (#33)
* cool boii🍺 * added parser test and symbol table errors * fixed format issue * minor fix Co-authored-by: Shuvayan Ghosh Dastidar <[email protected]>
1 parent b7d55cf commit 3a236ef

File tree

5 files changed

+229
-6
lines changed

5 files changed

+229
-6
lines changed

src/include/parser/parser.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ class Parser {
5555
*/
5656
std::vector<std::string> parser_errors_;
5757

58+
/**
59+
* Helper function to generate error messages for parsing.
60+
*/
61+
static std::string GenerateErrorMessage(const std::string &current_token);
62+
5863
public:
5964
/**
6065
* Constructor for initializing stack and other members.

src/main/main.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,20 @@ auto main(int argc, char *argv[]) -> int {
9494
std::vector<std::string> input_tokens;
9595
jucc::lexer::Lexer lexer = jucc::lexer::Lexer();
9696
int token;
97+
9798
while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) {
9899
input_tokens.emplace_back(jucc::lexer::Lexer::GetTokenType(token));
99100
}
101+
std::vector<std::string> errors = lexer.GetUndeclaredSymbolErrors();
102+
errors.insert(errors.end(), lexer.GetDuplicateSymbolErrors().begin(), lexer.GetDuplicateSymbolErrors().end());
103+
104+
if (!errors.empty()) {
105+
std::cout << "jucc: ";
106+
for (auto &e : errors) {
107+
std::cout << e << '\n';
108+
}
109+
return 0;
110+
}
100111

101112
/* Parse the input file using the parsing table and report errors */
102113
jucc::parser::Parser parser = jucc::parser::Parser();

src/parser/parser.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ Parser::Parser() {
1111
current_string_.clear();
1212
}
1313

14+
std::string Parser::GenerateErrorMessage(const std::string &current_token) {
15+
std::string ret_string;
16+
ret_string += "parser error: at symbol: " + current_token;
17+
return ret_string;
18+
}
19+
1420
void Parser::SetInputString(std::vector<std::string> inps) {
1521
if (!inps.empty()) {
1622
input_string_ = std::move(inps);
@@ -55,9 +61,7 @@ void Parser::ParseNextStep() {
5561
ParsingTable::Table table = table_.GetTable();
5662
// skip tokens until it is in the first or is a synch token
5763
while (!IsComplete() && table[top_symbol][current_token] == std::string(ERROR_TOKEN)) {
58-
std::string ret;
59-
ret += "parser error: at symbol: " + current_token;
60-
parser_errors_.push_back(ret);
64+
parser_errors_.push_back(GenerateErrorMessage(current_token));
6165
DoNextStep();
6266
if (current_step_ < static_cast<int>(current_string_.size())) {
6367
current_token = current_string_[current_step_];
@@ -66,15 +70,18 @@ void Parser::ParseNextStep() {
6670
if (!IsComplete()) {
6771
// if SYNCH TOKEN - We skip the current symbol on stack top
6872
if (table[top_symbol][current_token] == std::string(SYNCH_TOKEN)) {
69-
std::string ret;
70-
ret += "parser error: at symbol: " + current_token;
71-
parser_errors_.push_back(ret);
73+
parser_errors_.push_back(GenerateErrorMessage(current_token));
7274
stack_.pop();
7375
} else {
76+
auto terminals = table_.GetTerminals();
7477
// check if current stack top matches the current token
7578
if (top_symbol == current_token) {
7679
stack_.pop();
7780
DoNextStep();
81+
} else if (std::find(terminals.begin(), terminals.end(), top_symbol) != terminals.end() &&
82+
std::find(terminals.begin(), terminals.end(), current_token) != terminals.end()) {
83+
parser_errors_.push_back(GenerateErrorMessage(current_token));
84+
DoNextStep();
7885
} else {
7986
// we expand the production
8087
auto prod_rule = table_.GetEntry(top_symbol, current_token);

test/parser/grammar.g

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
## This is the grammar file for JuCC
2+
## Edit this file to make changes to the parsing grammar
3+
## Epsilon is represented by special string EPSILON
4+
5+
## Terminals
6+
%terminals
7+
else float if int void
8+
( ) { } * + - / % ,
9+
<< >> < > <= >= = == != ;
10+
identifier integer_constant float_constant
11+
main cin cout
12+
%
13+
14+
## Non Terminals
15+
%non_terminals
16+
<primary_expression> <constant> <unary_operator> <type_specifier>
17+
<unary_expression> <multiplicative_expression> <additive_expression>
18+
<shift_expression> <relational_expression>
19+
<equality_expression> <assignment_expression> <expression>
20+
<declaration> <init_declarator_list> <init_declarator>
21+
<initializer> <declarator> <direct_declarator>
22+
<statement> <compound_statement> <block_item_list> <block_item>
23+
<expression_statement> <selection_statement> <program>
24+
%
25+
26+
## Start Symbol
27+
%start
28+
<program>
29+
%
30+
31+
## Grammar for the language
32+
%rules
33+
## Expressions
34+
<primary_expression> : identifier
35+
<primary_expression> : <constant>
36+
<primary_expression> : ( <expression> )
37+
<constant> : integer_constant
38+
<constant> : float_constant
39+
<unary_operator> : +
40+
<unary_operator> : -
41+
<unary_expression> : <primary_expression>
42+
<unary_expression> : <unary_operator> <primary_expression>
43+
<multiplicative_expression> : <unary_expression>
44+
<multiplicative_expression> : <multiplicative_expression> * <unary_expression>
45+
<multiplicative_expression> : <multiplicative_expression> / <unary_expression>
46+
<multiplicative_expression> : <multiplicative_expression> % <unary_expression>
47+
<additive_expression> : <multiplicative_expression>
48+
<additive_expression> : <additive_expression> + <multiplicative_expression>
49+
<additive_expression> : <additive_expression> - <multiplicative_expression>
50+
<shift_expression> : <additive_expression>
51+
<shift_expression> : cin >> <additive_expression>
52+
<shift_expression> : cout << <additive_expression>
53+
<shift_expression> : <shift_expression> << <additive_expression>
54+
<shift_expression> : <shift_expression> >> <additive_expression>
55+
<relational_expression> : <shift_expression>
56+
<relational_expression> : <relational_expression> < <shift_expression>
57+
<relational_expression> : <relational_expression> > <shift_expression>
58+
<relational_expression> : <relational_expression> <= <shift_expression>
59+
<relational_expression> : <relational_expression> >= <shift_expression>
60+
<equality_expression> : <relational_expression>
61+
<equality_expression> : <equality_expression> == <relational_expression>
62+
<equality_expression> : <equality_expression> != <relational_expression>
63+
<assignment_expression> : <equality_expression>
64+
<expression> : <assignment_expression>
65+
66+
## Declarations
67+
<declaration> : <type_specifier> <init_declarator_list> ;
68+
<init_declarator_list> : <init_declarator>
69+
<init_declarator_list> : <init_declarator> , <init_declarator_list>
70+
<init_declarator_list> : EPSILON
71+
<init_declarator> : <declarator>
72+
<init_declarator> : <declarator> = <initializer>
73+
<type_specifier> : void
74+
<type_specifier> : int
75+
<type_specifier> : float
76+
<declarator> : <direct_declarator>
77+
<direct_declarator> : identifier
78+
<direct_declarator> : ( <declarator> )
79+
<initializer> : <assignment_expression>
80+
81+
## Statements
82+
<statement> : <compound_statement>
83+
<statement> : <expression_statement>
84+
<statement> : <selection_statement>
85+
<compound_statement> : { <block_item_list> }
86+
<block_item_list> : <block_item>
87+
<block_item_list> : <block_item> <block_item_list>
88+
<block_item_list> : EPSILON
89+
<block_item> : <declaration>
90+
<block_item> : <statement>
91+
<expression_statement> : <expression> ;
92+
<expression_statement> : ;
93+
<selection_statement> : if ( <expression> ) <compound_statement>
94+
<selection_statement> : if ( <expression> ) <compound_statement> else <compound_statement>
95+
96+
## Main
97+
<program> : <type_specifier> main ( ) <compound_statement>
98+
%

test/parser/parser_test.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "grammar/grammar.h"
44
#include "gtest/gtest.h"
55
#include "parser/parsing_table.h"
6+
#include "utils/utils.h"
67

78
using jucc::parser::Parser;
89
using jucc::parser::ParsingTable;
@@ -162,3 +163,104 @@ TEST(parser, Parser1) {
162163

163164
ASSERT_TRUE(pars.IsComplete());
164165
}
166+
167+
TEST(parser, Parser2) {
168+
grammar::Parser grammar_parser = grammar::Parser("../test/parser/grammar.g");
169+
ASSERT_TRUE(grammar_parser.Parse());
170+
171+
grammar::Productions raw_productions = grammar_parser.GetProductions();
172+
grammar::Productions productions = utils::RemoveAllPossibleAmbiguity(raw_productions);
173+
174+
/* Calculate first and follows to build the LL(1) parsing table */
175+
auto nullables = utils::CalcNullables(productions);
176+
auto firsts = utils::CalcFirsts(productions, nullables);
177+
auto follows = utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol());
178+
auto terminals = grammar_parser.GetTerminals();
179+
auto non_terminals = utils::GetAllNonTerminals(productions);
180+
181+
ParsingTable parsing_table = ParsingTable(terminals, non_terminals);
182+
parsing_table.SetFirsts(firsts);
183+
parsing_table.SetFollows(follows);
184+
parsing_table.SetProductions(productions);
185+
parsing_table.BuildTable();
186+
ASSERT_EQ(parsing_table.GetErrors().size(), 0);
187+
std::vector<std::string> input_string = {"int", "main", "(", ")", "{", "if", "integer_constant", ";", "}"};
188+
Parser pars = Parser();
189+
190+
pars.SetInputString(input_string);
191+
pars.SetStartSymbol(grammar_parser.GetStartSymbol());
192+
pars.ResetParsing();
193+
pars.SetParsingTable(parsing_table);
194+
195+
std::vector<std::string> parser_errors;
196+
197+
pars.ParseNextStep();
198+
parser_errors = pars.GetParserErrors();
199+
ASSERT_EQ(parser_errors.size(), 0);
200+
201+
pars.ParseNextStep();
202+
parser_errors = pars.GetParserErrors();
203+
ASSERT_EQ(parser_errors.size(), 0);
204+
205+
pars.ParseNextStep();
206+
parser_errors = pars.GetParserErrors();
207+
ASSERT_EQ(parser_errors.size(), 0);
208+
209+
pars.ParseNextStep();
210+
parser_errors = pars.GetParserErrors();
211+
ASSERT_EQ(parser_errors.size(), 0);
212+
213+
pars.ParseNextStep();
214+
parser_errors = pars.GetParserErrors();
215+
ASSERT_EQ(parser_errors.size(), 0);
216+
217+
pars.ParseNextStep();
218+
parser_errors = pars.GetParserErrors();
219+
ASSERT_EQ(parser_errors.size(), 0);
220+
221+
pars.ParseNextStep();
222+
parser_errors = pars.GetParserErrors();
223+
ASSERT_EQ(parser_errors.size(), 0);
224+
225+
pars.ParseNextStep();
226+
parser_errors = pars.GetParserErrors();
227+
ASSERT_EQ(parser_errors.size(), 0);
228+
229+
pars.ParseNextStep();
230+
parser_errors = pars.GetParserErrors();
231+
ASSERT_EQ(parser_errors.size(), 0);
232+
233+
pars.ParseNextStep();
234+
parser_errors = pars.GetParserErrors();
235+
ASSERT_EQ(parser_errors.size(), 0);
236+
237+
pars.ParseNextStep();
238+
parser_errors = pars.GetParserErrors();
239+
ASSERT_EQ(parser_errors.size(), 0);
240+
241+
pars.ParseNextStep();
242+
parser_errors = pars.GetParserErrors();
243+
ASSERT_EQ(parser_errors.size(), 0);
244+
245+
pars.ParseNextStep();
246+
parser_errors = pars.GetParserErrors();
247+
ASSERT_EQ(parser_errors.size(), 0);
248+
249+
pars.ParseNextStep();
250+
parser_errors = pars.GetParserErrors();
251+
ASSERT_EQ(parser_errors.size(), 1);
252+
253+
pars.ParseNextStep();
254+
parser_errors = pars.GetParserErrors();
255+
ASSERT_EQ(parser_errors.size(), 2);
256+
257+
pars.ParseNextStep();
258+
parser_errors = pars.GetParserErrors();
259+
ASSERT_EQ(parser_errors.size(), 3);
260+
261+
pars.ParseNextStep();
262+
parser_errors = pars.GetParserErrors();
263+
ASSERT_EQ(parser_errors.size(), 4);
264+
265+
ASSERT_TRUE(pars.IsComplete());
266+
}

0 commit comments

Comments
 (0)