Skip to content

Commit 61d53d9

Browse files
authored
Update CalcFirsts (#30)
* Changed: firsts only returns non-terminals * Changed: test names now uniform
1 parent b5234c5 commit 61d53d9

File tree

10 files changed

+94
-63
lines changed

10 files changed

+94
-63
lines changed

src/include/utils/first_follow.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ std::unordered_map<std::string, bool> CalcNullables(const grammar::Productions &
2424
using SymbolsMap = std::unordered_map<std::string, std::vector<std::string>>;
2525

2626
/**
27-
* For each non terminal in given set of productions computes Firsts, and in case of terminals
28-
* it returns the same.
29-
* @returns an unordered_map keyed by the each symbol in the grammar of a vector of terminals.
27+
* For each non terminal in given set of productions computes Firsts.
28+
* @returns an unordered_map keyed by non terminals in the grammar of a vector of terminals.
3029
*/
3130
SymbolsMap CalcFirsts(const grammar::Productions & /*augmented_grammar*/,
3231
const std::unordered_map<std::string, bool> & /*nullables*/);

src/include/utils/utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ grammar::Productions RemoveAllPossibleAmbiguity(const grammar::Productions & /*p
2323
*/
2424
std::vector<std::string> GetAllNonTerminals(const grammar::Productions & /*prods*/);
2525

26+
/**
27+
* @returns a list of terminal symbols from the given set of productions.
28+
*/
29+
std::vector<std::string> GetAllTerminals(const grammar::Productions & /*prods*/);
30+
2631
} // namespace utils
2732
} // namespace jucc
2833
#endif

src/main/main.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ auto main(int argc, char *argv[]) -> int {
5353
}
5454

5555
jucc::grammar::Productions raw_productions = grammar_parser.GetProductions();
56-
5756
jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions);
5857

5958
auto nullables = jucc::utils::CalcNullables(productions);

src/utils/first_follow.cpp

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,15 @@
22

33
#include <functional>
44

5-
#include "utils/left_factoring.h"
5+
#include "utils/utils.h"
66

77
namespace jucc::utils {
88
std::unordered_map<std::string, bool> CalcNullables(const grammar::Productions &augmented_grammar) {
99
std::unordered_map<std::string, bool> nullables;
1010
// set all terminals to non - nullable
11-
for (const auto &production : augmented_grammar) {
12-
for (const auto &rule : production.GetRules()) {
13-
for (const auto &symbol : rule.GetEntities()) {
14-
// if symbol is not present as a parent in grammar, then it is a terminal
15-
if (!HasParent(augmented_grammar, symbol)) {
16-
nullables[symbol] = false;
17-
}
18-
}
19-
}
11+
auto terminals = GetAllTerminals(augmented_grammar);
12+
for (const auto &term : terminals) {
13+
nullables[term] = false;
2014
}
2115

2216
// EPSILON is nullable by definition
@@ -77,17 +71,14 @@ SymbolsMap CalcFirsts(const grammar::Productions &augmented_grammar,
7771
// finished -> used to check if any new symbols are added to any FIRST(non-terminal) in a particular iteration
7872
// useful in case of cycles in productions
7973
bool finished = false;
80-
for (const auto &production : augmented_grammar) {
81-
for (const auto &rules : production.GetRules()) {
82-
for (const auto &symbol : rules.GetEntities()) {
83-
if (!HasParent(augmented_grammar, symbol)) {
84-
// if symbol is not present as a parent in grammar, then it is a terminal
85-
// if X is a terminal, then FIRST(X) = {X}
86-
firsts[symbol] = {symbol};
87-
}
88-
}
89-
}
74+
// store terminals to later remove from final results
75+
// terminals used as base case in recursion
76+
auto terminals = GetAllTerminals(augmented_grammar);
77+
for (const auto &term : terminals) {
78+
firsts[term] = {term};
9079
}
80+
// add base case for EPSILON
81+
firsts[std::string(grammar::EPSILON)] = {std::string(grammar::EPSILON)};
9182

9283
/* at this point, map of FIRST only contains terminal -> {terminal} mappings */
9384
std::function<std::vector<std::string>(const std::string &, std::vector<std::string> &)> calc_recursive;
@@ -96,7 +87,7 @@ SymbolsMap CalcFirsts(const grammar::Productions &augmented_grammar,
9687
// lambda function that returns vector<string>
9788
calc_recursive = [&](const std::string &key, std::vector<std::string> &path) {
9889
// FIRST(terminal) = {terminal}
99-
if (!HasParent(augmented_grammar, key)) {
90+
if (!grammar::HasParent(augmented_grammar, key)) {
10091
return firsts[key];
10192
}
10293

@@ -170,6 +161,12 @@ SymbolsMap CalcFirsts(const grammar::Productions &augmented_grammar,
170161
}
171162
}
172163

164+
// remove terminals and EPSILON from final results
165+
for (const auto &term : terminals) {
166+
firsts.erase(term);
167+
}
168+
firsts.erase(std::string(grammar::EPSILON));
169+
173170
return firsts;
174171
}
175172

@@ -178,6 +175,15 @@ SymbolsMap CalcFollows(const grammar::Productions &augmented_grammar, const Symb
178175
SymbolsMap follows;
179176
bool finished = false;
180177

178+
// calculating augmented firsts with terminals and EPSILON
179+
auto augmented_firsts = firsts;
180+
augmented_firsts[std::string(grammar::EPSILON)] = {std::string(grammar::EPSILON)};
181+
auto terminals = GetAllTerminals(augmented_grammar);
182+
for (const auto &term : terminals) {
183+
augmented_firsts[term] = {term};
184+
}
185+
186+
// initialize follows
181187
for (const auto &production : augmented_grammar) {
182188
if (production.GetParent() == start_symbol) {
183189
// if production head / parent is the start_symbol, then FOLLOW(head) = {$}
@@ -197,11 +203,11 @@ SymbolsMap CalcFollows(const grammar::Productions &augmented_grammar, const Symb
197203
// if A -> alpha B beta is a production, where B -> non-terminal & alpha, beta -> set of symbols
198204
// then FOLLOW(B) contains {FIRST(beta) - EPSILON}
199205
// if mid aka current symbol is terminal, then ignore
200-
if (HasParent(augmented_grammar, mid)) {
206+
if (grammar::HasParent(augmented_grammar, mid)) {
201207
auto next_itr = symbol_itr + 1;
202208
for (; next_itr != rule.GetEntities().end(); next_itr++) {
203209
// next_itr -> iterates through every symbol in beta
204-
for (const auto &der : firsts.at(*next_itr)) {
210+
for (const auto &der : augmented_firsts.at(*next_itr)) {
205211
// discarding EPSILON
206212
if (der != std::string(grammar::EPSILON) &&
207213
find(follows[mid].begin(), follows[mid].end(), der) == follows[mid].end()) {

src/utils/utils.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "utils/utils.h"
22

3+
#include <algorithm>
4+
35
namespace jucc::utils {
46
grammar::Productions RemoveAllPossibleAmbiguity(const grammar::Productions &prods) {
57
grammar::Productions clean;
@@ -10,6 +12,7 @@ grammar::Productions RemoveAllPossibleAmbiguity(const grammar::Productions &prod
1012
clean.insert(clean.end(), nag.begin(), nag.end());
1113
}
1214
}
15+
1316
return clean;
1417
}
1518

@@ -18,7 +21,26 @@ std::vector<std::string> GetAllNonTerminals(const grammar::Productions &prods) {
1821
for (const auto &prod : prods) {
1922
non_terminals.push_back(prod.GetParent());
2023
}
24+
2125
return non_terminals;
2226
}
2327

28+
std::vector<std::string> GetAllTerminals(const grammar::Productions &prods) {
29+
std::vector<std::string> terminals;
30+
for (const auto &production : prods) {
31+
for (const auto &rule : production.GetRules()) {
32+
for (const auto &symbol : rule.GetEntities()) {
33+
// if symbol is not present as a parent in grammar, then it is a terminal
34+
// EPSILION is ignored
35+
if (!grammar::HasParent(prods, symbol) && symbol != std::string(grammar::EPSILON)) {
36+
terminals.push_back(symbol);
37+
}
38+
}
39+
}
40+
}
41+
42+
terminals.erase(std::unique(terminals.begin(), terminals.end()), terminals.end());
43+
return terminals;
44+
}
45+
2446
} // namespace jucc::utils

test/grammar/grammar_test.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
using jucc::grammar::Parser;
66

7-
TEST(grammar, parser0) {
7+
TEST(grammar, Parser0) {
88
Parser parser = Parser("../test/grammar/grammar_test_0.g");
99
ASSERT_EQ(true, parser.Parse());
1010
ASSERT_EQ("<primary_expression>", parser.GetStartSymbol());
@@ -56,115 +56,115 @@ TEST(grammar, parser0) {
5656
ASSERT_EQ(rule3, parser.GetProductions()[0].GetRules()[0].GetEntities());
5757
}
5858

59-
TEST(grammar, parser1) {
59+
TEST(grammar, Parser1) {
6060
Parser parser = Parser("../test/grammar/grammar_test_1.g");
6161
ASSERT_EQ(false, parser.Parse());
6262
ASSERT_EQ("grammar parsing error: invalid token %terminals", parser.GetError());
6363
}
6464

65-
TEST(grammar, parser2) {
65+
TEST(grammar, Parser2) {
6666
Parser parser = Parser("../test/grammar/grammar_test_2.g");
6767
ASSERT_EQ(false, parser.Parse());
6868
ASSERT_EQ("grammar parsing error: invalid token %non_terminals", parser.GetError());
6969
}
7070

71-
TEST(grammar, parser3) {
71+
TEST(grammar, Parser3) {
7272
Parser parser = Parser("../test/grammar/grammar_test_3.g");
7373
ASSERT_EQ(false, parser.Parse());
7474
ASSERT_EQ("grammar parsing error: invalid token %rules", parser.GetError());
7575
}
7676

77-
TEST(grammar, parser4) {
77+
TEST(grammar, Parser4) {
7878
Parser parser = Parser("../test/grammar/grammar_test_4.g");
7979
ASSERT_EQ(false, parser.Parse());
8080
ASSERT_EQ("grammar parsing error: invalid token %start", parser.GetError());
8181
}
8282

83-
TEST(grammar, parser5) {
83+
TEST(grammar, Parser5) {
8484
Parser parser = Parser("../test/grammar/grammar_test_5.g");
8585
ASSERT_EQ(false, parser.Parse());
8686
ASSERT_EQ("grammar parsing error: invalid token %", parser.GetError());
8787
}
8888

89-
TEST(grammar, parser6) {
89+
TEST(grammar, Parser6) {
9090
Parser parser = Parser("../test/grammar/grammar_test_6.g");
9191
ASSERT_EQ(false, parser.Parse());
9292
ASSERT_EQ("grammar parsing error: invalid token outside block: bruh", parser.GetError());
9393
}
9494

95-
TEST(grammar, parser7) {
95+
TEST(grammar, Parser7) {
9696
Parser parser = Parser("../test/grammar/grammar_test_7.g");
9797
ASSERT_EQ(false, parser.Parse());
9898
ASSERT_EQ("grammar parsing error: EPSILON is reserved", parser.GetError());
9999
}
100100

101-
TEST(grammar, parser8) {
101+
TEST(grammar, Parser8) {
102102
Parser parser = Parser("../test/grammar/grammar_test_8.g");
103103
ASSERT_EQ(false, parser.Parse());
104104
ASSERT_EQ("grammar parsing error: EPSILON is reserved", parser.GetError());
105105
}
106106

107-
TEST(grammar, parser9) {
107+
TEST(grammar, Parser9) {
108108
Parser parser = Parser("../test/grammar/grammar_test_9.g");
109109
ASSERT_EQ(false, parser.Parse());
110110
ASSERT_EQ("grammar parsing error: ambiguous start symbol", parser.GetError());
111111
}
112112

113-
TEST(grammar, parser10) {
113+
TEST(grammar, Parser10) {
114114
Parser parser = Parser("../test/grammar/grammar_test_10.g");
115115
ASSERT_EQ(false, parser.Parse());
116116
ASSERT_EQ("grammar parsing error: production cannot start with EPSILON", parser.GetError());
117117
}
118118

119-
TEST(grammar, parser11) {
119+
TEST(grammar, Parser11) {
120120
Parser parser = Parser("../test/grammar/grammar_test_11.g");
121121
ASSERT_EQ(false, parser.Parse());
122122
ASSERT_EQ("grammar parsing error: rules syntax error ':' expected: bruh", parser.GetError());
123123
}
124124

125-
TEST(grammar, parser12) {
125+
TEST(grammar, Parser12) {
126126
Parser parser = Parser("../test/grammar/grammar_test_12.g");
127127
ASSERT_EQ(false, parser.Parse());
128128
ASSERT_EQ("grammar parsing error: rules syntax error ':' expected", parser.GetError());
129129
}
130130

131-
TEST(grammar, parser13) {
131+
TEST(grammar, Parser13) {
132132
Parser parser = Parser("../test/grammar/grammar_test_13.g");
133133
ASSERT_EQ(false, parser.Parse());
134134
ASSERT_EQ("grammar parsing error: block is incomplete '%' expected", parser.GetError());
135135
}
136136

137-
TEST(grammar, parser14) {
137+
TEST(grammar, Parser14) {
138138
Parser parser = Parser("../test/grammar/grammar_test_14.g");
139139
ASSERT_EQ(false, parser.Parse());
140140
ASSERT_EQ("grammar parsing error: inconsistent or duplicate terminals", parser.GetError());
141141
}
142142

143-
TEST(grammar, parser15) {
143+
TEST(grammar, Parser15) {
144144
Parser parser = Parser("../test/grammar/grammar_test_15.g");
145145
ASSERT_EQ(false, parser.Parse());
146146
ASSERT_EQ("grammar parsing error: inconsistent or duplicate non_terminals", parser.GetError());
147147
}
148148

149-
TEST(grammar, parser16) {
149+
TEST(grammar, Parser16) {
150150
Parser parser = Parser("../test/grammar/grammar_test_16.g");
151151
ASSERT_EQ(false, parser.Parse());
152152
ASSERT_EQ("grammar parsing error: terminals and non_terminals not disjoint", parser.GetError());
153153
}
154154

155-
TEST(grammar, parser17) {
155+
TEST(grammar, Parser17) {
156156
Parser parser = Parser("../test/grammar/grammar_test_17.g");
157157
ASSERT_EQ(false, parser.Parse());
158158
ASSERT_EQ("grammar parsing error: non_terminal not found: <bruh>", parser.GetError());
159159
}
160160

161-
TEST(grammar, parser18) {
161+
TEST(grammar, Parser18) {
162162
Parser parser = Parser("../test/grammar/grammar_test_18.g");
163163
ASSERT_EQ(false, parser.Parse());
164164
ASSERT_EQ("grammar parsing error: rule token is not defined: bruh", parser.GetError());
165165
}
166166

167-
TEST(grammar, parser19) {
167+
TEST(grammar, Parser19) {
168168
Parser parser = Parser("invalid_file_path");
169169
ASSERT_EQ(false, parser.Parse());
170170
ASSERT_EQ("grammar parsing error: file not found", parser.GetError());

test/lexer/lexer_test.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
using jucc::lexer::Lexer;
66

7-
TEST(lexer, lexer1) {
7+
TEST(lexer, Lexer1) {
88
std::string filename("../test/lexer/input.txt");
99
Lexer lex = Lexer();
1010

@@ -74,7 +74,7 @@ TEST(lexer, lexer1) {
7474
is.close();
7575
}
7676

77-
TEST(lexer, lexer2) {
77+
TEST(lexer, Lexer2) {
7878
std::string filename("../test/lexer/input_err1.txt");
7979
Lexer lex = Lexer();
8080

@@ -149,7 +149,7 @@ TEST(lexer, lexer2) {
149149
is.close();
150150
}
151151

152-
TEST(lexer, lexer3) {
152+
TEST(lexer, Lexer3) {
153153
std::string filename("../test/lexer/comments.txt");
154154
Lexer lex = Lexer();
155155

@@ -188,7 +188,7 @@ TEST(lexer, lexer3) {
188188
is.close();
189189
}
190190

191-
TEST(lexer, lexer4) {
191+
TEST(lexer, Lexer4) {
192192
std::string filename("../test/lexer/input_err2.txt");
193193
Lexer lex = Lexer();
194194

@@ -227,7 +227,7 @@ TEST(lexer, lexer4) {
227227
is.close();
228228
}
229229

230-
TEST(lexer, lexer5) {
230+
TEST(lexer, Lexer5) {
231231
/**
232232
* To test the symbol table implementation with lexer
233233
*
@@ -329,7 +329,7 @@ TEST(lexer, lexer5) {
329329
is.close();
330330
}
331331

332-
TEST(lexer, lexer6) {
332+
TEST(lexer, Lexer6) {
333333
std::string filename("../test/lexer/arithmetic.txt");
334334
Lexer lex = Lexer();
335335

@@ -482,7 +482,7 @@ TEST(lexer, lexer6) {
482482
is.close();
483483
}
484484

485-
TEST(lexer, lexer7) {
485+
TEST(lexer, Lexer7) {
486486
std::string filename("../test/lexer/comments.txt");
487487
Lexer lex = Lexer();
488488

0 commit comments

Comments
 (0)