Commit 7a5dbcf

Fixed: Lexer and added more tests (#9)
* Fixed: Lexer and added more tests
* Update: test files

1 parent 5c6b901 commit 7a5dbcf

10 files changed: +156 -66 lines changed

src/grammar/grammar.cpp

Lines changed: 3 additions & 2 deletions
@@ -185,6 +185,7 @@ bool Parser::Parse() {
   }
 
   // sanity checks
+  // check for undefined tokens and duplicates
   if (curr_parse_state != BASIC) {
     error_ = "grammar parsing error: block is incomplete '%' expected";
     return false;
@@ -198,7 +199,7 @@ bool Parser::Parse() {
   }
 
   if (terminals.size() != terminals_.size()) {
-    error_ = "grammar parsing error: inconsistent terminals";
+    error_ = "grammar parsing error: inconsistent or duplicate terminals";
     return false;
   }
 
@@ -211,7 +212,7 @@ bool Parser::Parse() {
   }
 
   if (non_terminals.size() != non_terminals_.size()) {
-    error_ = "grammar parsing error: inconsistent non_terminals";
+    error_ = "grammar parsing error: inconsistent or duplicate non_terminals";
     return false;
   }
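The size comparisons above are what turn the old "inconsistent" errors into duplicate detection: the parser evidently keeps both a raw list of declared names and a de-duplicated container, so a size mismatch means a name was repeated (or otherwise inconsistent). A minimal sketch of that idea; HasDuplicates is a hypothetical helper, not one of the parser's actual members:

#include <string>
#include <unordered_set>
#include <vector>

// Sketch only: reports whether any declared symbol name repeats.
// Comparing the de-duplicated size against the raw list size is the
// same trick the size checks in Parser::Parse() rely on.
bool HasDuplicates(const std::vector<std::string> &names) {
  std::unordered_set<std::string> unique_names(names.begin(), names.end());
  return unique_names.size() != names.size();
}

For example, HasDuplicates({"int", "float", "int"}) returns true, which is the case the new grammar tests exercise through duplicate declarations.
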
src/include/lexer/lexer.h

Lines changed: 2 additions & 3 deletions
@@ -1,13 +1,11 @@
 #ifndef JUCC_LEXER_LEXER_H
 #define JUCC_LEXER_LEXER_H
 
-#include <cctype>
-#include <cstdio>
 #include <fstream>
-#include <iostream>
 #include <string>
 
 namespace jucc {
+namespace lexer {
 
 enum Token {
   TOK_EOF = -1,
@@ -86,6 +84,7 @@ class Lexer {
   int GetToken(std::ifstream &is);
 };  // class Lexer
 
+}  // namespace lexer
 }  // namespace jucc
 
 #endif

src/lexer/lexer.cpp

Lines changed: 7 additions & 2 deletions
@@ -1,6 +1,6 @@
 #include "lexer/lexer.h"
 
-namespace jucc {
+namespace jucc::lexer {
 
 int Lexer::GetToken(std::ifstream &is) {
   static char last_char = ' ';
@@ -9,6 +9,11 @@ int Lexer::GetToken(std::ifstream &is) {
     is.get(last_char);
   }
 
+  // return TOK_EOF if end of file is reached.
+  if (is.eof()) {
+    return TOK_EOF;
+  }
+
   // check for identifier and literal tokens
   // RE : [a-zA-Z][0-9a-zA-Z]
   if (isalpha(last_char) != 0) {
@@ -190,4 +195,4 @@ int Lexer::GetToken(std::ifstream &is) {
   error_string_ = "Unexpected Token\n";
   return TOK_ERROR;
 }
-}  // namespace jucc
+}  // namespace jucc::lexer
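
The new eof() check gives callers a clean stopping condition: once the stream is exhausted, GetToken reports TOK_EOF instead of spinning on the last character. A minimal sketch of a caller loop built on that contract; Tokenize is a hypothetical helper, not part of this commit:

#include <fstream>
#include <string>
#include <vector>

#include "lexer/lexer.h"

// Sketch only: drain a source file into a flat token list,
// stopping as soon as the lexer signals end of input.
std::vector<int> Tokenize(const std::string &path) {
  std::ifstream is(path);
  jucc::lexer::Lexer lex;
  std::vector<int> tokens;
  int token = lex.GetToken(is);
  while (token != jucc::lexer::TOK_EOF) {
    tokens.push_back(token);
    token = lex.GetToken(is);
  }
  return tokens;
}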

src/main/main.cpp

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@
  *-------------------------------------------------------------------------
  */
 
-#include "lexer/lexer.h"
 #include "main/jucc.h"
 using jucc::Hello;
 
test/grammar/grammar_test.cpp

Lines changed: 2 additions & 2 deletions
@@ -137,13 +137,13 @@ TEST(grammar, parser12) {
 TEST(grammar, parser13) {
   Parser parser = Parser("../test/grammar/grammar_test_13.g");
   ASSERT_EQ(false, parser.Parse());
-  ASSERT_EQ("grammar parsing error: inconsistent terminals", parser.GetError());
+  ASSERT_EQ("grammar parsing error: inconsistent or duplicate terminals", parser.GetError());
 }
 
 TEST(grammar, parser14) {
   Parser parser = Parser("../test/grammar/grammar_test_14.g");
   ASSERT_EQ(false, parser.Parse());
-  ASSERT_EQ("grammar parsing error: inconsistent non_terminals", parser.GetError());
+  ASSERT_EQ("grammar parsing error: inconsistent or duplicate non_terminals", parser.GetError());
 }
 
 TEST(grammar, parser15) {

test/lexer/comments.txt

Lines changed: 1 addition & 1 deletion
@@ -9,4 +9,4 @@ int main(){
   // Just another comment
   // Basically a hello world program, Peace!
   cout << "Hello";
-}
+}

test/lexer/input.txt

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 
-
-
-int main(){
+int main() {
+  int x, y = 5;
+  float f = 5.8;
   cout << "Hello";
-}
+}

test/lexer/input_err1.txt

Lines changed: 5 additions & 5 deletions
@@ -1,9 +1,9 @@
 
 
-
-
 int main(){
   int a = 5;
-  int b = 5.6d;
-  cout << b;
-}
+  int b= 5.6d;
+  cin >>b;
+  if (5 <4.2) {
+  cout << 4; }
+}

test/lexer/input_err2.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+
+int main() { int x == 'bruh' ; }
+// bruh

test/lexer/lexer_test.cpp

Lines changed: 129 additions & 46 deletions
@@ -2,7 +2,7 @@
 
 #include "gtest/gtest.h"
 
-using jucc::Lexer;
+using jucc::lexer::Lexer;
 
 TEST(lexer, lexer1) {
   std::string filename("../test/lexer/input.txt");
@@ -12,26 +12,51 @@ TEST(lexer, lexer1) {
   int token;
 
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_INT, token);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_CLOSE, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_CLOSE, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COUT, token);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_LEFT_SHIFT, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_LITERAL, token);
+  ASSERT_EQ(jucc::lexer::TOK_COMMA, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_SEMICOLON, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_CLOSE, token);
-
+  ASSERT_EQ(jucc::lexer::TOK_ASSIGNMENT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_DECIMAL, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_FLOAT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_ASSIGNMENT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_FRACTIONAL, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_COUT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_LEFT_SHIFT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_LITERAL, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_CLOSE, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_EOF, token);
   is.close();
 }
 
@@ -42,45 +67,71 @@ TEST(lexer, lexer2) {
   std::ifstream is(filename);
   int token;
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_INT, token);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_OPEN, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_CLOSE, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_OPEN, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_ASSIGNMENT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_DECIMAL, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_CLOSE, token);
+  ASSERT_EQ(jucc::lexer::TOK_ASSIGNMENT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_ERROR, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_INT, token);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_CIN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_ASSIGNMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_RIGHT_SHIFT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_DECIMAL, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_SEMICOLON, token);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_INT, token);
+  ASSERT_EQ(jucc::lexer::TOK_IF, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_ASSIGNMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_DECIMAL, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_ERROR, token);
+  ASSERT_EQ(jucc::lexer::TOK_LESS_THAN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_SEMICOLON, token);
+  ASSERT_EQ(jucc::lexer::TOK_FRACTIONAL, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COUT, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_CLOSE, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_LEFT_SHIFT, token);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_COUT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_SEMICOLON, token);
+  ASSERT_EQ(jucc::lexer::TOK_LEFT_SHIFT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_CLOSE, token);
+  ASSERT_EQ(jucc::lexer::TOK_DECIMAL, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_CLOSE, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_CLOSE, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_EOF, token);
   is.close();
 }
 
@@ -91,32 +142,64 @@ TEST(lexer, lexer3) {
   std::ifstream is(filename);
   int token;
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COMMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_COMMENT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_COMMENT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_OPEN, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_CLOSE, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_OPEN, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_COMMENT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COMMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_COMMENT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_INT, token);
+  ASSERT_EQ(jucc::lexer::TOK_COUT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_IDENTIFIER, token);
+  ASSERT_EQ(jucc::lexer::TOK_LEFT_SHIFT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_LITERAL, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_PAREN_CLOSE, token);
+  ASSERT_EQ(jucc::lexer::TOK_SEMICOLON, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_OPEN, token);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_CLOSE, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COMMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_EOF, token);
+  is.close();
+}
+
+TEST(lexer, lexer4) {
+  std::string filename("../test/lexer/input_err2.txt");
+  Lexer lex = Lexer();
+
+  std::ifstream is(filename);
+  int token;
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COMMENT, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_COUT, token);
+  ASSERT_EQ(jucc::lexer::TOK_PAREN_CLOSE, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_LEFT_SHIFT, token);
+  ASSERT_EQ(jucc::lexer::TOK_CURLY_OPEN, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_LITERAL, token);
+  ASSERT_EQ(jucc::lexer::TOK_INT, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_SEMICOLON, token);
+  ASSERT_EQ(jucc::lexer::TOK_IDENTIFIER, token);
   token = lex.GetToken(is);
-  ASSERT_EQ(jucc::TOK_CURLY_CLOSE, token);
+  ASSERT_EQ(jucc::lexer::TOK_EQUAL_TO, token);
+  token = lex.GetToken(is);
+  ASSERT_EQ(jucc::lexer::TOK_ERROR, token);
+
+  // UNEXPECTED BEHAVIOR AFTER THIS
+  // TOKENS received -2 -100 -2 -1
   is.close();
 }
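
The new assertions all follow the same call-then-assert rhythm, so a table-driven helper could express the same checks more compactly. A sketch only, not part of this commit; ExpectTokenSequence is a hypothetical helper, shown here with the lexer4 expectations:

#include <fstream>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "lexer/lexer.h"

// Sketch only: compare the lexer's output against an expected token
// sequence instead of writing one GetToken/ASSERT_EQ pair per token.
void ExpectTokenSequence(const std::string &path, const std::vector<int> &expected) {
  std::ifstream is(path);
  jucc::lexer::Lexer lex;
  for (int expected_token : expected) {
    ASSERT_EQ(expected_token, lex.GetToken(is));
  }
  is.close();
}

TEST(lexer, token_sequence_sketch) {
  ExpectTokenSequence("../test/lexer/input_err2.txt",
                      {jucc::lexer::TOK_INT, jucc::lexer::TOK_IDENTIFIER,
                       jucc::lexer::TOK_PAREN_OPEN, jucc::lexer::TOK_PAREN_CLOSE,
                       jucc::lexer::TOK_CURLY_OPEN, jucc::lexer::TOK_INT,
                       jucc::lexer::TOK_IDENTIFIER, jucc::lexer::TOK_EQUAL_TO,
                       jucc::lexer::TOK_ERROR});
}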
