Skip to content

Commit 6d663b7

Browse files
Feature : Parsing Implementation. (#19)
* added parsing table structure * added parsing table implementation * added parsing implementation * fix static cast bug * fix static cast bug * fixed linting bug * added parsing integration with main method * added parsing table tests * added more tests * removed printing code for parsing table * Added: cin and cout tokens to grammar.g (#22) * added parsing table structure * added parsing table implementation * added parsing implementation * fix static cast bug * fix static cast bug * fixed linting bug * added parsing integration with main method * added parsing table tests * added more tests * removed printing code for parsing table * added more tests and fixed bugs * changed codecov target * Changes addressed * Fixed: tests * Fixed: test names Co-authored-by: Abhishek Pal <[email protected]> Co-authored-by: noob77777 <[email protected]>
1 parent c9bff4d commit 6d663b7

File tree

16 files changed

+1106
-15
lines changed

16 files changed

+1106
-15
lines changed

codecov.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ coverage:
1212
threshold: 5%
1313
patch:
1414
default:
15-
target: 90%
15+
target: 85%
1616

1717
parsers:
1818
gcov:

src/include/lexer/lexer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ enum Token {
6161
TOK_NOT = -36, // !
6262
TOK_NOT_EQUAL_TO = -37, // !=
6363

64+
TOK_MAIN = -38, // main
65+
6466
// cout, cin
6567

6668
TOK_COUT = -27, // cout
@@ -141,6 +143,11 @@ class Lexer {
141143
*/
142144
std::string GetCurrentDatatype();
143145

146+
/**
147+
* Returns the grammar terminal string that corresponds to the enum token returned by the lexer
148+
*/
149+
static std::string GetTokenType(int token);
150+
144151
/**
145152
* Getter for the current nesting level.
146153
*/

src/include/main/jucc.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,50 @@
33

44
#include <iostream>
55
#include <string>
6+
#include <vector>
7+
8+
#include "grammar/grammar.h"
9+
#include "lexer/lexer.h"
10+
#include "parser/parser.h"
11+
#include "parser/parsing_table.h"
12+
#include "utils/first_follow.h"
13+
#include "utils/left_factoring.h"
14+
#include "utils/left_recursion.h"
615

716
namespace jucc {
817
/**
918
* Dummy function to test setup clang and build config
1019
*/
1120
std::string Hello();
1221

22+
class InputParser {
 private:
  /** Command line arguments kept as strings so that flags can be searched for. */
  std::vector<std::string> tokens_;

 public:
  /**
   * Builds the parser from the raw command line arguments.
   * The arguments are converted to string tokens so that flags
   * in the command line input can be looked up later.
   * @param argc argument count from the command line
   * @param argv argument strings from the command line
   */
  InputParser(int argc, char *argv[]);

  /**
   * Tells whether the command line options contain the given flag.
   * @param flag the flag to search for among the tokens
   * @return true if the flag is present in the command line options, false otherwise
   */
  bool HasFlag(const std::string &flag);

  /**
   * Fetches the argument that follows a flag in the command line options.
   * Example: for `$ jucc -f <filename>`, GetArgument("-f") returns filename.
   * @param flag the flag whose following argument is requested
   * @return the argument as a string
   */
  std::string GetArgument(const std::string &flag);
};
49+
1350
/**
1451
* Dummy function that computes x + y in a deliberately inefficient way
1552
* for benchmarking only

src/include/parser/parser.h

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#ifndef JUCC_PARSER_PARSER_H
2+
#define JUCC_PARSER_PARSER_H
3+
4+
#include <stack>
5+
#include <string>
6+
#include <vector>
7+
8+
#include "parser/parsing_table.h"
9+
#include "utils/first_follow.h"
10+
11+
namespace jucc {
12+
13+
namespace parser {
14+
15+
class Parser {
16+
/**
17+
* A stack to put the symbols and perform the actual parsing
18+
*/
19+
std::stack<std::string> stack_;
20+
21+
/**
22+
* The given input string to parse.
23+
*/
24+
std::vector<std::string> input_string_;
25+
26+
/**
27+
* The start symbol for the grammar
28+
*/
29+
std::string start_symbol_;
30+
31+
/**
32+
* Holds the current step of parsing.
33+
*/
34+
int current_step_{0};
35+
36+
/**
37+
* Holds the build up parsing table object
38+
*/
39+
ParsingTable table_;
40+
41+
/**
42+
* Holds the history of the productions parsed during parsing
43+
*/
44+
std::vector<int> production_history_;
45+
46+
/**
47+
* Holds a copy of the input string initially
48+
* and changes with each step of parsing.
49+
*/
50+
std::vector<std::string> current_string_;
51+
52+
public:
53+
/**
54+
* Constructor for initializing stack and other members.
55+
*/
56+
Parser();
57+
58+
/**
59+
* Used for parsing the next token of the input string
60+
*/
61+
void ParseNextStep();
62+
63+
/**
64+
* Resets the entire parsing process
65+
*/
66+
void ResetParsing();
67+
68+
/**
69+
* Function that returns true when the parsing is completed
70+
*/
71+
bool IsComplete();
72+
73+
/**
74+
* Completes a step of parsing
75+
*/
76+
void DoNextStep();
77+
78+
/* getters and setters*/
79+
void SetInputString(std::vector<std::string> inps);
80+
void SetParsingTable(ParsingTable table);
81+
void SetStartSymbol(std::string start);
82+
[[nodiscard]] const std::vector<int> &GetProductionHistory();
83+
};
84+
} // namespace parser
85+
86+
} // namespace jucc
87+
88+
#endif

src/include/parser/parsing_table.h

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#ifndef JUCC_PARSER_PARSING_TABLE_H
2+
#define JUCC_PARSER_PARSING_TABLE_H
3+
4+
#include <string>
5+
#include <unordered_map>
6+
#include <utility>
7+
#include <vector>
8+
9+
#include "grammar/grammar.h"
10+
#include "utils/first_follow.h"
11+
12+
namespace jucc {
13+
14+
namespace parser {
15+
16+
const char SYNCH_TOKEN[] = "synch";
17+
18+
const char ERROR_TOKEN[] = "error";
19+
20+
class ParsingTable {
21+
public:
22+
using Table = std::unordered_map<std::string, std::unordered_map<std::string, std::string>>;
23+
24+
private:
25+
/**
26+
* stores the parsing table, which is calculated from the productions in the grammar
27+
* and it's first and follow set
28+
*/
29+
Table table_;
30+
31+
/**
32+
* Stores firsts of the productions
33+
*/
34+
utils::SymbolsMap firsts_;
35+
36+
/**
37+
* Stores the follows of the productions
38+
*/
39+
utils::SymbolsMap follows_;
40+
41+
/**
42+
* Stores the productions of the grammar.
43+
*/
44+
grammar::Productions productions_;
45+
46+
/**
47+
* Stores the terminals of the grammar
48+
*/
49+
std::vector<std::string> terminals_;
50+
51+
/**
52+
* Stores the non-termninals of the grammar.
53+
*/
54+
std::vector<std::string> non_terminals_;
55+
56+
public:
57+
/**
58+
* Default constructor
59+
*/
60+
ParsingTable() = default;
61+
62+
/**
63+
* Used for setting synchronization tokens in the parsing table calculated from the
64+
* follow set.
65+
*/
66+
ParsingTable(std::vector<std::string> terms, std::vector<std::string> non_terms)
67+
: terminals_(std::move(terms)), non_terminals_(std::move(non_terms)) {}
68+
69+
/**
70+
* Builds the parsing table from the firsts and follows
71+
*/
72+
void BuildTable();
73+
74+
/**
75+
* Gets the entry in the parsing table corresponding to a terminal and a non-terminal
76+
* Gets the production and the rule number
77+
*/
78+
std::pair<int, int> GetEntry(const std::string &non_terminal_, const std::string &terminal_);
79+
80+
/* getters and setters */
81+
void SetFirsts(utils::SymbolsMap firsts) { firsts_ = std::move(firsts); }
82+
void SetProductions(grammar::Productions productions) { productions_ = std::move(productions); }
83+
void SetFollows(utils::SymbolsMap follows) { follows_ = std::move(follows); }
84+
[[nodiscard]] const utils::SymbolsMap &GetFirsts() { return firsts_; }
85+
[[nodiscard]] const utils::SymbolsMap &GetFollows() { return follows_; }
86+
[[nodiscard]] const grammar::Productions &GetProductions() { return productions_; }
87+
[[nodiscard]] const std::vector<std::string> &GetNonTerminals() { return non_terminals_; }
88+
[[nodiscard]] const std::vector<std::string> &GetTerminals() { return terminals_; }
89+
[[nodiscard]] const Table &GetTable() { return table_; }
90+
};
91+
92+
} // namespace parser
93+
} // namespace jucc
94+
95+
#endif

src/lexer/lexer.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ int Lexer::GetToken(std::ifstream &is) {
4343
} else if (identifier_string_ == "cin") {
4444
current_datatype_ = "";
4545
ret_token = TOK_CIN;
46+
} else if (identifier_string_ == "main") {
47+
current_datatype_ = "";
48+
ret_token = TOK_MAIN;
4649
} else {
4750
ret_token = TOK_IDENTIFIER;
4851

@@ -250,6 +253,79 @@ int Lexer::GetToken(std::ifstream &is) {
250253
return TOK_ERROR;
251254
}
252255

256+
std::string Lexer::GetTokenType(int token) {
257+
std::string ret_string;
258+
if (token == TOK_INT) {
259+
ret_string = "int";
260+
} else if (token == TOK_FLOAT) {
261+
ret_string = "float";
262+
} else if (token == TOK_VOID) {
263+
ret_string = "void";
264+
} else if (token == TOK_IF) {
265+
ret_string = "if";
266+
} else if (token == TOK_ELSE) {
267+
ret_string = "else";
268+
} else if (token == TOK_PAREN_OPEN) {
269+
ret_string = "(";
270+
} else if (token == TOK_PAREN_CLOSE) {
271+
ret_string = ")";
272+
} else if (token == TOK_CURLY_OPEN) {
273+
ret_string = "{";
274+
} else if (token == TOK_CURLY_CLOSE) {
275+
ret_string = "}";
276+
} else if (token == TOK_COMMA) {
277+
ret_string = ",";
278+
} else if (token == TOK_DOT) {
279+
ret_string = ".";
280+
} else if (token == TOK_PLUS) {
281+
ret_string = "+";
282+
} else if (token == TOK_MINUS) {
283+
ret_string = "-";
284+
} else if (token == TOK_DIVIDE) {
285+
ret_string = "/";
286+
} else if (token == TOK_MULTIPLY) {
287+
ret_string = "*";
288+
} else if (token == TOK_MODULUS) {
289+
ret_string = "%";
290+
} else if (token == TOK_SEMICOLON) {
291+
ret_string = ";";
292+
} else if (token == TOK_LEFT_SHIFT) {
293+
ret_string = "<<";
294+
} else if (token == TOK_RIGHT_SHIFT) {
295+
ret_string = ">>";
296+
} else if (token == TOK_EQUAL_TO) {
297+
ret_string = "==";
298+
} else if (token == TOK_ASSIGNMENT) {
299+
ret_string = "=";
300+
} else if (token == TOK_NOT) {
301+
ret_string = "!";
302+
} else if (token == TOK_NOT_EQUAL_TO) {
303+
ret_string = "!=";
304+
} else if (token == TOK_GREATER_THAN) {
305+
ret_string = ">";
306+
} else if (token == TOK_LESS_THAN) {
307+
ret_string = "<";
308+
} else if (token == TOK_GREATER_THAN_OR_EQUALS) {
309+
ret_string = ">=";
310+
} else if (token == TOK_LESS_THAN_OR_EQUALS) {
311+
ret_string = "<=";
312+
} else if (token == TOK_IDENTIFIER) {
313+
ret_string = "identifier";
314+
} else if (token == TOK_DECIMAL) {
315+
ret_string = "integer_constant";
316+
} else if (token == TOK_FRACTIONAL) {
317+
ret_string = "float_constant";
318+
} else if (token == TOK_COUT) {
319+
ret_string = "cout";
320+
} else if (token == TOK_CIN) {
321+
ret_string = "cin";
322+
} else if (token == TOK_MAIN) {
323+
ret_string = "main";
324+
}
325+
326+
return ret_string;
327+
}
328+
253329
std::string Lexer::GetCurrentDatatype() { return current_datatype_; }
254330

255331
std::vector<std::string> Lexer::GetDuplicateSymbolErrors() { return duplicate_symbol_errors_; }

0 commit comments

Comments
 (0)