Skip to content

Commit 7a498df

Browse files
TheSYNcoderbisakhmondalnoob77777
authored
Feature : symbol table Implementation (#6)
* added symbol table structure * Left recursion, Left Factoring, Trie, Unit Tests (#4) * template * direct left recursion * updated grammar * feat: brand new trie * feat: left factoring * left factoring gtest * minor fixes && code quality improvements * efficient impl * unwanted dockerfile comment removed * CI fix * more comments!! HUH I WANT TO HAVE SEX MAAN🥺!! * more comments!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! * more * added symbol table structure * added symbol table implementation * fixed clang format * fixed test name in grammar_test * added mem check in symbol_table * Fixed: seg_fault * Merge branch * symbol table integrated with lexer * minor fixes and improved code quality * Updated: lexer_test Co-authored-by: Bisakh Mondal <[email protected]> Co-authored-by: noob77777 <[email protected]>
1 parent 76fe90d commit 7a498df

File tree

9 files changed

+559
-27
lines changed

9 files changed

+559
-27
lines changed

src/grammar/grammar.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,6 @@
66

77
namespace jucc::grammar {
88

9-
std::string Rule::ToString() const {
10-
std::stringstream ss;
11-
for (const auto &entity : entities_) {
12-
ss << entity;
13-
}
14-
return ss.str();
15-
}
16-
17-
bool Rule::HasPrefix(const Entity &prefix) const {
18-
// Takes care of even EPSILON too.
19-
if (prefix.size() > entities_.size()) {
20-
return false;
21-
}
22-
for (int i = 0; i < static_cast<int>(prefix.size()); i++) {
23-
if (entities_[i] != prefix[i]) {
24-
return false;
25-
}
26-
}
27-
return true;
28-
}
29-
309
Parser::Parser(const char *filepath) { file_ = std::ifstream(filepath); }
3110

3211
Parser::~Parser() {
@@ -272,4 +251,26 @@ bool Parser::Parse() {
272251
error_ = "grammar parsing error: file not found";
273252
return false;
274253
}
254+
255+
std::string Rule::ToString() const {
256+
std::stringstream ss;
257+
for (const auto &entity : entities_) {
258+
ss << entity;
259+
}
260+
return ss.str();
261+
}
262+
263+
bool Rule::HasPrefix(const Entity &prefix) const {
264+
// Takes care of even EPSILON too.
265+
if (prefix.size() > entities_.size()) {
266+
return false;
267+
}
268+
for (int i = 0; i < static_cast<int>(prefix.size()); i++) {
269+
if (entities_[i] != prefix[i]) {
270+
return false;
271+
}
272+
}
273+
return true;
274+
}
275+
275276
} // namespace jucc::grammar

src/include/lexer/lexer.h

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
#include <fstream>
55
#include <string>
6+
#include <vector>
7+
8+
#include "symbol_table/symbol_table.h"
69

710
namespace jucc {
811
namespace lexer {
@@ -50,7 +53,6 @@ enum Token {
5053
};
5154

5255
class Lexer {
53-
public:
5456
/**
5557
* used to store a identifier token
5658
*/
@@ -60,28 +62,84 @@ class Lexer {
6062
* suppose a numerical token 16.3ere which is
6163
* neither a numerical token or a identifier
6264
*/
65+
6366
std::string error_string_;
6467
/**
6568
* used to store a literal string
6669
* literal strings are of type "a string"
6770
*/
71+
6872
std::string literal_string_;
6973
/**
7074
* used to store the value of the integer token
7175
* during tokenization.
7276
*/
77+
7378
int intval_;
7479
/**
7580
* used to store the value of the float token
7681
* during tokenization.
7782
*/
83+
7884
double floatval_;
85+
/**
86+
* The current nesting level as parsed by the lexer in
87+
* the input file.
88+
*/
89+
90+
int current_nesting_level_{0};
91+
/**
92+
* vector to store duplicate symbol errors.
93+
*/
94+
std::vector<std::string> duplicate_symbol_errors_;
95+
/**
96+
* vector to store undeclared symbol errors.
97+
*/
98+
std::vector<std::string> undeclared_symbol_errors_;
99+
100+
/**
101+
* Stores the current datatype of the identifier.
102+
* for example int a = 5;
103+
* When tokenizing a current_datatype_ = "int", for all others it is
104+
* an empty string.
105+
*/
106+
std::string current_datatype_;
107+
108+
/**
109+
* A symbol table object for building up the symbol table for the input file.
110+
* Check src/include/symbol_table/symbol_table.h and src/symbol_table/symbol_table.cpp
111+
* for more details.
112+
*/
113+
symbol_table::SymbolTable symbol_table_;
114+
115+
public:
116+
Lexer() = default;
79117

80118
/**
81119
* Takes a ifstream object as input and gets the next character
82120
* from the input file and returns the appropriate token.
83121
*/
84122
int GetToken(std::ifstream &is);
123+
124+
/**
125+
* Getter for the current_datatype.
126+
*/
127+
std::string GetCurrentDatatype();
128+
129+
/**
130+
* Getter for the current nesting level.
131+
*/
132+
[[nodiscard]] int GetCurrentNestingLevel() const;
133+
134+
/**
135+
* Getter for undeclared symbol errors.
136+
*/
137+
std::vector<std::string> GetUndeclaredSymbolErrors();
138+
139+
/**
140+
* Getter for duplicate symbol errors.
141+
*/
142+
std::vector<std::string> GetDuplicateSymbolErrors();
85143
}; // class Lexer
86144

87145
} // namespace lexer
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#ifndef JUCC_SYMBOL_TABLE_SYMBOL_TABLE_H
2+
#define JUCC_SYMBOL_TABLE_SYMBOL_TABLE_H
3+
4+
#include <string>
5+
#include <unordered_map>
6+
#include <utility>
7+
#include <vector>
8+
9+
namespace jucc {
10+
11+
namespace symbol_table {
12+
13+
struct Node {
14+
/**
15+
* Used to store the name of the identifier
16+
* obtained during tokenization
17+
*/
18+
std::string identifier_;
19+
/**
20+
* Used to store the data type of the identifier
21+
* One of int or float
22+
*/
23+
std::string data_type_;
24+
/**
25+
* Used to store the nesting level of scoping
26+
* Deeper the nesting, higher the scope
27+
*
28+
* ...
29+
* int a0_;
30+
* if ( condition ) {
31+
* int a1_;
32+
* if ( second_condition ) {
33+
* int a2_;
34+
* if ( third_condition) {
35+
* int a3_;
36+
* ...
37+
* }
38+
* }
39+
* }
40+
* ...
41+
* a0_ has 0 level , a1_ has 1 level and so on ...
42+
*/
43+
int nesting_level_;
44+
/**
45+
* The pointer to the next node.
46+
*/
47+
Node *next_;
48+
/**
49+
* Constructor for node class
50+
*/
51+
Node(std::string it_, std::string dt_, int nt_)
52+
: identifier_(std::move(it_)), data_type_(std::move(dt_)), nesting_level_(nt_), next_(nullptr) {}
53+
};
54+
55+
class LinkedList {
56+
/**
57+
* The head_ of the linked list
58+
*/
59+
Node *head_{nullptr};
60+
61+
public:
62+
LinkedList() = default;
63+
/**
64+
* Allocates memory for a new Node and returns it after initializing.
65+
*/
66+
static Node *CreateNewNode(std::string it_, std::string dt_, int nt_);
67+
/**
68+
* Adds a new node at the starting of the linked list
69+
*/
70+
void AddNewNode(std::string it_, std::string dt_, int nt_);
71+
/**
72+
* Deletes the first node of the linked list
73+
*/
74+
void DeleteStartingNode();
75+
/**
76+
* Returns true if linked list is empty or vice-versa
77+
*/
78+
bool IsEmpty();
79+
/**
80+
* returns head_
81+
*/
82+
Node *GetHead();
83+
};
84+
85+
class SymbolTable {
86+
/**
87+
* Store the identfier mappings with respect to their presence
88+
* in different nesting levels in the program
89+
*/
90+
std::unordered_map<std::string, LinkedList> hash_table_;
91+
/**
92+
* A vector to store different duplicate symbols found in the input
93+
*/
94+
std::vector<std::string> duplicate_symbols_;
95+
96+
/**
97+
* A vector to store the undeclared symbols in the input file.
98+
*/
99+
std::vector<std::string> undeclared_symbols_;
100+
101+
public:
102+
/**
103+
* Checks if the current identifier is present in the same nesting level
104+
* int the hash_table. If present reports a duplicate symbol error, that is,
105+
* inserts into the duplicate_symbols vector.
106+
*/
107+
void CheckAndAddEntry(Node *node_);
108+
109+
/**
110+
* On scope end - sc_
111+
* Removes the nodes of all the variables in the hash_table which have nesting_level_ = sc_
112+
*/
113+
void RemoveNodesOnScopeEnd(int level_);
114+
115+
/**
116+
* Inserts symbols into duplicate symbols array
117+
*/
118+
void InsertIntoDuplicateSymbols(const std::string &identifier_);
119+
120+
/**
121+
* Returns the linked list or list of nodes associated with an
122+
* identifier.
123+
*/
124+
Node *GetLinkedListById(const std::string &id_);
125+
126+
/**
127+
* Getter method for duplicated symbols.
128+
*/
129+
std::vector<std::string> GetDuplicateSymbols();
130+
131+
/**
132+
* Getter method for undeclared variables.
133+
*/
134+
std::vector<std::string> GetUndeclaredSymbols();
135+
136+
/**
137+
* Checks if the identifier is present in hash_table_
138+
* Utility function for testing
139+
*/
140+
int CheckOccurrencesOfId(const std::string &id_);
141+
};
142+
143+
} // namespace symbol_table
144+
145+
} // namespace jucc
146+
147+
#endif

0 commit comments

Comments
 (0)