Skip to content

Commit 76fe90d

Browse files
authored
Left recursion, Left Factoring, Trie, Unit Tests (#4)
* template * direct left recursion * updated grammar * feat: brand new trie * feat: left factoring * left factoring gtest * minor fixes && code quality improvements * efficient impl * unwanted dockerfile comment removed * CI fix * more comments!! HUH I WANT TO HAVE SEX MAAN🥺!! * more comments!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! * more
1 parent 7a5dbcf commit 76fe90d

File tree

10 files changed

+542
-5
lines changed

10 files changed

+542
-5
lines changed

src/grammar/grammar.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,27 @@
66

77
namespace jucc::grammar {
88

9+
std::string Rule::ToString() const {
10+
std::stringstream ss;
11+
for (const auto &entity : entities_) {
12+
ss << entity;
13+
}
14+
return ss.str();
15+
}
16+
17+
bool Rule::HasPrefix(const Entity &prefix) const {
18+
// Takes care of even EPSILON too.
19+
if (prefix.size() > entities_.size()) {
20+
return false;
21+
}
22+
for (int i = 0; i < static_cast<int>(prefix.size()); i++) {
23+
if (entities_[i] != prefix[i]) {
24+
return false;
25+
}
26+
}
27+
return true;
28+
}
29+
930
Parser::Parser(const char *filepath) { file_ = std::ifstream(filepath); }
1031

1132
Parser::~Parser() {

src/include/grammar/grammar.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
namespace jucc {
1010
namespace grammar {
1111
const char EPSILON[] = "EPSILON";
12+
using Entity = std::vector<std::string>;
1213

1314
class Rule {
1415
/**
@@ -17,15 +18,25 @@ class Rule {
1718
* Example:
1819
* For production: E : F + E => { "F", "+", "E" } is a rule.
1920
*/
20-
std::vector<std::string> entities_;
21+
Entity entities_;
2122

2223
public:
2324
Rule() = default;
24-
explicit Rule(std::vector<std::string> entities) : entities_(std::move(entities)) {}
25-
[[nodiscard]] const std::vector<std::string> &GetEntities() const { return entities_; }
26-
void SetEntities(const std::vector<std::string> &entities) { Rule::entities_ = entities; }
25+
explicit Rule(Entity entities) : entities_(std::move(entities)) {}
26+
[[nodiscard]] const Entity &GetEntities() const { return entities_; }
27+
void SetEntities(const Entity &entities) { Rule::entities_ = entities; }
28+
[[nodiscard]] std::string ToString() const;
29+
30+
/**
31+
* Takes an Entity and checks if the entries of the Entity is a perfect
32+
* prefix of the this->entities_ or not.
33+
* @return a boolean after checking if param is actually a prefix or not.
34+
*/
35+
[[nodiscard]] bool HasPrefix(const Entity & /*prefix*/) const;
2736
};
2837

38+
using Rules = std::vector<grammar::Rule>;
39+
2940
class Production {
3041
/**
3142
* class Parser returns a list of Productions.
@@ -40,6 +51,8 @@ class Production {
4051

4152
public:
4253
Production() = default;
54+
Production(std::string parent, std::vector<Rule> rules) : parent_(std::move(parent)), rules_(std::move(rules)) {}
55+
4356
[[nodiscard]] const std::string &GetParent() const { return parent_; }
4457
[[nodiscard]] const std::vector<Rule> &GetRules() const { return rules_; }
4558
void SetParent(const std::string &parent) { Production::parent_ = parent; }
@@ -94,4 +107,4 @@ class Parser {
94107
} // namespace grammar
95108
} // namespace jucc
96109

97-
#endif
110+
#endif // JUCC_GRAMMAR_GRAMMAR_H

src/include/utils/left_factoring.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#ifndef JUCC_LEFT_FACTORING_H
2+
#define JUCC_LEFT_FACTORING_H
3+
#include "grammar/grammar.h"
4+
namespace grammar = jucc::grammar;
5+
6+
namespace jucc::utils {
7+
/**
8+
* Does remove left factor from a production.
9+
* @return a set of productions free from left factors.
10+
*/
11+
grammar::Productions RemoveLeftFactors(const grammar::Production & /*prod*/);
12+
13+
/**
14+
* Finds longest prefix which is most common to the rules of the productions.
15+
* @return a grammar Entity which can be used as a core of grammar::Rule.
16+
*/
17+
grammar::Entity LongestCommonPrefix(const grammar::Production & /*prod*/);
18+
19+
} // namespace jucc::utils
20+
21+
#endif // JUCC_LEFT_FACTORING_H

src/include/utils/left_recursion.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#ifndef JUCC_LEFT_RECURSION_H
2+
#define JUCC_LEFT_RECURSION_H
3+
#include "grammar/grammar.h"
4+
5+
namespace jucc::utils {
6+
// Standard practice.
7+
const char DASH[] = "'";
8+
9+
/**
10+
* Removes direct left Recursion.
11+
* Example: for production
12+
* E -> E + T
13+
* E -> T
14+
* E -> EPSILON
15+
* @return a set of productions after removing left recursion.
16+
* E' -> +TE' | EPSILON
17+
* E -> TE' | EPSILON E'
18+
*/
19+
grammar::Productions RemoveDirectLeftRecursion(const grammar::Production & /*prod*/);
20+
21+
/**
22+
* Checks if a given production is recursive.
23+
* @return boolean
24+
*/
25+
bool IsRecursive(const grammar::Production & /*prod*/);
26+
27+
/**
28+
* TODO!! (If required)
29+
* Removes Indirect Left Recursion.
30+
* A -> B
31+
* B -> C
32+
* C -> A | B | EPSILON
33+
* @return a set of Productions.
34+
*/
35+
grammar::Productions RemoveIndirectLeftRecursions(const grammar::Productions & /*prod*/);
36+
37+
} // namespace jucc::utils
38+
#endif // JUCC_LEFT_RECURSION_H
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#ifndef JUCC_UTILS_MEMORY_EFFICIENT_TRIE_H
2+
#define JUCC_UTILS_MEMORY_EFFICIENT_TRIE_H
3+
#include <sstream>
4+
#include <string>
5+
#include <unordered_map>
6+
#include <vector>
7+
8+
#include "grammar/grammar.h"
9+
10+
namespace jucc {
11+
namespace utils {
12+
13+
class Trie {
14+
/**
15+
* Class Trie to store a production efficiently for any required
16+
* operations, mainly for prefix matching.
17+
*/
18+
public:
19+
// Creates a trie for set of Rules where each entity is used as a key.
20+
std::unordered_map<std::string, Trie *> nodes_;
21+
22+
grammar::Entity keys_list_; // Stores the prefix list of entities upto the current entity.
23+
int count_; // Number of occurrences of the current entity after insertion of a set of Rules.
24+
25+
Trie();
26+
};
27+
28+
class TrieManager {
29+
/**
30+
* A higher order abstration to manager a complete Trie.
31+
* Takes care of unwanted memory leaking through a custom garbage collector.
32+
*/
33+
Trie *master_; // Current head of the trie.
34+
35+
std::vector<Trie *> gc_; // The garbage collector which stores any newly created trie node.
36+
37+
public:
38+
/**
39+
* Constructor.
40+
*/
41+
TrieManager();
42+
43+
/**
44+
* Customised no-param constructor.
45+
*/
46+
Trie *NewTrieNode();
47+
48+
/**
49+
* Getter: Returns master_ node, i.e. the Head of the trie.
50+
*/
51+
[[nodiscard]] Trie *GetMaster() const;
52+
53+
/**
54+
* Insert a particular grammar Entity into the master_.
55+
* @param grammar::Entity &
56+
*/
57+
void Insert(const grammar::Entity & /*entities*/);
58+
59+
/**
60+
* Insert a whole production into the trie node master_.
61+
*/
62+
void InsertAll(const grammar::Production & /*prod*/);
63+
64+
/**
65+
* Makes a preorder traversal efficiently of the master_ node of the Trie and
66+
* returns the most common prefix of the Production Rules formed by individual
67+
* Rule entities.
68+
*/
69+
// NOLINTNEXTLINE
70+
static void GreedyPreorder(Trie * /*head*/, int & /*len*/, grammar::Entity & /*max_str*/, bool /*is_prime_head*/);
71+
72+
/**
73+
* Virtual Destructor performs garbage collection where created memory gets released.
74+
*/
75+
virtual ~TrieManager();
76+
};
77+
78+
} // namespace utils
79+
} // namespace jucc
80+
#endif // JUCC_MEMORYEFFICIENTTRIE_H

src/utils/left_factoring.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#include "utils/left_factoring.h"
2+
3+
#include <sstream>
4+
5+
#include "utils/left_recursion.h"
6+
#include "utils/trie/memory_efficient_trie.h"
7+
8+
namespace jucc::utils {
9+
10+
grammar::Productions RemoveLeftFactors(const grammar::Production &prod) {
11+
grammar::Productions prods;
12+
13+
bool state_not_updated = false;
14+
auto current_prod = prod;
15+
while (!state_not_updated) {
16+
auto max_common_prefix = utils::LongestCommonPrefix(current_prod);
17+
if (max_common_prefix.empty()) {
18+
state_not_updated = true;
19+
continue;
20+
}
21+
22+
auto parent = current_prod.GetParent(); // similar to E
23+
auto parent_dash = current_prod.GetParent() + std::string(utils::DASH); // similar to E'
24+
25+
grammar::Rules parent_rules;
26+
grammar::Rules parent_dash_rules;
27+
28+
// insert the commonE' prod
29+
grammar::Entity common_factor_with_dash(max_common_prefix.begin(), max_common_prefix.end());
30+
// push E' after the common part
31+
common_factor_with_dash.emplace_back(parent_dash);
32+
parent_rules.emplace_back(grammar::Rule(common_factor_with_dash));
33+
34+
bool has_epsilon_inserted = false;
35+
// produce two production after matching
36+
for (const auto &rule : current_prod.GetRules()) {
37+
if (!rule.HasPrefix(max_common_prefix)) {
38+
parent_rules.emplace_back(rule);
39+
} else {
40+
auto new_entities = grammar::Entity(rule.GetEntities().begin() + static_cast<int>(max_common_prefix.size()),
41+
rule.GetEntities().end());
42+
if (new_entities.empty()) {
43+
has_epsilon_inserted = true;
44+
new_entities.emplace_back(std::string(grammar::EPSILON));
45+
}
46+
parent_dash_rules.push_back(grammar::Rule(new_entities));
47+
}
48+
}
49+
if (!has_epsilon_inserted) {
50+
parent_dash_rules.emplace_back(grammar::Entity{std::string(grammar::EPSILON)});
51+
}
52+
// do this recursive
53+
prods.push_back(grammar::Production(parent, parent_rules));
54+
current_prod = grammar::Production(parent_dash, parent_dash_rules);
55+
}
56+
prods.push_back(current_prod);
57+
return prods;
58+
}
59+
60+
grammar::Entity LongestCommonPrefix(const grammar::Production &prod) {
61+
TrieManager trie_manager;
62+
trie_manager.InsertAll(prod);
63+
grammar::Entity common_prefixes;
64+
int len = 1;
65+
TrieManager::GreedyPreorder(trie_manager.GetMaster(), len, common_prefixes, true);
66+
67+
return common_prefixes;
68+
}
69+
70+
} // namespace jucc::utils

src/utils/left_recursion.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#include "utils/left_recursion.h"
2+
3+
#include <algorithm>
4+
namespace jucc::utils {
5+
6+
grammar::Productions RemoveDirectLeftRecursion(const grammar::Production &prod) {
7+
if (!IsRecursive(prod)) {
8+
return grammar::Productions{prod};
9+
}
10+
const std::string &parent = prod.GetParent();
11+
std::string parent_dash = prod.GetParent() + std::string(DASH);
12+
13+
grammar::Productions prods(2);
14+
prods[0].SetParent(parent_dash);
15+
prods[1].SetParent(parent);
16+
17+
auto parent_rules = prods[0].GetRules();
18+
auto parent_dash_rules = prods[1].GetRules();
19+
20+
for (const auto &rule : prod.GetRules()) {
21+
auto entries = rule.GetEntities();
22+
if (entries[0] == parent) {
23+
auto new_entries = std::vector<std::string>(entries.begin() + 1, entries.end());
24+
new_entries.push_back(parent_dash);
25+
parent_dash_rules.emplace_back(grammar::Rule(new_entries));
26+
} else {
27+
entries.push_back(parent_dash);
28+
parent_rules.emplace_back(grammar::Rule(entries));
29+
}
30+
}
31+
parent_dash_rules.push_back(grammar::Rule({std::string(grammar::EPSILON)}));
32+
prods[0].SetRules(parent_dash_rules);
33+
prods[1].SetRules(parent_rules);
34+
return prods;
35+
}
36+
37+
bool IsRecursive(const grammar::Production &prod) {
38+
const auto &rules = prod.GetRules();
39+
40+
return static_cast<bool>(std::any_of(rules.begin(), rules.end(),
41+
[&](const auto &rule) { return prod.GetParent() == rule.GetEntities()[0]; }));
42+
}
43+
44+
} // namespace jucc::utils

0 commit comments

Comments
 (0)