|
| 1 | +#include "utils/left_factoring.h" |
| 2 | + |
| 3 | +#include <sstream> |
| 4 | + |
| 5 | +#include "utils/left_recursion.h" |
| 6 | +#include "utils/trie/memory_efficient_trie.h" |
| 7 | + |
| 8 | +namespace jucc::utils { |
| 9 | + |
| 10 | +grammar::Productions RemoveLeftFactors(const grammar::Production &prod) { |
| 11 | + grammar::Productions prods; |
| 12 | + |
| 13 | + bool state_not_updated = false; |
| 14 | + auto current_prod = prod; |
| 15 | + while (!state_not_updated) { |
| 16 | + auto max_common_prefix = utils::LongestCommonPrefix(current_prod); |
| 17 | + if (max_common_prefix.empty()) { |
| 18 | + state_not_updated = true; |
| 19 | + continue; |
| 20 | + } |
| 21 | + |
| 22 | + auto parent = current_prod.GetParent(); // similar to E |
| 23 | + auto parent_dash = current_prod.GetParent() + std::string(utils::DASH); // similar to E' |
| 24 | + |
| 25 | + grammar::Rules parent_rules; |
| 26 | + grammar::Rules parent_dash_rules; |
| 27 | + |
| 28 | + // insert the commonE' prod |
| 29 | + grammar::Entity common_factor_with_dash(max_common_prefix.begin(), max_common_prefix.end()); |
| 30 | + // push E' after the common part |
| 31 | + common_factor_with_dash.emplace_back(parent_dash); |
| 32 | + parent_rules.emplace_back(grammar::Rule(common_factor_with_dash)); |
| 33 | + |
| 34 | + bool has_epsilon_inserted = false; |
| 35 | + // produce two production after matching |
| 36 | + for (const auto &rule : current_prod.GetRules()) { |
| 37 | + if (!rule.HasPrefix(max_common_prefix)) { |
| 38 | + parent_rules.emplace_back(rule); |
| 39 | + } else { |
| 40 | + auto new_entities = grammar::Entity(rule.GetEntities().begin() + static_cast<int>(max_common_prefix.size()), |
| 41 | + rule.GetEntities().end()); |
| 42 | + if (new_entities.empty()) { |
| 43 | + has_epsilon_inserted = true; |
| 44 | + new_entities.emplace_back(std::string(grammar::EPSILON)); |
| 45 | + } |
| 46 | + parent_dash_rules.push_back(grammar::Rule(new_entities)); |
| 47 | + } |
| 48 | + } |
| 49 | + if (!has_epsilon_inserted) { |
| 50 | + parent_dash_rules.emplace_back(grammar::Entity{std::string(grammar::EPSILON)}); |
| 51 | + } |
| 52 | + // do this recursive |
| 53 | + prods.push_back(grammar::Production(parent, parent_rules)); |
| 54 | + current_prod = grammar::Production(parent_dash, parent_dash_rules); |
| 55 | + } |
| 56 | + prods.push_back(current_prod); |
| 57 | + return prods; |
| 58 | +} |
| 59 | + |
| 60 | +grammar::Entity LongestCommonPrefix(const grammar::Production &prod) { |
| 61 | + TrieManager trie_manager; |
| 62 | + trie_manager.InsertAll(prod); |
| 63 | + grammar::Entity common_prefixes; |
| 64 | + int len = 1; |
| 65 | + TrieManager::GreedyPreorder(trie_manager.GetMaster(), len, common_prefixes, true); |
| 66 | + |
| 67 | + return common_prefixes; |
| 68 | +} |
| 69 | + |
| 70 | +} // namespace jucc::utils |
0 commit comments