Skip to content

Commit 59389c9

Browse files
authored
Feature: Formatted JSON Parse Tree output (#35)
* Added: Dependency nlohmann/json * Added: ParserStack and parse_tree_ json pretty printing * Added: parse tree generator * Review: addresses points * Review: addresses points
1 parent 3a236ef commit 59389c9

File tree

6 files changed

+243
-7
lines changed

6 files changed

+243
-7
lines changed

CMakeLists.txt

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@
5050
# HEADER Project definition.
5151
# HEADER Safety checks.
5252
# HEADER System info.
53-
# HEADER jucc libraries.
54-
# HEADER jucc binary.
53+
# HEADER Dependencies
54+
# HEADER JuCC libraries.
55+
# HEADER JuCC binary.
5556
# HEADER Tests.
5657
# HEADER Benchmarks.
5758
# HEADER Generated file destinations.
@@ -166,6 +167,30 @@ else ()
166167
endif ()
167168
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
168169

170+
#######################################################################################################################
171+
# HEADER Dependencies
172+
#######################################################################################################################
173+
174+
set(JUCC_DEPENDENCIES "")
175+
176+
# nlohmann_json for json formatting
177+
configure_file("${PROJECT_SOURCE_DIR}/build_support/nlohmann_json_CMakeLists.txt.in" nlohmann-json-download/CMakeLists.txt)
178+
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
179+
RESULT_VARIABLE result
180+
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/nlohmann-json-download")
181+
if(result)
182+
message(FATAL_ERROR "CMake step for nlohmann_json failed: ${result}")
183+
endif()
184+
execute_process(COMMAND "${CMAKE_COMMAND}" --build . --config "Release"
185+
RESULT_VARIABLE result
186+
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/nlohmann-json-download")
187+
if(result)
188+
message(FATAL_ERROR "Build step for nlohmann_json failed: ${result}")
189+
endif()
190+
set(nlohmann_json_force_shared_crt ON CACHE BOOL "" FORCE) # don't override our compiler/linker options when building nlohmann_json
191+
add_subdirectory("${CMAKE_BINARY_DIR}/nlohmann-json-src" "${CMAKE_BINARY_DIR}/nlohmann-json-build")
192+
list(APPEND JUCC_DEPENDENCIES nlohmann_json)
193+
169194
#######################################################################################################################
170195
# HEADER JuCC libraries.
171196
# jucc_objlib : JuCC object library, built once and linked into both static and shared targets.
@@ -179,6 +204,8 @@ file(GLOB_RECURSE
179204
CONFIGURE_DEPENDS # See above. Ask CMake to regenerate the build system if these files change.
180205
${PROJECT_SOURCE_DIR}/src/*.cpp
181206
${PROJECT_SOURCE_DIR}/src/include/*.h
207+
${PROJECT_SOURCE_DIR}/third_party/*.cpp
208+
${PROJECT_SOURCE_DIR}/third_party/*.h
182209
)
183210

184211
# Remove the main program from JuCC sources.
@@ -188,6 +215,7 @@ list(REMOVE_ITEM JUCC_SRCS ${PROJECT_SOURCE_DIR}/src/main/main.cpp)
188215
# The OBJECT library is built first so that the same .o files can be linked into static and shared libraries.
189216
# This allows both jucc_static and jucc_shared to be built with a single compilation of translation units.
190217
add_library(jucc_objlib OBJECT ${JUCC_SRCS})
218+
target_link_libraries(jucc_objlib PUBLIC ${JUCC_DEPENDENCIES}) # Add Dependencies required for compilation
191219

192220
set_target_properties(jucc_objlib PROPERTIES
193221
POSITION_INDEPENDENT_CODE ON # Required for static linking into other shared libraries.
@@ -226,7 +254,7 @@ target_link_options(jucc_static PUBLIC # PUBLIC: all consumers of t
226254
)
227255

228256
#######################################################################################################################
229-
# HEADER jucc binary.
257+
# HEADER JuCC binary.
230258
# jucc : The main compiler binary.
231259
#######################################################################################################################
232260

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
cmake_minimum_required(VERSION 3.16)
2+
3+
project(nlohmann-json-download NONE)
4+
5+
include(ExternalProject)
6+
ExternalProject_Add(nlohmann_json
7+
GIT_REPOSITORY https://github.com/nlohmann/json.git
8+
GIT_TAG develop
9+
SOURCE_DIR "${CMAKE_BINARY_DIR}/nlohmann-json-src"
10+
BINARY_DIR "${CMAKE_BINARY_DIR}/nlohmann-json-build"
11+
CONFIGURE_COMMAND ""
12+
BUILD_COMMAND ""
13+
INSTALL_COMMAND ""
14+
TEST_COMMAND ""
15+
)

src/include/parser/parser.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,26 @@
66
#include <utility>
77
#include <vector>
88

9+
#include "nlohmann/json.hpp"
910
#include "parser/parsing_table.h"
1011
#include "utils/first_follow.h"
1112

12-
namespace jucc {
13+
using json = nlohmann::ordered_json;
1314

15+
namespace jucc {
1416
namespace parser {
1517

1618
class Parser {
19+
/**
20+
* json pretty print indentation for generated parse tree
21+
*/
22+
static const int INDENTATION = 4;
23+
24+
/**
25+
* parse tree for Treant.js integration
26+
*/
27+
json parse_tree_;
28+
1729
/**
1830
* A stack to put the symbols and perform the actual parsing
1931
*/
@@ -86,12 +98,25 @@ class Parser {
8698
*/
8799
void DoNextStep();
88100

101+
/**
102+
* Build the parse tree from production history
103+
* Parse tree not built if parser in error state
104+
*/
105+
void BuildParseTree();
106+
107+
/**
108+
* Dumps the parse tree in given path in json format
109+
* @returns true on success
110+
*/
111+
bool WriteParseTree(const std::string &filepath);
112+
89113
/* getters and setters*/
90114
void SetInputString(std::vector<std::string> inps);
91115
void SetParsingTable(ParsingTable table);
92116
void SetStartSymbol(std::string start);
93117
[[nodiscard]] const std::vector<int> &GetProductionHistory() { return production_history_; }
94118
[[nodiscard]] const std::vector<std::string> &GetParserErrors() { return parser_errors_; }
119+
[[nodiscard]] const json &GetParseTree() { return parse_tree_; }
95120
};
96121
} // namespace parser
97122

src/main/main.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
*/
2222

2323
#include "main/jucc.h"
24+
2425
/**
2526
* jucc begins execution here.
2627
*/
@@ -94,13 +95,13 @@ auto main(int argc, char *argv[]) -> int {
9495
std::vector<std::string> input_tokens;
9596
jucc::lexer::Lexer lexer = jucc::lexer::Lexer();
9697
int token;
97-
9898
while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) {
9999
input_tokens.emplace_back(jucc::lexer::Lexer::GetTokenType(token));
100100
}
101+
102+
/* Check for symbol table errors and exit if errors exist */
101103
std::vector<std::string> errors = lexer.GetUndeclaredSymbolErrors();
102104
errors.insert(errors.end(), lexer.GetDuplicateSymbolErrors().begin(), lexer.GetDuplicateSymbolErrors().end());
103-
104105
if (!errors.empty()) {
105106
std::cout << "jucc: ";
106107
for (auto &e : errors) {

src/parser/parser.cpp

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#include "parser/parser.h"
22

33
#include <algorithm>
4+
#include <fstream>
45

56
namespace jucc::parser {
67

7-
Parser::Parser() {
8+
Parser::Parser() : parse_tree_(json::object({})) {
89
// initialize the stack
910
stack_.push(std::string(utils::STRING_ENDMARKER));
1011
input_string_.clear();
@@ -103,4 +104,70 @@ void Parser::ParseNextStep() {
103104
}
104105
}
105106

107+
void Parser::BuildParseTree() {
108+
// if errors cannot build tree
109+
if (!parser_errors_.empty()) {
110+
return;
111+
}
112+
113+
// init parse tree state
114+
parse_tree_[start_symbol_] = json::object({});
115+
std::stack<json *> parent_node_stack;
116+
parent_node_stack.push(&parse_tree_[start_symbol_]);
117+
118+
auto productions = table_.GetProductions();
119+
auto terminals = table_.GetTerminals();
120+
auto non_terminals = table_.GetNonTerminals();
121+
// iterate over production history and build tree
122+
for (const auto &prod : production_history_) {
123+
int production_index = prod / 100;
124+
int rule_index = prod % 100;
125+
auto parent = productions[production_index].GetParent();
126+
auto rule = productions[production_index].GetRules()[rule_index];
127+
auto entities = rule.GetEntities();
128+
json *parent_node = parent_node_stack.top();
129+
/**
130+
* rename entities to handle duplicates
131+
* Example:
132+
* change entities from {"A", "A", "B", "A", "C", "B"} to
133+
* {"A", "A_1", "B", "A_2", "C", "B_1"} inplace
134+
*/
135+
std::unordered_map<std::string, int> symbol_count;
136+
// store an reverse map for name of entities before renaming
137+
std::unordered_map<std::string, std::string> default_name;
138+
for (auto &entity : entities) {
139+
auto p_entity = entity;
140+
if (symbol_count[entity]++ != 0) {
141+
entity += "_" + std::to_string(symbol_count[entity] - 1);
142+
}
143+
default_name[entity] = p_entity;
144+
145+
// add renamed entities to current parent node
146+
if (std::find(terminals.begin(), terminals.end(), p_entity) != terminals.end()) {
147+
(*parent_node)[entity] = json();
148+
} else {
149+
(*parent_node)[entity] = json::object({});
150+
}
151+
}
152+
153+
// update parent_node_stack
154+
parent_node_stack.pop();
155+
for (auto it = entities.rbegin(); it < entities.rend(); it++) {
156+
if (std::find(non_terminals.begin(), non_terminals.end(), default_name[*it]) != non_terminals.end()) {
157+
parent_node_stack.push(&((*parent_node)[*it]));
158+
}
159+
}
160+
}
161+
}
162+
163+
bool Parser::WriteParseTree(const std::string &filepath) {
164+
std::ofstream ofs(filepath);
165+
if (ofs.is_open()) {
166+
ofs << parse_tree_.dump(INDENTATION) << '\n';
167+
return true;
168+
}
169+
170+
return false;
171+
}
172+
106173
} // namespace jucc::parser

test/parser/parser_test.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,103 @@ TEST(parser, Parser2) {
264264

265265
ASSERT_TRUE(pars.IsComplete());
266266
}
267+
268+
TEST(parser, Parser3) {
269+
/**
270+
* Grammar:
271+
* S -> Aa
272+
* A -> BD
273+
* B -> b | EPSILON
274+
* D -> d | EPSILON
275+
*/
276+
grammar::Production p1;
277+
p1.SetParent("S");
278+
p1.SetRules({grammar::Rule({"A", "a"})});
279+
grammar::Production p2;
280+
p2.SetParent("A");
281+
p2.SetRules({grammar::Rule({"B", "D"})});
282+
grammar::Production p3;
283+
p3.SetParent("B");
284+
p3.SetRules({grammar::Rule({"b"}), grammar::Rule({grammar::EPSILON})});
285+
grammar::Production p4;
286+
p4.SetParent("D");
287+
p4.SetRules({grammar::Rule({"d"}), grammar::Rule({grammar::EPSILON})});
288+
289+
grammar::Productions grammar = {p1, p2, p3, p4};
290+
291+
auto nullables = utils::CalcNullables(grammar);
292+
auto firsts = utils::CalcFirsts(grammar, nullables);
293+
auto follows = utils::CalcFollows(grammar, firsts, nullables, "S");
294+
295+
std::vector<std::string> terminals = {"a", "b", "d"};
296+
std::vector<std::string> non_terminals = {"S", "A", "B", "D"};
297+
298+
ParsingTable table = ParsingTable(terminals, non_terminals);
299+
table.SetFirsts(firsts);
300+
table.SetFollows(follows);
301+
table.SetProductions(grammar);
302+
table.BuildTable();
303+
304+
Parser pars = Parser();
305+
306+
std::vector<std::string> input = {"b", "d", "a"};
307+
pars.SetInputString(input);
308+
pars.SetStartSymbol("S");
309+
pars.SetParsingTable(table);
310+
311+
while (!pars.IsComplete()) {
312+
pars.ParseNextStep();
313+
}
314+
315+
pars.BuildParseTree();
316+
auto tree = pars.GetParseTree();
317+
ASSERT_EQ(tree.dump(), "{\"S\":{\"A\":{\"B\":{\"b\":null},\"D\":{\"d\":null}},\"a\":null}}");
318+
}
319+
320+
TEST(parser, Parser4) {
321+
/**
322+
* Grammar:
323+
* S -> ABA
324+
* A -> a
325+
* B -> b
326+
*/
327+
grammar::Production p1;
328+
p1.SetParent("S");
329+
p1.SetRules({grammar::Rule({"A", "B", "A"})});
330+
grammar::Production p2;
331+
p2.SetParent("A");
332+
p2.SetRules({grammar::Rule({"a"})});
333+
grammar::Production p3;
334+
p3.SetParent("B");
335+
p3.SetRules({grammar::Rule({"b"})});
336+
337+
grammar::Productions grammar = {p1, p2, p3};
338+
339+
auto nullables = utils::CalcNullables(grammar);
340+
auto firsts = utils::CalcFirsts(grammar, nullables);
341+
auto follows = utils::CalcFollows(grammar, firsts, nullables, "S");
342+
343+
std::vector<std::string> terminals = {"a", "b"};
344+
std::vector<std::string> non_terminals = {"S", "A", "B"};
345+
346+
ParsingTable table = ParsingTable(terminals, non_terminals);
347+
table.SetFirsts(firsts);
348+
table.SetFollows(follows);
349+
table.SetProductions(grammar);
350+
table.BuildTable();
351+
352+
Parser pars = Parser();
353+
354+
std::vector<std::string> input = {"a", "b", "a"};
355+
pars.SetInputString(input);
356+
pars.SetStartSymbol("S");
357+
pars.SetParsingTable(table);
358+
359+
while (!pars.IsComplete()) {
360+
pars.ParseNextStep();
361+
}
362+
363+
pars.BuildParseTree();
364+
auto tree = pars.GetParseTree();
365+
ASSERT_EQ(tree.dump(), "{\"S\":{\"A\":{\"a\":null},\"B\":{\"b\":null},\"A_1\":{\"a\":null}}}");
366+
}

0 commit comments

Comments
 (0)