From eb0c669eeec4e34b02ea5a75db0cdb1a5a1a46e3 Mon Sep 17 00:00:00 2001 From: dmezh <54985569+dmezh@users.noreply.github.com> Date: Fri, 5 Nov 2021 20:35:34 -0400 Subject: [PATCH 1/5] rough theoretical MOV squashing, mostly works --- CMakeLists.txt | 11 ++++++ optimize/opt_flatten_adjacent_mov.c | 54 +++++++++++++++++++++++++++++ optimize/opt_flatten_adjacent_mov.h | 8 +++++ target_x86/asmgen.c | 3 ++ 4 files changed, 76 insertions(+) create mode 100644 optimize/opt_flatten_adjacent_mov.c create mode 100644 optimize/opt_flatten_adjacent_mov.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f08c8f..42ce6c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set(CMAKE_EXE_LINKER_FLAGS "-fsanitize=undefined -rdynamic") include_directories(.) include_directories(lexer) +include_directories(optimize) include_directories(parser) include_directories(quads) include_directories(target_x86) @@ -40,6 +41,7 @@ include_directories(${GENERATED}) add_executable(dcc ${BISON_dcc_parser_OUTPUTS} ${FLEX_dcc_lexer_OUTPUTS} +<<<<<<< HEAD parser/ast.c parser/ast_print.c parser/symtab.c @@ -48,6 +50,15 @@ ${FLEX_dcc_lexer_OUTPUTS} quads/quads.c quads/quads_cf.c quads/quads_print.c +======= + 2-parser/ast.c + 2-parser/symtab.c + 2-parser/types.c + 3-quads/quads.c + 3-quads/quads_cf.c + 3-quads/quads_print.c + optimize/opt_flatten_adjacent_mov.c +>>>>>>> rough theoretical MOV squashing, mostly works target_x86/asmgen.c common/charutil common/semval.c diff --git a/optimize/opt_flatten_adjacent_mov.c b/optimize/opt_flatten_adjacent_mov.c new file mode 100644 index 0000000..d2a3c39 --- /dev/null +++ b/optimize/opt_flatten_adjacent_mov.c @@ -0,0 +1,54 @@ +#include "opt_flatten_adjacent_mov.h" + +#include + +#include "quads.h" +#include "quads_cf.h" + +void try_replace_temp(astn* n, struct astn_qtemp* del, struct astn_qtemp* replace) { + if (n && n->type == ASTN_QTEMP && n->astn_qtemp.tempno == del->tempno) + n->astn_qtemp = *replace; +} + +void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { + BBL* head = root; + while (passes--) { +// quad *q = head->me->start; + + BBL* bbl = head; + if (head == &bb_root) bbl = bbl_next(bbl); + + BB* b = bbl_data(bbl); + + quad *q = b->start; + + while (q && q->next && q->next->next) { + quad *n = q->next; + quad *nn = n->next; + if ((q->op == Q_MOV && n->op == Q_MOV) && + (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && + (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR) && + (q->src1->astn_symptr.e == n->target->astn_symptr.e)) { // this line broken + fprintf(stderr, "found!\n"); + // we found the pattern. + // now we will remove the second quad entirely + // and replace all references to its target with the parent's source. + struct astn_qtemp* del = &n->target->astn_qtemp; + struct astn_qtemp* replace = &q->src1->astn_qtemp; + fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); + + q->next = nn; + free(n); // todo: potentially still leaky? + + while (nn) { + try_replace_temp(nn->target, del, replace); + try_replace_temp(nn->src1, del, replace); + try_replace_temp(nn->src2, del, replace); + nn = nn->next; + } + } + q = q->next; + } + } +} + diff --git a/optimize/opt_flatten_adjacent_mov.h b/optimize/opt_flatten_adjacent_mov.h new file mode 100644 index 0000000..cf8a345 --- /dev/null +++ b/optimize/opt_flatten_adjacent_mov.h @@ -0,0 +1,8 @@ +#ifndef OPT_FLATTEN_ADJACENT_MOV_H +#define OPT_FLATTEN_ADJACENT_MOV_H + +#include "quads_cf.h" + +void opt_flatten_adjacent_mov(BBL* root, unsigned passes); + +#endif diff --git a/target_x86/asmgen.c b/target_x86/asmgen.c index f1f616b..e2bb1f3 100644 --- a/target_x86/asmgen.c +++ b/target_x86/asmgen.c @@ -9,6 +9,8 @@ #include "types.h" #include "util.h" +#include "opt_flatten_adjacent_mov.h" + #include FILE* out; @@ -242,6 +244,7 @@ void e_bb(const BB* b) { fprintf(out, "BB.%s.%d", b->fn, b->bbno); } void asmgen(const BBL* head, FILE* f) { // init output file out = f; + opt_flatten_adjacent_mov(head, 1); fprintf(out, "# ASM OUTPUT\n# compiled poorly :)\n\n"); // init globals, except functions From 552c01a8f7b6ed94c93f7103b977150d21ed9b18 Mon Sep 17 00:00:00 2001 From: dmezh <54985569+dmezh@users.noreply.github.com> Date: Fri, 5 Nov 2021 20:48:35 -0400 Subject: [PATCH 2/5] in progress.. --- optimize/opt_flatten_adjacent_mov.c | 62 ++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/optimize/opt_flatten_adjacent_mov.c b/optimize/opt_flatten_adjacent_mov.c index d2a3c39..7d0d8ae 100644 --- a/optimize/opt_flatten_adjacent_mov.c +++ b/optimize/opt_flatten_adjacent_mov.c @@ -13,41 +13,41 @@ void try_replace_temp(astn* n, struct astn_qtemp* del, struct astn_qtemp* replac void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { BBL* head = root; while (passes--) { -// quad *q = head->me->start; - BBL* bbl = head; if (head == &bb_root) bbl = bbl_next(bbl); - BB* b = bbl_data(bbl); - - quad *q = b->start; - - while (q && q->next && q->next->next) { - quad *n = q->next; - quad *nn = n->next; - if ((q->op == Q_MOV && n->op == Q_MOV) && - (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && - (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR) && - (q->src1->astn_symptr.e == n->target->astn_symptr.e)) { // this line broken - fprintf(stderr, "found!\n"); - // we found the pattern. - // now we will remove the second quad entirely - // and replace all references to its target with the parent's source. - struct astn_qtemp* del = &n->target->astn_qtemp; - struct astn_qtemp* replace = &q->src1->astn_qtemp; - fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); - - q->next = nn; - free(n); // todo: potentially still leaky? - - while (nn) { - try_replace_temp(nn->target, del, replace); - try_replace_temp(nn->src1, del, replace); - try_replace_temp(nn->src2, del, replace); - nn = nn->next; - } + while (bbl) { + BB* b = bbl_data(bbl); + bbl = bbl_next(bbl); + quad *q = b->start; + + while (q && q->next && q->next->next) { + quad *n = q->next; + quad *nn = n->next; + if ((q->op == Q_MOV && n->op == Q_MOV) && + (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && + (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR)){ + //(q->src1->astn_symptr.e == n->target->astn_symptr.e)) { // this line broken + fprintf(stderr, "found!\n"); + // we found the pattern. + // now we will remove the second quad entirely + // and replace all references to its target with the parent's source. + struct astn_qtemp* del = &n->target->astn_qtemp; + struct astn_qtemp* replace = &q->src1->astn_qtemp; + fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); + + q->next = nn; + free(n); // todo: potentially still leaky? + + while (nn) { + try_replace_temp(nn->target, del, replace); + try_replace_temp(nn->src1, del, replace); + try_replace_temp(nn->src2, del, replace); + nn = nn->next; + } + } + q = q->next; } - q = q->next; } } } From aed6f03d713b2bcba1be2c463d49f747f50ab0ed Mon Sep 17 00:00:00 2001 From: dmezh <54985569+dmezh@users.noreply.github.com> Date: Sun, 7 Nov 2021 18:30:43 -0500 Subject: [PATCH 3/5] More opt work --- CMakeLists.txt | 11 +--------- optimize/opt_flatten_adjacent_mov.c | 32 +++++++++++++++++++++++++---- target_x86/asmgen.c | 2 +- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 42ce6c3..23b57a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ include_directories(${GENERATED}) add_executable(dcc ${BISON_dcc_parser_OUTPUTS} ${FLEX_dcc_lexer_OUTPUTS} -<<<<<<< HEAD + optimize/opt_flatten_adjacent_mov.c parser/ast.c parser/ast_print.c parser/symtab.c @@ -50,15 +50,6 @@ ${FLEX_dcc_lexer_OUTPUTS} quads/quads.c quads/quads_cf.c quads/quads_print.c -======= - 2-parser/ast.c - 2-parser/symtab.c - 2-parser/types.c - 3-quads/quads.c - 3-quads/quads_cf.c - 3-quads/quads_print.c - optimize/opt_flatten_adjacent_mov.c ->>>>>>> rough theoretical MOV squashing, mostly works target_x86/asmgen.c common/charutil common/semval.c diff --git a/optimize/opt_flatten_adjacent_mov.c b/optimize/opt_flatten_adjacent_mov.c index 7d0d8ae..aa93269 100644 --- a/optimize/opt_flatten_adjacent_mov.c +++ b/optimize/opt_flatten_adjacent_mov.c @@ -5,14 +5,21 @@ #include "quads.h" #include "quads_cf.h" +unsigned total_replaced = 0; + void try_replace_temp(astn* n, struct astn_qtemp* del, struct astn_qtemp* replace) { if (n && n->type == ASTN_QTEMP && n->astn_qtemp.tempno == del->tempno) n->astn_qtemp = *replace; + + total_replaced++; } void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { BBL* head = root; - while (passes--) { + unsigned hits = 0; + for (unsigned pass = 1; pass <= passes; pass++) { + fprintf(stderr, "Pass %d/%d running...\n", pass, passes); + unsigned this_hits = 0, quads_visited = 0; BBL* bbl = head; if (head == &bb_root) bbl = bbl_next(bbl); @@ -22,19 +29,20 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { quad *q = b->start; while (q && q->next && q->next->next) { + quads_visited++; quad *n = q->next; quad *nn = n->next; if ((q->op == Q_MOV && n->op == Q_MOV) && (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR)){ //(q->src1->astn_symptr.e == n->target->astn_symptr.e)) { // this line broken - fprintf(stderr, "found!\n"); + this_hits++; // we found the pattern. // now we will remove the second quad entirely // and replace all references to its target with the parent's source. struct astn_qtemp* del = &n->target->astn_qtemp; struct astn_qtemp* replace = &q->src1->astn_qtemp; - fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); + //fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); q->next = nn; free(n); // todo: potentially still leaky? @@ -49,6 +57,22 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { q = q->next; } } + hits += this_hits; + fprintf(stderr, "--> Quads visited: %d | hits for this pass: %d, all passes: %d\n", quads_visited, this_hits, hits); } -} + BBL* bbl = head; + if (head == &bb_root) bbl = bbl_next(bbl); + + unsigned remaining_quads = 0; + while (bbl) { + BB* b = bbl_data(bbl); + bbl = bbl_next(bbl); + quad *q = b->start; + while (q) { + q = q->next; + remaining_quads++; + } + } + fprintf(stderr, "Remaining quads after optimization: %d\n", remaining_quads); +} diff --git a/target_x86/asmgen.c b/target_x86/asmgen.c index e2bb1f3..4f9dc9e 100644 --- a/target_x86/asmgen.c +++ b/target_x86/asmgen.c @@ -244,7 +244,7 @@ void e_bb(const BB* b) { fprintf(out, "BB.%s.%d", b->fn, b->bbno); } void asmgen(const BBL* head, FILE* f) { // init output file out = f; - opt_flatten_adjacent_mov(head, 1); + opt_flatten_adjacent_mov(head, 2); fprintf(out, "# ASM OUTPUT\n# compiled poorly :)\n\n"); // init globals, except functions From 7a0d30e9f555876017b66cbec683317f0abecda6 Mon Sep 17 00:00:00 2001 From: dmezh <54985569+dmezh@users.noreply.github.com> Date: Sun, 7 Nov 2021 18:47:04 -0500 Subject: [PATCH 4/5] Fix opt check (fixes failed test cases) --- optimize/opt_flatten_adjacent_mov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimize/opt_flatten_adjacent_mov.c b/optimize/opt_flatten_adjacent_mov.c index aa93269..9804516 100644 --- a/optimize/opt_flatten_adjacent_mov.c +++ b/optimize/opt_flatten_adjacent_mov.c @@ -34,8 +34,8 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { quad *nn = n->next; if ((q->op == Q_MOV && n->op == Q_MOV) && (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && - (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR)){ - //(q->src1->astn_symptr.e == n->target->astn_symptr.e)) { // this line broken + (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR) && + (q->target->astn_symptr.e == n->src1->astn_symptr.e)) { this_hits++; // we found the pattern. // now we will remove the second quad entirely From a6340e183758e23db1556b92ab4db3c5722a0756 Mon Sep 17 00:00:00 2001 From: dmezh <54985569+dmezh@users.noreply.github.com> Date: Sat, 25 Dec 2021 10:42:35 -0500 Subject: [PATCH 5/5] Cleaning up optimization a bit --- CMakeLists.txt | 1 + main.c | 19 ++++++++++++++++--- main.h | 2 +- optimize/opt_flatten_adjacent_mov.c | 16 ++++++++-------- optimize/optimization.c | 7 +++++++ optimize/optimization.h | 8 ++++++++ target_x86/asmgen.c | 3 +-- target_x86/asmgen.h | 2 +- 8 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 optimize/optimization.c create mode 100644 optimize/optimization.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 23b57a5..8f9d9de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ add_executable(dcc ${BISON_dcc_parser_OUTPUTS} ${FLEX_dcc_lexer_OUTPUTS} optimize/opt_flatten_adjacent_mov.c + optimize/optimization.c parser/ast.c parser/ast_print.c parser/symtab.c diff --git a/main.c b/main.c index cb77ace..da63094 100644 --- a/main.c +++ b/main.c @@ -9,11 +9,12 @@ #include #include "asmgen.h" +#include "optimization.h" #include "parser.tab.h" #include "quads.h" #include "util.h" -#define DCC_VERSION "0.2.0" +#define DCC_VERSION "0.2.0_opt" #define DCC_ARCHITECTURE "x86_32" #define BRED "\033[1;31m" @@ -32,11 +33,15 @@ FILE* tmp; static struct opt { bool debug; bool asm_out; + bool optimize; const char* out_file; const char* in_file; } opt = { + .debug = false, .asm_out = false, + .optimize = false, .out_file = NULL, + .in_file = NULL }; static void print_usage_additional() { @@ -61,6 +66,7 @@ static void print_usage() { "\n -h show extended usage" "\n -o output_file specify output file" "\n -S output assembly only" + "\n -O enable optimization." "\n -v debug mode:" "\n -v: enable INFO messages" "\n -vv: enable VERBOSE messages" @@ -74,7 +80,7 @@ static void print_usage() { static void get_options(int argc, char** argv) { int a; opterr = 0; - while ((a = getopt(argc, argv, "hvVSo:")) != -1) { + while ((a = getopt(argc, argv, "hvVSOo:")) != -1) { switch (a) { case 'h': print_usage(); @@ -93,6 +99,9 @@ static void get_options(int argc, char** argv) { case 'S': opt.asm_out = true; break; + case 'O': + opt.optimize = true; + break; case 'o': opt.out_file = optarg; break; @@ -207,6 +216,10 @@ int main(int argc, char** argv) { fclose(tmp); } -void parse_done_cb(const BBL* root) { +void parse_done_cb(BBL* root) { + if (opt.optimize) { + // optimization passes + dcc_optimize(root); + } asmgen(root, tmp); } diff --git a/main.h b/main.h index f83524b..4829e16 100644 --- a/main.h +++ b/main.h @@ -1,6 +1,6 @@ #ifndef MAIN_H #define MAIN_H -void parse_done_cb(const BBL* root); +void parse_done_cb(BBL* root); #endif diff --git a/optimize/opt_flatten_adjacent_mov.c b/optimize/opt_flatten_adjacent_mov.c index 9804516..0612f3f 100644 --- a/optimize/opt_flatten_adjacent_mov.c +++ b/optimize/opt_flatten_adjacent_mov.c @@ -8,8 +8,8 @@ unsigned total_replaced = 0; void try_replace_temp(astn* n, struct astn_qtemp* del, struct astn_qtemp* replace) { - if (n && n->type == ASTN_QTEMP && n->astn_qtemp.tempno == del->tempno) - n->astn_qtemp = *replace; + if (n && n->type == ASTN_QTEMP && n->Qtemp.tempno == del->tempno) + n->Qtemp = *replace; total_replaced++; } @@ -18,7 +18,7 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { BBL* head = root; unsigned hits = 0; for (unsigned pass = 1; pass <= passes; pass++) { - fprintf(stderr, "Pass %d/%d running...\n", pass, passes); + // fprintf(stderr, "Pass %d/%d running...\n", pass, passes); unsigned this_hits = 0, quads_visited = 0; BBL* bbl = head; if (head == &bb_root) bbl = bbl_next(bbl); @@ -35,13 +35,13 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { if ((q->op == Q_MOV && n->op == Q_MOV) && (q->target->type == ASTN_SYMPTR && q->src1->type == ASTN_QTEMP) && (n->target->type == ASTN_QTEMP && n->src1->type == ASTN_SYMPTR) && - (q->target->astn_symptr.e == n->src1->astn_symptr.e)) { + (q->target->Symptr.e == n->src1->Symptr.e)) { this_hits++; // we found the pattern. // now we will remove the second quad entirely // and replace all references to its target with the parent's source. - struct astn_qtemp* del = &n->target->astn_qtemp; - struct astn_qtemp* replace = &q->src1->astn_qtemp; + struct astn_qtemp* del = &n->target->Qtemp; + struct astn_qtemp* replace = &q->src1->Qtemp; //fprintf(stderr, "del is %u, replace is %u\n", del->tempno, replace->tempno); q->next = nn; @@ -58,7 +58,7 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { } } hits += this_hits; - fprintf(stderr, "--> Quads visited: %d | hits for this pass: %d, all passes: %d\n", quads_visited, this_hits, hits); + // fprintf(stderr, "--> Quads visited: %d | hits for this pass: %d, all passes: %d\n", quads_visited, this_hits, hits); } BBL* bbl = head; if (head == &bb_root) bbl = bbl_next(bbl); @@ -74,5 +74,5 @@ void opt_flatten_adjacent_mov(BBL* root, unsigned passes) { remaining_quads++; } } - fprintf(stderr, "Remaining quads after optimization: %d\n", remaining_quads); + // fprintf(stderr, "Remaining quads after optimization: %d\n", remaining_quads); } diff --git a/optimize/optimization.c b/optimize/optimization.c new file mode 100644 index 0000000..fda241e --- /dev/null +++ b/optimize/optimization.c @@ -0,0 +1,7 @@ +#include "optimization.h" + +#include "opt_flatten_adjacent_mov.h" + +void dcc_optimize(BBL* head) { + opt_flatten_adjacent_mov(head, 1); +} diff --git a/optimize/optimization.h b/optimize/optimization.h new file mode 100644 index 0000000..f00b0db --- /dev/null +++ b/optimize/optimization.h @@ -0,0 +1,8 @@ +#ifndef OPTIMIZATION_H +#define OPTIMIZATION_H + +#include "quads_cf.h" + +void dcc_optimize(BBL* head); + +#endif diff --git a/target_x86/asmgen.c b/target_x86/asmgen.c index 4f9dc9e..a2d3408 100644 --- a/target_x86/asmgen.c +++ b/target_x86/asmgen.c @@ -241,10 +241,9 @@ void e_cbr(const char *op, quad* q) { void e_bba(const astn *n) { e_bb(n->Qbbno.bb); } void e_bb(const BB* b) { fprintf(out, "BB.%s.%d", b->fn, b->bbno); } -void asmgen(const BBL* head, FILE* f) { +void asmgen(BBL* head, FILE* f) { // init output file out = f; - opt_flatten_adjacent_mov(head, 2); fprintf(out, "# ASM OUTPUT\n# compiled poorly :)\n\n"); // init globals, except functions diff --git a/target_x86/asmgen.h b/target_x86/asmgen.h index 64309e9..6d6a90a 100644 --- a/target_x86/asmgen.h +++ b/target_x86/asmgen.h @@ -22,6 +22,6 @@ typedef struct adir { void e_cbr(const char *op, quad* q); void e_bba(const astn *n); void e_bb(const BB* b); -void asmgen(const BBL* bbl, FILE* out); +void asmgen(BBL* bbl, FILE* out); #endif