From c7df9f1b9168d3ba35d257d1221a2179ffb94cf2 Mon Sep 17 00:00:00 2001
From: marcocapozzoli
Date: Mon, 9 Feb 2026 07:44:02 -0300
Subject: [PATCH 1/3] Add ContextLoader

---
 src/MODULE.bazel                 |   3 +
 src/MODULE.bazel.lock            |   3 +-
 src/db_adapter/BUILD             |  14 ++
 src/db_adapter/ContextLoader.h   | 211 +++++++++++++++++++++++++++++++
 src/db_adapter/DataTypes.h       |   7 +
 src/tests/cpp/db_adapter_test.cc |  63 ++++++++-
 6 files changed, 298 insertions(+), 3 deletions(-)
 create mode 100644 src/db_adapter/ContextLoader.h

diff --git a/src/MODULE.bazel b/src/MODULE.bazel
index 68bab589..05ce78f0 100644
--- a/src/MODULE.bazel
+++ b/src/MODULE.bazel
@@ -100,3 +100,6 @@ find_rpm = use_extension(
 use_repo(find_rpm, "rules_pkg_rpmbuild")
 
 register_toolchains("@rules_pkg_rpmbuild//:all")
+
+# Dependency for JSON
+bazel_dep(name = "nlohmann_json", version = "3.12.0.bcr.1")
diff --git a/src/MODULE.bazel.lock b/src/MODULE.bazel.lock
index cfc97893..ea88a550 100644
--- a/src/MODULE.bazel.lock
+++ b/src/MODULE.bazel.lock
@@ -204,7 +204,8 @@
     "https://bcr.bazel.build/modules/nanobind_bazel/2.4.0/MODULE.bazel": "414f1333ca83ad2b8e5fcb443942696f58b9c16d591af9afc4127998f7cc5860",
     "https://bcr.bazel.build/modules/nanobind_bazel/2.4.0/source.json": "0ace0d1b574cd4cf371196cbb98330ceeaf9a8e543cba95d9e714c491d11282a",
     "https://bcr.bazel.build/modules/nlohmann_json/3.11.3/MODULE.bazel": "87023db2f55fc3a9949c7b08dc711fae4d4be339a80a99d04453c4bb3998eefc",
-    "https://bcr.bazel.build/modules/nlohmann_json/3.11.3/source.json": "296c63a90c6813e53b3812d24245711981fc7e563d98fe15625f55181494488a",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.12.0.bcr.1/MODULE.bazel": "a1c8bb07b5b91d971727c635f449d05623ac9608f6fe4f5f04254ea12f08e349",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.12.0.bcr.1/source.json": "93f82a5ae985eb935c539bfee95e04767187818189241ac956f3ccadbdb8fb02",
     "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74",
     "https://bcr.bazel.build/modules/opencensus-cpp/0.0.0-20230502-50eb5de.bcr.2/MODULE.bazel": "cc18734138dd18c912c6ce2a59186db28f85d8058c99c9f21b46ca3e0aba0ebe",
     "https://bcr.bazel.build/modules/opencensus-cpp/0.0.0-20230502-50eb5de.bcr.2/source.json": "7c135f9d42bb3b045669c3c6ab3bb3c208e00b46aca4422eea64c29811a5b240",
diff --git a/src/db_adapter/BUILD b/src/db_adapter/BUILD
index f0dd4689..5ac293d5 100644
--- a/src/db_adapter/BUILD
+++ b/src/db_adapter/BUILD
@@ -6,9 +6,12 @@ cc_library(
     name = "db_adapter_lib",
     includes = ["."],
     deps = [
+        ":context_loader",
         ":data_mapper",
         ":data_types",
         ":db_wrapper",
+        "//commons:commons_lib",
+        "//commons/atoms:atoms_lib",
         "//db_adapter/postgres:postgres_lib",
     ],
 )
@@ -19,6 +22,7 @@ cc_library(
     hdrs = ["DataMapper.h"],
     includes = ["."],
     deps = [
+        ":data_types",
         "//commons:commons_lib",
         "//commons/atoms:atoms_lib",
     ],
@@ -44,3 +48,13 @@ cc_library(
         "//commons/atoms:atoms_lib",
     ],
 )
+
+cc_library(
+    name = "context_loader",
+    hdrs = ["ContextLoader.h"],
+    includes = ["."],
+    deps = [
+        "//commons:commons_lib",
+        "@nlohmann_json//:json",
+    ],
+)
diff --git a/src/db_adapter/ContextLoader.h b/src/db_adapter/ContextLoader.h
new file mode 100644
index 00000000..d2e2a116
--- /dev/null
+++ b/src/db_adapter/ContextLoader.h
@@ -0,0 +1,211 @@
+#include <filesystem>
+#include <fstream>
+#include <nlohmann/json.hpp>
+#include <string>
+#include <vector>
+
+#include "DataTypes.h"
+#include "Logger.h"
+#include "Utils.h"
+
+using namespace std;
+using namespace commons;
+using namespace db_adapter;
+using json = nlohmann::json;
+
+namespace fs = std::filesystem;
+
+bool load_context_file(const string& file_path, vector<TableMapping>& out) {
+    out.clear();
+
+    bool has_error = false;
+
+    if (!fs::exists(file_path)) {
+        LOG_ERROR("Context file " + file_path + " does not exist");
+        has_error = true;
+    }
+
+    ifstream f(file_path);
+
+    json contexts = json::parse(f);
+
+    for (size_t i = 0; i < contexts.size(); ++i) {
+        string msg_base = "contexts[" + to_string(i) + "]";
+
+        const json& ctx = contexts[i];
+
+        if (!ctx.contains("type")) {
+            LOG_ERROR(msg_base + " missing required key: 'type'");
+            has_error = true;
+            continue;
+        }
+
+        if (!ctx["type"].is_number_integer()) {
+            LOG_ERROR(msg_base + " 'type' must be an integer (1 or 2)");
+            has_error = true;
+            continue;
+        }
+
+        int type = ctx["type"].get<int>();
+
+        if (type != 1 && type != 2) {
+            LOG_ERROR(msg_base + " 'type' must be either 1 or 2");
+            has_error = true;
+            continue;
+        }
+
+        if (type == 1) {
+            if (!ctx.contains("tables")) {
+                LOG_ERROR(msg_base + " (type 1) missing required key: 'tables'");
+                has_error = true;
+                continue;
+            }
+            if (!ctx["tables"].is_array()) {
+                LOG_ERROR(msg_base + ".tables must be an array in context of type 1");
+                has_error = true;
+                continue;
+            }
+
+            const json& tables = ctx["tables"];
+
+            TableMapping tm;
+
+            for (size_t t = 0; t < tables.size(); ++t) {
+                string msg_tbase = msg_base + ".tables[" + to_string(t) + "]";
+
+                const json& table = tables[t];
+
+                if (!table.contains("table_name")) {
+                    LOG_ERROR(msg_tbase + " missing required key: 'table_name'");
+                    has_error = true;
+                } else if (!table["table_name"].is_string()) {
+                    LOG_ERROR(msg_tbase + ".table_name must be a string in a table entry");
+                    has_error = true;
+                } else {
+                    string tn = table["table_name"].get<string>();
+                    size_t count_dot = 0;
+                    for (char c : tn)
+                        if (c == '.') ++count_dot;
+                    if (count_dot != 1) {
+                        LOG_ERROR(msg_tbase +
+                                  ".table_name must be in format 'schema.table' (single dot)");
+                        has_error = true;
+                    } else {
+                        size_t pos = tn.find('.');
+                        if (pos == 0 || pos + 1 >= tn.size()) {
+                            LOG_ERROR(msg_tbase +
+                                      ".table_name parts must not be empty in a table entry");
+                            has_error = true;
+                        }
+                    }
+                }
+
+                tm.table_name = table["table_name"];
+
+                if (!table.contains("skip_columns")) {
+                    LOG_ERROR(msg_tbase + " missing required key: 'skip_columns'");
+                    has_error = true;
+                } else {
+                    const json& sc = table["skip_columns"];
+                    if (!sc.is_null()) {
+                        if (!sc.is_array()) {
+                            LOG_ERROR(
+                                msg_tbase +
+                                ".skip_columns must be an array of strings or null in a table entry");
+                            has_error = true;
+                        } else {
+                            tm.skip_columns.emplace();
+                            for (size_t k = 0; k < sc.size(); ++k) {
+                                if (!sc[k].is_string()) {
+                                    LOG_ERROR(msg_tbase + ".skip_columns[" + to_string(k) +
+                                              "] must be a string in a table entry");
+                                    has_error = true;
+                                }
+                                tm.skip_columns->push_back(sc[k]);
+                            }
+                        }
+                    }
+                }
+
+                if (!table.contains("where_clauses")) {
+                    LOG_ERROR(msg_tbase + " missing required key: 'where_clauses'");
+                    has_error = true;
+                } else {
+                    const json& wc = table["where_clauses"];
+                    if (!wc.is_null()) {
+                        if (!wc.is_array()) {
+                            LOG_ERROR(
+                                msg_tbase +
+                                ".where_clauses must be an array of strings or null in a table entry");
+                            has_error = true;
+                        } else {
+                            tm.where_clauses.emplace();
+                            for (size_t k = 0; k < wc.size(); ++k) {
+                                if (!wc[k].is_string()) {
+                                    LOG_ERROR(msg_tbase + ".where_clauses[" + to_string(k) +
+                                              "] must be a string in a table entry");
+                                    has_error = true;
+                                }
+                                tm.where_clauses->push_back(wc[k]);
+                            }
+                        }
+                    }
+                }
+
+                out.push_back(tm);
+            }
+        } else if (type == 2) {
+            if (!ctx.contains("queries")) {
+                LOG_ERROR(msg_base + " (type 2) missing required key: 'queries'");
'queries'"); + has_error = true; + continue; + } + + if (!ctx["queries"].is_array()) { + LOG_ERROR(msg_base + ".queries must be an array"); + has_error = true; + continue; + } + + const json& queries = ctx["queries"]; + + TableMapping tmq; + + for (size_t q = 0; q < queries.size(); ++q) { + string msg_qbase = msg_base + ".queries[" + to_string(q) + "]"; + + const json& query = queries[q]; + + if (!query.contains("virtual_name")) { + LOG_ERROR(msg_qbase + " missing required key: 'virtual_name'."); + has_error = true; + } else if (!query["virtual_name"].is_string()) { + LOG_ERROR(msg_qbase + ".virtual_name must be a string."); + has_error = true; + } + + tmq.table_name = query["virtual_name"]; + + if (!query.contains("query")) { + LOG_ERROR(msg_qbase + " missing required key: 'query'."); + has_error = true; + } else if (!query["query"].is_string()) { + LOG_ERROR(msg_qbase + ".query must be a string."); + has_error = true; + } + + tmq.query = query["query"]; + + out.push_back(tmq); + } + } else { + LOG_INFO("Type unknown"); + } + } + + if (has_error) { + LOG_ERROR("Context file validation failed with errors. Please fix the issues and try again."); + return false; + } + return true; +} \ No newline at end of file diff --git a/src/db_adapter/DataTypes.h b/src/db_adapter/DataTypes.h index 9906be55..7cb24463 100644 --- a/src/db_adapter/DataTypes.h +++ b/src/db_adapter/DataTypes.h @@ -91,4 +91,11 @@ struct Table { */ enum class MAPPER_TYPE { SQL2METTA, SQL2ATOMS }; +struct TableMapping { + string table_name; + optional> where_clauses = nullopt; + optional> skip_columns = nullopt; + optional query = nullopt; +}; + } // namespace db_adapter \ No newline at end of file diff --git a/src/tests/cpp/db_adapter_test.cc b/src/tests/cpp/db_adapter_test.cc index 7b3fb96a..30b8531d 100644 --- a/src/tests/cpp/db_adapter_test.cc +++ b/src/tests/cpp/db_adapter_test.cc @@ -7,6 +7,8 @@ #include #include "Atom.h" +#include "ContextLoader.h" +#include "DataTypes.h" #include "Logger.h" #include "Node.h" #include "PostgresWrapper.h" @@ -53,14 +55,43 @@ class PostgresWrapperTest : public ::testing::Test { int TOTAL_ROWS_CVTERMS = 10; int TOTAL_ROWS_FEATURES = 26; - void SetUp() override {} + void SetUp() override { + temp_file_path = "/tmp/context.json"; + + ofstream file(temp_file_path); + file << R"([ + { + "type": 1, + "tables": [ + { + "table_name": "public.organism", + "skip_columns": [], + "where_clauses": ["organism_id = 1"] + }, + { + "table_name": "public.feature", + "skip_columns": [], + "where_clauses": ["feature_id = 1"] + }, + { + "table_name": "public.cvterm", + "skip_columns": [], + "where_clauses": ["cvterm_id = 1"] + } + ] + } + ])"; + file.close(); + } - void TearDown() override {} + void TearDown() override { std::remove(temp_file_path.c_str()); } shared_ptr create_wrapper(MAPPER_TYPE mapper_type = MAPPER_TYPE::SQL2ATOMS) { return make_shared( TEST_HOST, TEST_PORT, TEST_DB, TEST_USER, TEST_PASSWORD, mapper_type); } + + string temp_file_path; }; TEST_F(PostgresWrapperTest, Connection) { @@ -611,6 +642,34 @@ TEST_F(PostgresWrapperTest, MapSqlQueryWithInvalidClauseMetta) { EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); } +TEST_F(PostgresWrapperTest, MapTablesFirstRowAtomsWithContextFile) { + vector tables_mapping; + + if (!load_context_file("/tmp/context.json", tables_mapping)) { + Utils::error("Failed to load context file. 
Aborting test."); + } + + auto wrapper = create_wrapper(); + + vector atoms_sizes; + + for (const auto& tm : tables_mapping) { + if (!tm.query.has_value()) { + string table_name = tm.table_name; + vector skip_columns = tm.skip_columns.value_or(vector{}); + vector where_clauses = tm.where_clauses.value_or(vector{}); + + Table table = wrapper->get_table(table_name); + EXPECT_NO_THROW({ wrapper->map_table(table, where_clauses, skip_columns, false); }); + atoms_sizes.push_back(wrapper->mapper_handle_trie_size()); + } + } + EXPECT_EQ(atoms_sizes.size(), 3); + EXPECT_EQ(atoms_sizes[0], 34); + EXPECT_EQ(atoms_sizes[1], 81); + EXPECT_EQ(atoms_sizes[2], 101); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); ::testing::AddGlobalTestEnvironment(new PostgresWrapperTestEnvironment); From edf7523d960239e482152c40ff790d2c6bbe0a6b Mon Sep 17 00:00:00 2001 From: marcocapozzoli Date: Mon, 9 Feb 2026 16:38:51 -0300 Subject: [PATCH 2/3] adjust .json file --- src/db_adapter/ContextLoader.h | 205 +++++++++---------------------- src/db_adapter/DataTypes.h | 1 - src/tests/cpp/db_adapter_test.cc | 50 ++++---- 3 files changed, 78 insertions(+), 178 deletions(-) diff --git a/src/db_adapter/ContextLoader.h b/src/db_adapter/ContextLoader.h index d2e2a116..2d1c2d44 100644 --- a/src/db_adapter/ContextLoader.h +++ b/src/db_adapter/ContextLoader.h @@ -27,180 +27,89 @@ bool load_context_file(const string& file_path, vector& out) { ifstream f(file_path); - json contexts = json::parse(f); + json tables = json::parse(f); - for (size_t i = 0; i < contexts.size(); ++i) { - string msg_base = "contexts[" + to_string(i) + "]"; + for (size_t i = 0; i < tables.size(); ++i) { + string msg_base = "table[" + to_string(i) + "]"; - const json& ctx = contexts[i]; + const json& table = tables[i]; - if (!ctx.contains("type")) { - LOG_ERROR(msg_base + " missing required key: 'type'"); - has_error = true; - continue; - } + TableMapping tm; - if (!ctx["type"].is_number_integer()) { - LOG_ERROR(msg_base + " 'type' must be integer 1 or 2"); + if (!table.contains("table_name")) { + LOG_ERROR(msg_base + " missing required key: 'table_name'"); has_error = true; - continue; - } - - int type = ctx["type"].get(); - - if (type != 1 && type != 2) { - LOG_ERROR(msg_base + " 'type' must be either 1 or 2"); + } else if (!table["table_name"].is_string()) { + LOG_ERROR(msg_base + ".table_name must be a string in a table entry"); has_error = true; - continue; - } - - if (type == 1) { - if (!ctx.contains("tables")) { - LOG_ERROR(msg_base + " (type 1) missing required key: 'tables'"); - has_error = true; - continue; - } - if (!ctx["tables"].is_array()) { - LOG_ERROR(msg_base + ".tables must be an array in context of type 1"); + } else { + string tn = table["table_name"].get(); + size_t count_dot = 0; + for (char c : tn) + if (c == '.') ++count_dot; + if (count_dot != 1) { + LOG_ERROR(msg_base + ".table_name must be in format 'schema.table' (single dot)"); has_error = true; - continue; - } - - const json& tables = ctx["tables"]; - - TableMapping tm; - - for (size_t t = 0; t < tables.size(); ++t) { - string msg_tbase = msg_base + ".tables[" + to_string(t) + "]"; - - const json& table = tables[t]; - - if (!table.contains("table_name")) { - LOG_ERROR(msg_tbase + " missing required key: 'table_name'"); - has_error = true; - } else if (!table["table_name"].is_string()) { - LOG_ERROR(msg_tbase + ".table_name must be a string in a table entry"); + } else { + size_t pos = tn.find('.'); + if (pos == 0 || pos + 1 >= tn.size()) { + 
LOG_ERROR(msg_base + ".table_name parts must not be empty in a table entry"); has_error = true; - } else { - string tn = table["table_name"].get(); - size_t count_dot = 0; - for (char c : tn) - if (c == '.') ++count_dot; - if (count_dot != 1) { - LOG_ERROR(msg_tbase + - ".table_name must be in format 'schema.table' (single dot)"); - has_error = true; - } else { - size_t pos = tn.find('.'); - if (pos == 0 || pos + 1 >= tn.size()) { - LOG_ERROR(msg_tbase + - ".table_name parts must not be empty in a table entry"); - has_error = true; - } - } } + } + } - tm.table_name = table["table_name"]; + tm.table_name = table["table_name"]; - if (!table.contains("skip_columns")) { - LOG_ERROR(msg_tbase + " missing required key: 'skip_columns'"); + if (!table.contains("skip_columns")) { + LOG_ERROR(msg_base + " missing required key: 'skip_columns'"); + has_error = true; + } else { + const json& sc = table["skip_columns"]; + if (!sc.is_null()) { + if (!sc.is_array()) { + LOG_ERROR(msg_base + + ".skip_columns must be an array of strings or null in a table entry"); has_error = true; } else { - const json& sc = table["skip_columns"]; - if (!sc.is_null()) { - if (!sc.is_array()) { - LOG_ERROR( - msg_tbase + - ".skip_columns must be an array of strings or null in a table entry"); + tm.skip_columns.emplace(); + for (size_t k = 0; k < sc.size(); ++k) { + if (!sc[k].is_string()) { + LOG_ERROR(msg_base + ".skip_columns[" + to_string(k) + + "] must be a string in a table entry"); has_error = true; - } else { - tm.skip_columns.emplace(); - for (size_t k = 0; k < sc.size(); ++k) { - if (!sc[k].is_string()) { - LOG_ERROR(msg_tbase + ".skip_columns[" + to_string(k) + - "] must be a string in a table entry"); - has_error = true; - } - tm.skip_columns->push_back(sc[k]); - } } + tm.skip_columns->push_back(sc[k]); } } + } + } - if (!table.contains("where_clauses")) { - LOG_ERROR(msg_tbase + " missing required key: 'where_clauses'"); + if (!table.contains("where_clauses")) { + LOG_ERROR(msg_base + " missing required key: 'where_clauses'"); + has_error = true; + } else { + const json& wc = table["where_clauses"]; + if (!wc.is_null()) { + if (!wc.is_array()) { + LOG_ERROR(msg_base + + ".where_clauses must be an array of strings or null in a table entry"); has_error = true; } else { - const json& wc = table["where_clauses"]; - if (!wc.is_null()) { - if (!wc.is_array()) { - LOG_ERROR( - msg_tbase + - ".where_clauses must be an array of strings or null in a table entry"); + tm.where_clauses.emplace(); + for (size_t k = 0; k < wc.size(); ++k) { + if (!wc[k].is_string()) { + LOG_ERROR(msg_base + ".where_clauses[" + to_string(k) + + "] must be a string in a table entry"); has_error = true; - } else { - tm.where_clauses.emplace(); - for (size_t k = 0; k < wc.size(); ++k) { - if (!wc[k].is_string()) { - LOG_ERROR(msg_tbase + ".where_clauses[" + to_string(k) + - "] must be a string in a table entry"); - has_error = true; - } - tm.where_clauses->push_back(wc[k]); - } } + tm.where_clauses->push_back(wc[k]); } } - - out.push_back(tm); - } - } else if (type == 2) { - if (!ctx.contains("queries")) { - LOG_ERROR(msg_base + " (type 2) missing required key: 'queries'"); - has_error = true; - continue; - } - - if (!ctx["queries"].is_array()) { - LOG_ERROR(msg_base + ".queries must be an array"); - has_error = true; - continue; - } - - const json& queries = ctx["queries"]; - - TableMapping tmq; - - for (size_t q = 0; q < queries.size(); ++q) { - string msg_qbase = msg_base + ".queries[" + to_string(q) + "]"; - - const json& query = queries[q]; - 
- if (!query.contains("virtual_name")) { - LOG_ERROR(msg_qbase + " missing required key: 'virtual_name'."); - has_error = true; - } else if (!query["virtual_name"].is_string()) { - LOG_ERROR(msg_qbase + ".virtual_name must be a string."); - has_error = true; - } - - tmq.table_name = query["virtual_name"]; - - if (!query.contains("query")) { - LOG_ERROR(msg_qbase + " missing required key: 'query'."); - has_error = true; - } else if (!query["query"].is_string()) { - LOG_ERROR(msg_qbase + ".query must be a string."); - has_error = true; - } - - tmq.query = query["query"]; - - out.push_back(tmq); } - } else { - LOG_INFO("Type unknown"); } + + out.push_back(tm); } if (has_error) { diff --git a/src/db_adapter/DataTypes.h b/src/db_adapter/DataTypes.h index 7cb24463..230be5af 100644 --- a/src/db_adapter/DataTypes.h +++ b/src/db_adapter/DataTypes.h @@ -95,7 +95,6 @@ struct TableMapping { string table_name; optional> where_clauses = nullopt; optional> skip_columns = nullopt; - optional query = nullopt; }; } // namespace db_adapter \ No newline at end of file diff --git a/src/tests/cpp/db_adapter_test.cc b/src/tests/cpp/db_adapter_test.cc index 30b8531d..2b40dfc3 100644 --- a/src/tests/cpp/db_adapter_test.cc +++ b/src/tests/cpp/db_adapter_test.cc @@ -61,26 +61,20 @@ class PostgresWrapperTest : public ::testing::Test { ofstream file(temp_file_path); file << R"([ { - "type": 1, - "tables": [ - { - "table_name": "public.organism", - "skip_columns": [], - "where_clauses": ["organism_id = 1"] - }, - { - "table_name": "public.feature", - "skip_columns": [], - "where_clauses": ["feature_id = 1"] - }, - { - "table_name": "public.cvterm", - "skip_columns": [], - "where_clauses": ["cvterm_id = 1"] - } - ] - } - ])"; + "table_name": "public.organism", + "skip_columns": [], + "where_clauses": ["organism_id = 1"] + }, + { + "table_name": "public.feature", + "skip_columns": [], + "where_clauses": ["feature_id = 1"] + }, + { + "table_name": "public.cvterm", + "skip_columns": [], + "where_clauses": ["cvterm_id = 1"] + }])"; file.close(); } @@ -654,15 +648,13 @@ TEST_F(PostgresWrapperTest, MapTablesFirstRowAtomsWithContextFile) { vector atoms_sizes; for (const auto& tm : tables_mapping) { - if (!tm.query.has_value()) { - string table_name = tm.table_name; - vector skip_columns = tm.skip_columns.value_or(vector{}); - vector where_clauses = tm.where_clauses.value_or(vector{}); - - Table table = wrapper->get_table(table_name); - EXPECT_NO_THROW({ wrapper->map_table(table, where_clauses, skip_columns, false); }); - atoms_sizes.push_back(wrapper->mapper_handle_trie_size()); - } + string table_name = tm.table_name; + vector skip_columns = tm.skip_columns.value_or(vector{}); + vector where_clauses = tm.where_clauses.value_or(vector{}); + + Table table = wrapper->get_table(table_name); + EXPECT_NO_THROW({ wrapper->map_table(table, where_clauses, skip_columns, false); }); + atoms_sizes.push_back(wrapper->mapper_handle_trie_size()); } EXPECT_EQ(atoms_sizes.size(), 3); EXPECT_EQ(atoms_sizes[0], 34); From 19c1cdfb56ba810a023e6cec8f04c6d02657aa82 Mon Sep 17 00:00:00 2001 From: marcocapozzoli Date: Wed, 11 Feb 2026 20:43:06 -0300 Subject: [PATCH 3/3] add ContextLoad class --- src/db_adapter/BUILD | 2 + src/db_adapter/ContextLoader.cc | 122 +++++++++++++++++++++++++++++++ src/db_adapter/ContextLoader.h | 116 +---------------------------- src/tests/cpp/db_adapter_test.cc | 48 +++++++++--- 4 files changed, 165 insertions(+), 123 deletions(-) create mode 100644 src/db_adapter/ContextLoader.cc diff --git a/src/db_adapter/BUILD 
index 5ac293d5..5a0f3a40 100644
--- a/src/db_adapter/BUILD
+++ b/src/db_adapter/BUILD
@@ -51,9 +51,11 @@ cc_library(
 
 cc_library(
     name = "context_loader",
+    srcs = ["ContextLoader.cc"],
    hdrs = ["ContextLoader.h"],
     includes = ["."],
     deps = [
+        ":data_types",
         "//commons:commons_lib",
         "@nlohmann_json//:json",
     ],
diff --git a/src/db_adapter/ContextLoader.cc b/src/db_adapter/ContextLoader.cc
new file mode 100644
index 00000000..049cb03f
--- /dev/null
+++ b/src/db_adapter/ContextLoader.cc
@@ -0,0 +1,122 @@
+#include "ContextLoader.h"
+
+#include <filesystem>
+#include <fstream>
+#include <nlohmann/json.hpp>
+
+#include "Logger.h"
+#include "Utils.h"
+
+using namespace std;
+using namespace commons;
+
+using json = nlohmann::json;
+
+namespace fs = std::filesystem;
+
+vector<TableMapping> ContextLoader::load_context_file(const string& file_path) {
+    if (!fs::exists(file_path)) {
+        Utils::error("Context file " + file_path + " does not exist");
+    }
+
+    ifstream f(file_path);
+
+    json tables = json::parse(f);
+
+    vector<TableMapping> out;
+
+    bool has_error = false;
+
+    for (size_t i = 0; i < tables.size(); ++i) {
+        string msg_base = "table[" + to_string(i) + "]";
+
+        const json& table = tables[i];
+
+        TableMapping tm;
+
+        if (!table.contains("table_name")) {
+            LOG_ERROR(msg_base + " missing required key: 'table_name'");
+            has_error = true;
+        } else if (!table["table_name"].is_string()) {
+            LOG_ERROR(msg_base + ".table_name must be a string in a table entry");
+            has_error = true;
+        } else {
+            string tn = table["table_name"].get<string>();
+            size_t count_dot = 0;
+            for (char c : tn) {
+                if (c == '.') ++count_dot;
+            }
+            if (count_dot != 1) {
+                LOG_ERROR(msg_base + ".table_name must be in format 'schema.table'");
+                has_error = true;
+            } else {
+                size_t pos = tn.find('.');
+                if (pos == 0 || pos + 1 >= tn.size()) {
+                    LOG_ERROR(msg_base + ".table_name must be in format 'schema.table'");
+                    has_error = true;
+                }
+            }
+        }
+
+        if (!table.contains("skip_columns")) {
+            LOG_ERROR(msg_base + " missing required key: 'skip_columns'");
+            has_error = true;
+        } else {
+            const json& sc = table["skip_columns"];
+            if (!sc.is_null()) {
+                if (!sc.is_array()) {
+                    LOG_ERROR(msg_base +
+                              ".skip_columns must be an array of strings or null in a table entry");
+                    has_error = true;
+                } else {
+                    tm.skip_columns.emplace();
+                    for (size_t k = 0; k < sc.size(); ++k) {
+                        if (!sc[k].is_string()) {
+                            LOG_ERROR(msg_base + ".skip_columns[" + to_string(k) +
+                                      "] must be a string in a table entry");
+                            has_error = true;
+                        } else {
+                            tm.skip_columns->push_back(sc[k]);
+                        }
+                    }
+                }
+            }
+        }
+
+        if (!table.contains("where_clauses")) {
+            LOG_ERROR(msg_base + " missing required key: 'where_clauses'");
+            has_error = true;
+        } else {
+            const json& wc = table["where_clauses"];
+            if (!wc.is_null()) {
+                if (!wc.is_array()) {
+                    LOG_ERROR(msg_base +
+                              ".where_clauses must be an array of strings or null in a table entry");
+                    has_error = true;
+                } else {
+                    tm.where_clauses.emplace();
+                    for (size_t k = 0; k < wc.size(); ++k) {
+                        if (!wc[k].is_string()) {
+                            LOG_ERROR(msg_base + ".where_clauses[" + to_string(k) +
+                                      "] must be a string in a table entry");
+                            has_error = true;
+                        } else {
+                            tm.where_clauses->push_back(wc[k]);
+                        }
+                    }
+                }
+            }
+        }
+
+        if (!has_error) {
+            tm.table_name = table["table_name"];
+            out.push_back(tm);
+        }
+    }
+
+    if (has_error) {
+        LOG_ERROR("Context file validation failed with errors. Please fix the issues and try again.");
Please fix the issues and try again."); + return vector{}; + } + return out; +} \ No newline at end of file diff --git a/src/db_adapter/ContextLoader.h b/src/db_adapter/ContextLoader.h index 2d1c2d44..758132ec 100644 --- a/src/db_adapter/ContextLoader.h +++ b/src/db_adapter/ContextLoader.h @@ -1,120 +1,12 @@ -#include -#include -#include #include #include #include "DataTypes.h" -#include "Logger.h" -#include "Utils.h" using namespace std; -using namespace commons; using namespace db_adapter; -using json = nlohmann::json; -namespace fs = std::filesystem; - -bool load_context_file(const string& file_path, vector& out) { - out.clear(); - - bool has_error = false; - - if (!fs::exists(file_path)) { - LOG_ERROR("Context file " + file_path + " does not exist"); - has_error = true; - } - - ifstream f(file_path); - - json tables = json::parse(f); - - for (size_t i = 0; i < tables.size(); ++i) { - string msg_base = "table[" + to_string(i) + "]"; - - const json& table = tables[i]; - - TableMapping tm; - - if (!table.contains("table_name")) { - LOG_ERROR(msg_base + " missing required key: 'table_name'"); - has_error = true; - } else if (!table["table_name"].is_string()) { - LOG_ERROR(msg_base + ".table_name must be a string in a table entry"); - has_error = true; - } else { - string tn = table["table_name"].get(); - size_t count_dot = 0; - for (char c : tn) - if (c == '.') ++count_dot; - if (count_dot != 1) { - LOG_ERROR(msg_base + ".table_name must be in format 'schema.table' (single dot)"); - has_error = true; - } else { - size_t pos = tn.find('.'); - if (pos == 0 || pos + 1 >= tn.size()) { - LOG_ERROR(msg_base + ".table_name parts must not be empty in a table entry"); - has_error = true; - } - } - } - - tm.table_name = table["table_name"]; - - if (!table.contains("skip_columns")) { - LOG_ERROR(msg_base + " missing required key: 'skip_columns'"); - has_error = true; - } else { - const json& sc = table["skip_columns"]; - if (!sc.is_null()) { - if (!sc.is_array()) { - LOG_ERROR(msg_base + - ".skip_columns must be an array of strings or null in a table entry"); - has_error = true; - } else { - tm.skip_columns.emplace(); - for (size_t k = 0; k < sc.size(); ++k) { - if (!sc[k].is_string()) { - LOG_ERROR(msg_base + ".skip_columns[" + to_string(k) + - "] must be a string in a table entry"); - has_error = true; - } - tm.skip_columns->push_back(sc[k]); - } - } - } - } - - if (!table.contains("where_clauses")) { - LOG_ERROR(msg_base + " missing required key: 'where_clauses'"); - has_error = true; - } else { - const json& wc = table["where_clauses"]; - if (!wc.is_null()) { - if (!wc.is_array()) { - LOG_ERROR(msg_base + - ".where_clauses must be an array of strings or null in a table entry"); - has_error = true; - } else { - tm.where_clauses.emplace(); - for (size_t k = 0; k < wc.size(); ++k) { - if (!wc[k].is_string()) { - LOG_ERROR(msg_base + ".where_clauses[" + to_string(k) + - "] must be a string in a table entry"); - has_error = true; - } - tm.where_clauses->push_back(wc[k]); - } - } - } - } - - out.push_back(tm); - } - - if (has_error) { - LOG_ERROR("Context file validation failed with errors. 
Please fix the issues and try again."); - return false; - } - return true; -} \ No newline at end of file +class ContextLoader { + public: + static vector load_context_file(const string& file_path); +}; \ No newline at end of file diff --git a/src/tests/cpp/db_adapter_test.cc b/src/tests/cpp/db_adapter_test.cc index 2b40dfc3..30190ead 100644 --- a/src/tests/cpp/db_adapter_test.cc +++ b/src/tests/cpp/db_adapter_test.cc @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -10,7 +11,6 @@ #include "ContextLoader.h" #include "DataTypes.h" #include "Logger.h" -#include "Node.h" #include "PostgresWrapper.h" #include "TestConfig.h" @@ -56,10 +56,11 @@ class PostgresWrapperTest : public ::testing::Test { int TOTAL_ROWS_FEATURES = 26; void SetUp() override { - temp_file_path = "/tmp/context.json"; + temp_file_path_1 = "/tmp/context_1.json"; + temp_file_path_2 = "/tmp/context_2.json"; - ofstream file(temp_file_path); - file << R"([ + ofstream file_1(temp_file_path_1); + file_1 << R"([ { "table_name": "public.organism", "skip_columns": [], @@ -75,17 +76,40 @@ class PostgresWrapperTest : public ::testing::Test { "skip_columns": [], "where_clauses": ["cvterm_id = 1"] }])"; - file.close(); + file_1.close(); + + ofstream file_2(temp_file_path_2); + file_2 << R"([ + { + "table_name": "public.organism", + "skip_columns": [2, "genus"], + "where_clauses": ["organism_id = 1"] + }, + { + "table_name": "feature", + "skip_columns": [], + "where_clauses": ["feature_id = 1"] + }, + { + "table_name": "public.cvterm", + "skip_columns": [], + "where_clauses": ["cvterm_id = 1"] + }])"; + file_2.close(); } - void TearDown() override { std::remove(temp_file_path.c_str()); } + void TearDown() override { + std::remove(temp_file_path_1.c_str()); + std::remove(temp_file_path_2.c_str()); + } shared_ptr create_wrapper(MAPPER_TYPE mapper_type = MAPPER_TYPE::SQL2ATOMS) { return make_shared( TEST_HOST, TEST_PORT, TEST_DB, TEST_USER, TEST_PASSWORD, mapper_type); } - string temp_file_path; + string temp_file_path_1; + string temp_file_path_2; }; TEST_F(PostgresWrapperTest, Connection) { @@ -637,11 +661,9 @@ TEST_F(PostgresWrapperTest, MapSqlQueryWithInvalidClauseMetta) { } TEST_F(PostgresWrapperTest, MapTablesFirstRowAtomsWithContextFile) { - vector tables_mapping; + vector tables_mapping = ContextLoader::load_context_file("/tmp/context_1.json"); - if (!load_context_file("/tmp/context.json", tables_mapping)) { - Utils::error("Failed to load context file. Aborting test."); - } + EXPECT_FALSE(tables_mapping.empty()); auto wrapper = create_wrapper(); @@ -660,6 +682,10 @@ TEST_F(PostgresWrapperTest, MapTablesFirstRowAtomsWithContextFile) { EXPECT_EQ(atoms_sizes[0], 34); EXPECT_EQ(atoms_sizes[1], 81); EXPECT_EQ(atoms_sizes[2], 101); + + vector tables_mapping_2 = ContextLoader::load_context_file("/tmp/context_2.json"); + + EXPECT_TRUE(tables_mapping_2.empty()); } int main(int argc, char** argv) {