diff --git a/src/MODULE.bazel b/src/MODULE.bazel
index 68bab589..05ce78f0 100644
--- a/src/MODULE.bazel
+++ b/src/MODULE.bazel
@@ -100,3 +100,6 @@ find_rpm = use_extension(
 use_repo(find_rpm, "rules_pkg_rpmbuild")
 
 register_toolchains("@rules_pkg_rpmbuild//:all")
+
+# Dependency for JSON
+bazel_dep(name = "nlohmann_json", version = "3.12.0.bcr.1")
diff --git a/src/MODULE.bazel.lock b/src/MODULE.bazel.lock
index cfc97893..ea88a550 100644
--- a/src/MODULE.bazel.lock
+++ b/src/MODULE.bazel.lock
@@ -204,7 +204,8 @@
         "https://bcr.bazel.build/modules/nanobind_bazel/2.4.0/MODULE.bazel": "414f1333ca83ad2b8e5fcb443942696f58b9c16d591af9afc4127998f7cc5860",
         "https://bcr.bazel.build/modules/nanobind_bazel/2.4.0/source.json": "0ace0d1b574cd4cf371196cbb98330ceeaf9a8e543cba95d9e714c491d11282a",
         "https://bcr.bazel.build/modules/nlohmann_json/3.11.3/MODULE.bazel": "87023db2f55fc3a9949c7b08dc711fae4d4be339a80a99d04453c4bb3998eefc",
-        "https://bcr.bazel.build/modules/nlohmann_json/3.11.3/source.json": "296c63a90c6813e53b3812d24245711981fc7e563d98fe15625f55181494488a",
+        "https://bcr.bazel.build/modules/nlohmann_json/3.12.0.bcr.1/MODULE.bazel": "a1c8bb07b5b91d971727c635f449d05623ac9608f6fe4f5f04254ea12f08e349",
+        "https://bcr.bazel.build/modules/nlohmann_json/3.12.0.bcr.1/source.json": "93f82a5ae985eb935c539bfee95e04767187818189241ac956f3ccadbdb8fb02",
         "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74",
         "https://bcr.bazel.build/modules/opencensus-cpp/0.0.0-20230502-50eb5de.bcr.2/MODULE.bazel": "cc18734138dd18c912c6ce2a59186db28f85d8058c99c9f21b46ca3e0aba0ebe",
         "https://bcr.bazel.build/modules/opencensus-cpp/0.0.0-20230502-50eb5de.bcr.2/source.json": "7c135f9d42bb3b045669c3c6ab3bb3c208e00b46aca4422eea64c29811a5b240",
diff --git a/src/db_adapter/BUILD b/src/db_adapter/BUILD
index f0dd4689..5a0f3a40 100644
--- a/src/db_adapter/BUILD
+++ b/src/db_adapter/BUILD
@@ -6,9 +6,12 @@ cc_library(
     name = "db_adapter_lib",
     includes = ["."],
     deps = [
+        ":context_loader",
         ":data_mapper",
         ":data_types",
         ":db_wrapper",
+        "//commons:commons_lib",
+        "//commons/atoms:atoms_lib",
         "//db_adapter/postgres:postgres_lib",
     ],
 )
@@ -19,6 +22,7 @@ cc_library(
     hdrs = ["DataMapper.h"],
     includes = ["."],
     deps = [
+        ":data_types",
         "//commons:commons_lib",
         "//commons/atoms:atoms_lib",
     ],
@@ -44,3 +48,15 @@ cc_library(
         "//commons/atoms:atoms_lib",
     ],
 )
+
+cc_library(
+    name = "context_loader",
+    srcs = ["ContextLoader.cc"],
+    hdrs = ["ContextLoader.h"],
+    includes = ["."],
+    deps = [
+        ":data_types",
+        "//commons:commons_lib",
+        "@nlohmann_json//:json",
+    ],
+)
diff --git a/src/db_adapter/ContextLoader.cc b/src/db_adapter/ContextLoader.cc
new file mode 100644
index 00000000..049cb03f
--- /dev/null
+++ b/src/db_adapter/ContextLoader.cc
@@ -0,0 +1,123 @@
+#include "ContextLoader.h"
+
+#include <filesystem>
+#include <fstream>
+#include <nlohmann/json.hpp>
+
+#include "Logger.h"
+#include "Utils.h"
+
+using namespace std;
+using namespace commons;
+using namespace db_adapter;
+
+using json = nlohmann::json;
+
+namespace fs = std::filesystem;
+
+// Parses a JSON context file into TableMapping entries, validating every
+// entry and logging each problem found. Returns an empty vector when any
+// entry fails validation (see ContextLoader.h for the expected schema).
+vector<TableMapping> ContextLoader::load_context_file(const string& file_path) {
+  if (!fs::exists(file_path)) {
+    Utils::error("Context file " + file_path + " does not exist");
+  }
+
+  ifstream f(file_path);
+
+  json tables = json::parse(f);
+
+  vector<TableMapping> out;
+
+  // Sticky flag: validation continues across all entries so every problem is
+  // reported in one pass, but a single failure empties the final result.
+  bool has_error = false;
+
+  for (size_t i = 0; i < tables.size(); ++i) {
+    string msg_base = "table[" + to_string(i) + "]";
+
+    const json& table = tables[i];
+
+    TableMapping tm;
+
+    // 'table_name' must be a string of the exact form "schema.table".
+    if (!table.contains("table_name")) {
+      LOG_ERROR(msg_base + " missing required key: 'table_name'");
+      has_error = true;
+    } else if (!table["table_name"].is_string()) {
+      LOG_ERROR(msg_base + ".table_name must be a string in a table entry");
+      has_error = true;
+    } else {
+      string tn = table["table_name"].get<string>();
+      size_t count_dot = 0;
+      for (char c : tn) {
+        if (c == '.') ++count_dot;
+      }
+      if (count_dot != 1) {
+        LOG_ERROR(msg_base + ".table_name must be in format 'schema.table'");
+        has_error = true;
+      } else {
+        // Exactly one dot, but it must not be the first or last character.
+        size_t pos = tn.find('.');
+        if (pos == 0 || pos + 1 >= tn.size()) {
+          LOG_ERROR(msg_base + ".table_name must be in format 'schema.table'");
+          has_error = true;
+        }
+      }
+    }
+
+    // 'skip_columns' must be present: either null or an array of strings.
+    if (!table.contains("skip_columns")) {
+      LOG_ERROR(msg_base + " missing required key: 'skip_columns'");
+      has_error = true;
+    } else {
+      const json& sc = table["skip_columns"];
+      if (!sc.is_null()) {
+        if (!sc.is_array()) {
+          LOG_ERROR(msg_base +
+                    ".skip_columns must be an array of strings or null in a table entry");
+          has_error = true;
+        } else {
+          tm.skip_columns.emplace();
+          for (size_t k = 0; k < sc.size(); ++k) {
+            if (!sc[k].is_string()) {
+              LOG_ERROR(msg_base + ".skip_columns[" + to_string(k) +
+                        "] must be a string in a table entry");
+              has_error = true;
+            } else {
+              tm.skip_columns->push_back(sc[k].get<string>());
+            }
+          }
+        }
+      }
+    }
+
+    // 'where_clauses' must be present: either null or an array of strings.
+    if (!table.contains("where_clauses")) {
+      LOG_ERROR(msg_base + " missing required key: 'where_clauses'");
+      has_error = true;
+    } else {
+      const json& wc = table["where_clauses"];
+      if (!wc.is_null()) {
+        if (!wc.is_array()) {
+          LOG_ERROR(msg_base +
+                    ".where_clauses must be an array of strings or null in a table entry");
+          has_error = true;
+        } else {
+          tm.where_clauses.emplace();
+          for (size_t k = 0; k < wc.size(); ++k) {
+            if (!wc[k].is_string()) {
+              LOG_ERROR(msg_base + ".where_clauses[" + to_string(k) +
+                        "] must be a string in a table entry");
+              has_error = true;
+            } else {
+              tm.where_clauses->push_back(wc[k].get<string>());
+            }
+          }
+        }
+      }
+    }
+
+    if (!has_error) {
+      tm.table_name = table["table_name"].get<string>();
+      out.push_back(tm);
+    }
+  }
+
+  if (has_error) {
+    LOG_ERROR("Context file validation failed with errors. Please fix the issues and try again.");
+    return vector<TableMapping>{};
+  }
+  return out;
+}
diff --git a/src/db_adapter/ContextLoader.h b/src/db_adapter/ContextLoader.h
new file mode 100644
index 00000000..758132ec
--- /dev/null
+++ b/src/db_adapter/ContextLoader.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "DataTypes.h"
+
+// Loads table-mapping "context" files used to drive the DB adapter.
+class ContextLoader {
+ public:
+  // Parses the JSON array at `file_path` into TableMapping entries.
+  // Each entry requires 'table_name' ("schema.table"), plus 'skip_columns'
+  // and 'where_clauses' (each an array of strings, or null).
+  // Returns an empty vector if the file fails validation.
+  static std::vector<db_adapter::TableMapping> load_context_file(const std::string& file_path);
+};
diff --git a/src/db_adapter/DataTypes.h b/src/db_adapter/DataTypes.h
index 9906be55..230be5af 100644
--- a/src/db_adapter/DataTypes.h
+++ b/src/db_adapter/DataTypes.h
@@ -91,4 +91,10 @@ struct Table {
  */
 enum class MAPPER_TYPE { SQL2METTA, SQL2ATOMS };
 
+struct TableMapping {
+  string table_name;
+  optional<vector<string>> where_clauses = nullopt;
+  optional<vector<string>> skip_columns = nullopt;
+};
+
 } // namespace db_adapter
\ No newline at end of file
diff --git a/src/tests/cpp/db_adapter_test.cc b/src/tests/cpp/db_adapter_test.cc
index 7b3fb96a..30190ead 100644
--- a/src/tests/cpp/db_adapter_test.cc
+++ b/src/tests/cpp/db_adapter_test.cc
@@ -1,14 +1,16 @@
 #include <gtest/gtest.h>
 #include <cstdio>
+#include <fstream>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include "Atom.h"
+#include "ContextLoader.h"
+#include "DataTypes.h"
 #include "Logger.h"
-#include "Node.h"
 #include "PostgresWrapper.h"
 #include "TestConfig.h"
 
 using namespace std;
@@ -53,12 +55,59 @@ class PostgresWrapperTest : public ::testing::Test {
   int TOTAL_ROWS_CVTERMS = 10;
   int TOTAL_ROWS_FEATURES = 26;
 
-  void SetUp() override {}
+  void SetUp() override {
+    temp_file_path_1 = "/tmp/context_1.json";
+    temp_file_path_2 = "/tmp/context_2.json";
+
+    ofstream file_1(temp_file_path_1);
+    file_1 << R"([
+    {
+        "table_name": "public.organism",
+        "skip_columns": [],
+        "where_clauses": ["organism_id = 1"]
+    },
+    {
+        "table_name": "public.feature",
+        "skip_columns": [],
+        "where_clauses": ["feature_id = 1"]
+    },
+    {
+        "table_name": "public.cvterm",
+        "skip_columns": [],
+        "where_clauses": ["cvterm_id = 1"]
+    }])";
+    file_1.close();
+
+    ofstream file_2(temp_file_path_2);
+    file_2 << R"([
+    {
+        "table_name": "public.organism",
+        "skip_columns": [2, "genus"],
+        "where_clauses": ["organism_id = 1"]
+    },
+    {
+        "table_name": "feature",
+        "skip_columns": [],
+        "where_clauses": ["feature_id = 1"]
+    },
+    {
+        "table_name": "public.cvterm",
+        "skip_columns": [],
+        "where_clauses": ["cvterm_id = 1"]
+    }])";
+    file_2.close();
+  }
 
-  void TearDown() override {}
+  void TearDown() override {
+    std::remove(temp_file_path_1.c_str());
+    std::remove(temp_file_path_2.c_str());
+  }
 
   shared_ptr<PostgresWrapper> create_wrapper(MAPPER_TYPE mapper_type = MAPPER_TYPE::SQL2ATOMS) {
     return make_shared<PostgresWrapper>(
         TEST_HOST, TEST_PORT, TEST_DB, TEST_USER, TEST_PASSWORD, mapper_type);
   }
+
+  string temp_file_path_1;
+  string temp_file_path_2;
 };
@@ -611,6 +660,34 @@ TEST_F(PostgresWrapperTest, MapSqlQueryWithInvalidClauseMetta) {
 
   EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0);
 }
 
+TEST_F(PostgresWrapperTest, MapTablesFirstRowAtomsWithContextFile) {
+  vector<TableMapping> tables_mapping = ContextLoader::load_context_file(temp_file_path_1);
+
+  EXPECT_FALSE(tables_mapping.empty());
+
+  auto wrapper = create_wrapper();
+
+  vector<size_t> atoms_sizes;
+
+  for (const auto& tm : tables_mapping) {
+    string table_name = tm.table_name;
+    vector<string> skip_columns = tm.skip_columns.value_or(vector<string>{});
+    vector<string> where_clauses = tm.where_clauses.value_or(vector<string>{});
+
+    Table table = wrapper->get_table(table_name);
+    EXPECT_NO_THROW({ wrapper->map_table(table, where_clauses, skip_columns, false); });
+    atoms_sizes.push_back(wrapper->mapper_handle_trie_size());
+  }
+  EXPECT_EQ(atoms_sizes.size(), 3);
+  EXPECT_EQ(atoms_sizes[0], 34);
+  EXPECT_EQ(atoms_sizes[1], 81);
+  EXPECT_EQ(atoms_sizes[2], 101);
+
+  vector<TableMapping> tables_mapping_2 = ContextLoader::load_context_file(temp_file_path_2);
+
+  EXPECT_TRUE(tables_mapping_2.empty());
+}
+
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   ::testing::AddGlobalTestEnvironment(new PostgresWrapperTestEnvironment);