diff --git a/src/BUILD b/src/BUILD index fea3d30b3..095f790a8 100644 --- a/src/BUILD +++ b/src/BUILD @@ -44,6 +44,7 @@ cc_shared_library( "//atomdb:atomdb_lib", "//attention_broker:attention_broker_lib", "//commons:commons_lib", + "//db_adapter:db_adapter_lib", "//distributed_algorithm_node:distributed_algorithm_node_lib", "//hasher:hasher_lib", "//metta:metta_lib", diff --git a/src/assets/libpqxx-7.10.5.tar.gz b/src/assets/libpqxx-7.10.5.tar.gz new file mode 100644 index 000000000..8fc0c0a71 Binary files /dev/null and b/src/assets/libpqxx-7.10.5.tar.gz differ diff --git a/src/commons/HandleTrie.cc b/src/commons/HandleTrie.cc index ff0ab0cd4..b1462895e 100644 --- a/src/commons/HandleTrie.cc +++ b/src/commons/HandleTrie.cc @@ -3,6 +3,7 @@ #include #include +#include "Logger.h" #include "Utils.h" #include "expression_hasher.h" @@ -26,7 +27,10 @@ HandleTrie::TrieNode::~TrieNode() { delete children[i]; } delete[] children; - delete value; + if (value != NULL) { + delete value; + value = NULL; + } } string HandleTrie::TrieValue::to_string() { return ""; } @@ -264,3 +268,8 @@ void HandleTrie::traverse(bool keep_root_locked, root->trie_node_mutex.unlock(); } } + +bool HandleTrie::exists(const string& key) { + TrieNode* node = lookup_node(key); + return node != NULL ? true : false; +} \ No newline at end of file diff --git a/src/commons/HandleTrie.h b/src/commons/HandleTrie.h index bb66acbb0..f69b7d47f 100644 --- a/src/commons/HandleTrie.h +++ b/src/commons/HandleTrie.h @@ -74,6 +74,8 @@ class HandleTrie { */ TrieValue* lookup(const string& key); + bool exists(const string& key); + /** * Remove a key from this HandleTrie and its associated value. * diff --git a/src/db_adapter/BUILD b/src/db_adapter/BUILD new file mode 100644 index 000000000..f0dd46890 --- /dev/null +++ b/src/db_adapter/BUILD @@ -0,0 +1,46 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "db_adapter_lib", + includes = ["."], + deps = [ + ":data_mapper", + ":data_types", + ":db_wrapper", + "//db_adapter/postgres:postgres_lib", + ], +) + +cc_library( + name = "data_mapper", + srcs = ["DataMapper.cc"], + hdrs = ["DataMapper.h"], + includes = ["."], + deps = [ + "//commons:commons_lib", + "//commons/atoms:atoms_lib", + ], +) + +cc_library( + name = "data_types", + hdrs = ["DataTypes.h"], + includes = ["."], + deps = [ + "//commons:commons_lib", + "//commons/atoms:atoms_lib", + ], +) + +cc_library( + name = "db_wrapper", + hdrs = ["DBWrapper.h"], + includes = ["."], + deps = [ + ":data_types", + "//commons:commons_lib", + "//commons/atoms:atoms_lib", + ], +) diff --git a/src/db_adapter/DBWrapper.h b/src/db_adapter/DBWrapper.h new file mode 100644 index 000000000..f4e812970 --- /dev/null +++ b/src/db_adapter/DBWrapper.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include +#include +#include + +#include "DataMapper.h" + +using namespace std; +using namespace db_adapter; + +namespace db_adapter { + +/** + * @class DatabaseWrapper + * @brief Generic interface for a database connection wrapper. + * + * @tparam ConnT The underlying connection object type (e.g., pqxx::connection). + */ +template +class DatabaseWrapper { + public: + explicit DatabaseWrapper(shared_ptr mapper, MAPPER_TYPE mapper_type) + : mapper(move(mapper)), mapper_type(mapper_type) {} + virtual ~DatabaseWrapper() = default; + + /** + * @brief Closes the connection. + */ + virtual void disconnect() = 0; + + unsigned int mapper_handle_trie_size() { return mapper->handle_trie_size(); } + + protected: + /** + * @brief Establishes connection to the database. + */ + virtual unique_ptr connect() = 0; + + unique_ptr db_client; + shared_ptr mapper; + MAPPER_TYPE mapper_type; +}; + +/** + * @class SQLWrapper + * @brief Specialization of DatabaseWrapper for SQL-based databases. + */ +template +class SQLWrapper : public DatabaseWrapper { + public: + explicit SQLWrapper(MAPPER_TYPE mapper_type) + : DatabaseWrapper(create_mapper(mapper_type), mapper_type) {} + virtual ~SQLWrapper() = default; + + /** + * @brief Retrieves schema information for a specific table. + */ + virtual Table get_table(const string& name) = 0; + + /** + * @brief Lists all tables in the database. + */ + virtual vector list_tables() = 0; + + /** + * @brief Maps a table's data to the target format. + * + * @param table The table metadata. + * @param clauses Additional SQL WHERE clauses. + * @param skip_columns Columns to exclude from mapping. + * @param second_level Boolean flag for recursive/depth mapping logic. + */ + virtual void map_table(const Table& table, + const vector& clauses, + const vector& skip_columns, + bool second_level) = 0; + + /** + * @brief Executes a raw SQL query and maps the result. + */ + virtual void map_sql_query(const string& virtual_name, const string& raw_query) = 0; + + private: + // Factory method for creating the specific mapper strategy + static shared_ptr create_mapper(MAPPER_TYPE mapper_type) { + switch (mapper_type) { + case MAPPER_TYPE::SQL2METTA: + return make_shared(); + case MAPPER_TYPE::SQL2ATOMS: + return make_shared(); + default: + throw invalid_argument("Unknown mapper type"); + } + } +}; +} // namespace db_adapter \ No newline at end of file diff --git a/src/db_adapter/DataMapper.cc b/src/db_adapter/DataMapper.cc new file mode 100644 index 000000000..282a88905 --- /dev/null +++ b/src/db_adapter/DataMapper.cc @@ -0,0 +1,414 @@ +#include "DataMapper.h" + +#include +#include + +#include "Atom.h" +#include "DataTypes.h" +#include "Hasher.h" +#include "Link.h" +#include "MettaMapping.h" +#include "Node.h" + +#define LOG_LEVEL INFO_LEVEL +#include "Logger.h" + +using namespace db_adapter; +using namespace commons; +using namespace atoms; + +string BaseSQL2Mapper::SYMBOL; +string BaseSQL2Mapper::EXPRESSION; + +// -- BaseSQL2Mapper + +const OutputList BaseSQL2Mapper::map(const DbInput& data) { + vector> all_foreign_keys; + SqlRow sql_row = get(data); + string table_name = sql_row.table_name; + + string primary_key_value = sql_row.primary_key ? sql_row.primary_key->value : ""; + + this->map_primary_key(table_name, primary_key_value); + + for (auto& field : sql_row.fields) { + string column_name = table_name + "." + field.name; + + if (this->is_foreign_key(column_name)) { + size_t separator_pos = column_name.find('|'); + + if (separator_pos == string::npos) { + Utils::error("Invalid foreign key format: " + column_name); + } + + string fk_column = column_name.substr(0, separator_pos); + string fk_table = column_name.substr(separator_pos + 1); + all_foreign_keys.push_back({fk_column, fk_table, field.value}); + + this->map_foreign_key_column( + table_name, column_name, field.value, primary_key_value, fk_table, fk_column); + } else { + this->map_regular_column(table_name, column_name, field.value, primary_key_value); + }; + } + this->map_foreign_keys_combinations(all_foreign_keys); + + auto result = move(this->get_output()); + this->clear(); + return result; +} + +bool BaseSQL2Mapper::is_foreign_key(const string& column_name) { + size_t pos = column_name.find('|'); + if (pos == string::npos) return false; + return true; +} + +string BaseSQL2Mapper::escape_inner_quotes(string text) { + const auto count = std::count(text.begin(), text.end(), '"'); + + if (count == 0 || count == 2) return text; + + if (count == 1) { + text.insert(text.begin(), '"'); + text.push_back('"'); + return text; + } + + if (text.size() >= 2 && text.front() == '"' && text.back() == '"') { + string inner = text.substr(1, text.size() - 2); + + string escaped; + escaped.reserve(inner.size()); + for (char c : inner) { + if (c == '"') { + escaped.push_back('\\'); + } + escaped.push_back(c); + } + + return "\"" + escaped + "\""; + } + return ""; +} + +bool BaseSQL2Mapper::insert_handle_if_missing(const string& handle) { + auto exists = this->handle_trie.exists(handle); + if (exists) return false; + this->handle_trie.insert(handle, NULL); + return true; +} + +// -- SQL2MettaMapper + +SQL2MettaMapper::SQL2MettaMapper() { this->initialize_statics(); } + +SQL2MettaMapper::~SQL2MettaMapper() {} + +OutputList SQL2MettaMapper::get_output() { return this->metta_expressions; } + +void SQL2MettaMapper::clear() { this->metta_expressions.clear(); } + +void SQL2MettaMapper::add_metta_if_new(const string& s_expression) { + string key = Hasher::context_handle(s_expression); + if (this->insert_handle_if_missing(key)) { + this->metta_expressions.push_back(s_expression); + } +}; + +void SQL2MettaMapper::map_primary_key(const string& table_name, const string& primary_key_value) { + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + + string predicate_link = "(Predicate " + table_name + ")"; + this->add_metta_if_new(predicate_link); + + string concept_inner_link = "(" + table_name + " " + literal_pk + ")"; + this->add_metta_if_new(concept_inner_link); + + string concept_link = "(Concept " + concept_inner_link + ")"; + this->add_metta_if_new(concept_link); + + string evaluation_link = "(Evaluation " + predicate_link + " " + concept_link + ")"; + this->add_metta_if_new(evaluation_link); +} + +void SQL2MettaMapper::map_foreign_key_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value, + const string& fk_table, + const string& fk_column) { + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + + string predicate_inner_1_link = "(" + fk_table + " " + literal_value + ")"; + this->add_metta_if_new(predicate_inner_1_link); + + string predicate_inner_2_link = "(Concept " + predicate_inner_1_link + ")"; + this->add_metta_if_new(predicate_inner_2_link); + + string predicate_inner_3_link = "(" + fk_column + " " + predicate_inner_2_link + ")"; + this->add_metta_if_new(predicate_inner_3_link); + + string predicate_link = "(Predicate " + predicate_inner_3_link + ")"; + this->add_metta_if_new(predicate_link); + + string concept_inner_link = "(" + table_name + " " + literal_pk + ")"; + this->add_metta_if_new(concept_inner_link); + + string concept_link = "(Concept " + concept_inner_link + ")"; + this->add_metta_if_new(concept_link); + + string evaluation_link = "(Evaluation " + predicate_link + " " + concept_link + ")"; + this->add_metta_if_new(evaluation_link); +} + +void SQL2MettaMapper::map_regular_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value) { + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + + string predicate_inner_link = "(" + table_name + " " + column_name + " " + literal_value + ")"; + this->add_metta_if_new(predicate_inner_link); + + string predicate_link = "(Predicate " + predicate_inner_link + ")"; + this->add_metta_if_new(predicate_link); + + string concept_inner_link = "(" + table_name + " " + literal_pk + ")"; + this->add_metta_if_new(concept_inner_link); + + string concept_link = "(Concept " + concept_inner_link + ")"; + this->add_metta_if_new(concept_link); + + string evaluation_link = "(Evaluation " + predicate_link + " " + concept_link + ")"; + this->add_metta_if_new(evaluation_link); +} + +void SQL2MettaMapper::map_foreign_keys_combinations( + const vector>& all_foreign_keys) { + for (const auto& [column_name, foreign_table_name, value] : all_foreign_keys) { + for (const auto& [column_name2, foreign_table_name2, value2] : all_foreign_keys) { + if (make_pair(foreign_table_name, value) != make_pair(foreign_table_name2, value2)) { + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + string literal_value_2 = this->escape_inner_quotes("\"" + value2 + "\""); + + string predicate_inner_1_link = "(" + foreign_table_name + " " + literal_value + ")"; + this->add_metta_if_new(predicate_inner_1_link); + + string predicate_inner_2_link = "(Concept " + predicate_inner_1_link + ")"; + this->add_metta_if_new(predicate_inner_2_link); + + string predicate_inner_3_link = "(" + column_name + " " + predicate_inner_2_link + ")"; + this->add_metta_if_new(predicate_inner_3_link); + + string predicate_link = "(Predicate " + predicate_inner_3_link + ")"; + this->add_metta_if_new(predicate_link); + + string concept_inner_link = "(" + foreign_table_name2 + " " + literal_value_2 + ")"; + this->add_metta_if_new(concept_inner_link); + + string concept_link = "(Concept " + concept_inner_link + ")"; + this->add_metta_if_new(concept_link); + + string evaluation_link = "(Evaluation " + predicate_link + " " + concept_link + ")"; + this->add_metta_if_new(evaluation_link); + } + } + } +} + +// -- SQL2AtomsMapper + +SQL2AtomsMapper::SQL2AtomsMapper() { this->initialize_statics(); } + +SQL2AtomsMapper::~SQL2AtomsMapper() {} + +OutputList SQL2AtomsMapper::get_output() { return this->atoms; } + +void SQL2AtomsMapper::clear() { this->atoms.clear(); } + +string SQL2AtomsMapper::add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE atom_type, + variant> value, + bool is_toplevel) { + Atom* atom; + + if (atom_type == SQL2AtomsMapper::ATOM_TYPE::NODE) { + string name = get(value); + atom = new Node(BaseSQL2Mapper::SYMBOL, name); + } else if (atom_type == SQL2AtomsMapper::ATOM_TYPE::LINK) { + vector targets = get>(value); + atom = new Link(BaseSQL2Mapper::EXPRESSION, targets, is_toplevel); + } else { + Utils::error("Either name or targets must be provided to create an Atom."); + } + + string handle = atom->handle(); + if (this->insert_handle_if_missing(handle)) { + this->atoms.push_back(atom); + } else { + delete atom; + } + return handle; +}; + +void SQL2AtomsMapper::map_primary_key(const string& table_name, const string& primary_key_value) { + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + + // Nodes + string predicate_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Predicate")); + string concept_node_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Concept")); + string evaluation_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Evaluation")); + string literal_pk_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_pk)); + string table_name_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(table_name)); + + // Links + string predicate_link_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{predicate_node_h, table_name_node_h}); + string concept_inner_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{table_name_node_h, literal_pk_node_h}); + string concept_link_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{concept_node_h, concept_inner_link_h}); + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{evaluation_node_h, predicate_link_h, concept_link_h}, + true); +} + +void SQL2AtomsMapper::map_foreign_key_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value, + const string& fk_table, + const string& fk_column) { + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + + // Nodes + string predicate_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Predicate")); + string concept_node_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Concept")); + string evaluation_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Evaluation")); + string literal_pk_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_pk)); + string table_name_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(table_name)); + string fk_table_node_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(fk_table)); + string literal_value_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_value)); + string fk_column_node_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(fk_column)); + + // Links + string predicate_inner_1_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{fk_table_node_h, literal_value_node_h}); + string predicate_inner_2_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{concept_node_h, predicate_inner_1_link_h}); + string predicate_inner_3_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{fk_column_node_h, predicate_inner_2_link_h}); + string predicate_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{predicate_node_h, predicate_inner_3_link_h}); + string concept_inner_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{table_name_node_h, literal_pk_node_h}); + string concept_link_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{concept_node_h, concept_inner_link_h}); + + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{evaluation_node_h, predicate_link_h, concept_link_h}, + true); +} + +void SQL2AtomsMapper::map_regular_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value) { + string literal_pk = this->escape_inner_quotes("\"" + primary_key_value + "\""); + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + + // Nodes + string predicate_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Predicate")); + string concept_node_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Concept")); + string evaluation_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Evaluation")); + string table_name_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(table_name)); + string column_name_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(column_name)); + string literal_value_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_value)); + string literal_pk_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_pk)); + + // Links + string predicate_inner_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{table_name_node_h, column_name_node_h, literal_value_node_h}); + string predicate_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{predicate_node_h, predicate_inner_link_h}); + string concept_inner_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, vector{table_name_node_h, literal_pk_node_h}); + string concept_link_h = this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{concept_node_h, concept_inner_link_h}); + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{evaluation_node_h, predicate_link_h, concept_link_h}, + true); +} + +void SQL2AtomsMapper::map_foreign_keys_combinations( + const vector>& all_foreign_keys) { + for (const auto& [column_name, foreign_table_name, value] : all_foreign_keys) { + for (const auto& [column_name2, foreign_table_name2, value2] : all_foreign_keys) { + if (make_pair(foreign_table_name, value) != make_pair(foreign_table_name2, value2)) { + string literal_value = this->escape_inner_quotes("\"" + value + "\""); + string literal_value_2 = this->escape_inner_quotes("\"" + value2 + "\""); + + // Nodes + string predicate_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Predicate")); + string concept_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Concept")); + string evaluation_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string("Evaluation")); + string fk_column_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(column_name)); + string foreign_table_name_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(foreign_table_name)); + string literal_value_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_value)); + string foreign_table_name2_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(foreign_table_name2)); + string literal_value2_node_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::NODE, string(literal_value_2)); + // Links + string predicate_inner_1_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{foreign_table_name_node_h, literal_value_node_h}); + string predicate_inner_2_link_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{concept_node_h, predicate_inner_1_link_h}); + string predicate_inner_3_link_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{fk_column_node_h, predicate_inner_2_link_h}); + string predicate_link_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{predicate_node_h, predicate_inner_3_link_h}); + string concept_inner_link_h = this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{foreign_table_name2_node_h, literal_value2_node_h}); + string concept_link_h = + this->add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{concept_node_h, concept_inner_link_h}); + + this->add_atom_if_new( + SQL2AtomsMapper::ATOM_TYPE::LINK, + vector{evaluation_node_h, predicate_link_h, concept_link_h}, + true); + } + } + } +} diff --git a/src/db_adapter/DataMapper.h b/src/db_adapter/DataMapper.h new file mode 100644 index 000000000..a6046b0ce --- /dev/null +++ b/src/db_adapter/DataMapper.h @@ -0,0 +1,154 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "DataTypes.h" +#include "HandleTrie.h" +#include "MettaMapping.h" + +using namespace std; +using namespace atoms; +using namespace commons; + +namespace db_adapter { + +class MapperValue : public HandleTrie::TrieValue { + public: + MapperValue() {} + void merge(TrieValue* other) {} +}; + +/** + * @class Mapper + * @brief Abstract base class for transforming database input into a target representation. + */ +class Mapper { + public: + virtual ~Mapper() = default; + + /** + * @brief Transforms the input data into the output format. + * @param data The database row or document to map. + * @return OutputList A variant containing either strings or Atoms. + */ + virtual const OutputList map(const DbInput& data) = 0; + + unsigned int handle_trie_size() { return handle_trie.size; } + + protected: + Mapper() = default; + HandleTrie handle_trie{32}; +}; + +/** + * @class BaseSQL2Mapper + * @brief Common logic for mapping SQL data, handling Primary and Foreign keys. + */ +class BaseSQL2Mapper : public Mapper { + public: + virtual ~BaseSQL2Mapper() override = default; + + const OutputList map(const DbInput& data) override; + + static string SYMBOL; + static string EXPRESSION; + + static void initialize_statics() { + SYMBOL = MettaMapping::SYMBOL_NODE_TYPE; + EXPRESSION = MettaMapping::EXPRESSION_LINK_TYPE; + } + + protected: + BaseSQL2Mapper() = default; + + bool is_foreign_key(const string& column_name); + string escape_inner_quotes(string text); + virtual OutputList get_output() = 0; + virtual void clear() = 0; + bool insert_handle_if_missing(const string& handle); + + virtual void map_primary_key(const string& table_name, const string& primary_key_value) = 0; + virtual void map_foreign_key_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value, + const string& fk_table, + const string& fk_column) = 0; + virtual void map_regular_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value) = 0; + virtual void map_foreign_keys_combinations( + const vector>& all_foreign_keys) = 0; +}; + +/** + * @class SQL2MettaMapper + * @brief Maps SQL rows to Metta S-Expressions. + */ +class SQL2MettaMapper : public BaseSQL2Mapper { + public: + SQL2MettaMapper(); + ~SQL2MettaMapper() override; + + private: + vector metta_expressions; + OutputList get_output() override; + void clear() override; + void add_metta_if_new(const string& s_expression); + + void map_primary_key(const string& table_name, const string& primary_key_value) override; + void map_foreign_key_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value, + const string& fk_table, + const string& fk_column) override; + void map_regular_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value) override; + void map_foreign_keys_combinations( + const vector>& all_foreign_keys) override; +}; + +/** + * @class SQL2AtomsMapper + * @brief Maps SQL rows to Atom objects. + */ +class SQL2AtomsMapper : public BaseSQL2Mapper { + public: + SQL2AtomsMapper(); + ~SQL2AtomsMapper() override; + + enum ATOM_TYPE { NODE, LINK }; + + private: + vector atoms; + + OutputList get_output() override; + void clear() override; + string add_atom_if_new(SQL2AtomsMapper::ATOM_TYPE atom_type, + variant> value, + bool is_toplevel = false); + + void map_primary_key(const string& table_name, const string& primary_key_value) override; + void map_foreign_key_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value, + const string& fk_table, + const string& fk_column) override; + void map_regular_column(const string& table_name, + const string& column_name, + const string& value, + const string& primary_key_value) override; + void map_foreign_keys_combinations( + const vector>& all_foreign_keys) override; +}; + +} // namespace db_adapter \ No newline at end of file diff --git a/src/db_adapter/DataTypes.h b/src/db_adapter/DataTypes.h new file mode 100644 index 000000000..9906be55a --- /dev/null +++ b/src/db_adapter/DataTypes.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include +#include +#include + +#include "Atom.h" + +using namespace std; +using namespace atoms; + +namespace db_adapter { + +/** + * @struct ColumnValue + * @brief Represents a single cell in a database row. + */ +struct ColumnValue { + string name; + string value; +}; + +/** + * @struct SqlRow + * @brief Represents a single row from a SQL query result. + */ +struct SqlRow { + string table_name; + optional primary_key; + vector fields; + + /** + * @brief Adds a field to the row. + * @param name Column name. + * @param value Column value. + */ + void add_field(string name, string value) { fields.push_back(ColumnValue{move(name), move(value)}); } + + /** + * @brief Retrieves a value by column name. + * @param name The column name to search for. + * @return optional The value if found, otherwise nullopt. + */ + optional get(const string& name) const { + if (primary_key && primary_key->name == name) { + return primary_key->value; + } + for (const auto& field : fields) { + if (field.name == name) { + return field.value; + } + } + return nullopt; + } + + size_t size() const { return (primary_key ? 1 : 0) + fields.size(); } +}; + +struct NoSqlDocument {}; + +/** + * @typedef DbInput + * @brief A variant representing raw input from either SQL or NoSQL sources. + */ +using DbInput = variant; + +/** + * @typedef OutputList + * @brief The result of the mapping process. + * + * Can be a list of S-Expression strings (SQL2METTA) or a list of Atom pointers (SQL2ATOMS). + */ +using OutputList = variant, vector>; + +/** + * @struct Table + * @brief Metadata describing a database table. + */ +struct Table { + string name; + int row_count = 0; + vector column_names; + string primary_key; + vector foreign_keys; +}; + +/** + * @enum MAPPER_TYPE + * @brief Defines the strategy used to transform database rows. + */ +enum class MAPPER_TYPE { SQL2METTA, SQL2ATOMS }; + +} // namespace db_adapter \ No newline at end of file diff --git a/src/db_adapter/README.md b/src/db_adapter/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/src/db_adapter/postgres/BUILD b/src/db_adapter/postgres/BUILD new file mode 100644 index 000000000..9e8450a2c --- /dev/null +++ b/src/db_adapter/postgres/BUILD @@ -0,0 +1,23 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "postgres_lib", + includes = ["."], + deps = [ + ":postgres_wrapper", + ], +) + +cc_library( + name = "postgres_wrapper", + srcs = ["PostgresWrapper.cc"], + hdrs = ["PostgresWrapper.h"], + includes = ["."], + deps = [ + "//commons:commons_lib", + "//db_adapter:data_mapper", + "//db_adapter:db_wrapper", + ], +) diff --git a/src/db_adapter/postgres/PostgresWrapper.cc b/src/db_adapter/postgres/PostgresWrapper.cc new file mode 100644 index 000000000..bfe1b703e --- /dev/null +++ b/src/db_adapter/postgres/PostgresWrapper.cc @@ -0,0 +1,459 @@ +#include "PostgresWrapper.h" + +#include +#include +#include +#include +#include +#include +#include + +#define LOG_LEVEL INFO_LEVEL +#include "Logger.h" + +using namespace std; + +PostgresWrapper::PostgresWrapper(const string& host, + int port, + const string& database, + const string& user, + const string& password, + MAPPER_TYPE mapper_type) + : SQLWrapper(mapper_type), + host(host), + port(port), + database(database), + user(user), + password(password) { + this->db_client = this->connect(); +} + +PostgresWrapper::~PostgresWrapper() { this->disconnect(); } + +unique_ptr PostgresWrapper::connect() { + try { + string conn_str = "host=" + host + " port=" + to_string(port) + " dbname=" + database; + if (!user.empty()) { + conn_str += " user=" + user; + } + if (!password.empty()) { + conn_str += " password=" + password; + } + return make_unique(conn_str); + } catch (const pqxx::sql_error& e) { + throw runtime_error("Could not connect to database: " + string(e.what())); + } catch (const exception& e) { + throw runtime_error("Could not connect to database: " + string(e.what())); + } +} + +void PostgresWrapper::disconnect() { + if (this->db_client) { + this->db_client->close(); + } +} + +Table PostgresWrapper::get_table(const string& name) { + auto tables = this->list_tables(); + for (const auto& table : tables) { + if (table.name == name) return table; + } + Utils::error("Table '" + name + "' not found in the database."); +} + +vector
PostgresWrapper::list_tables() { + if (tables_cache.has_value()) { + return tables_cache.value(); + } + + string query = R"(WITH table_info AS ( + SELECT + schemaname || '.' || tablename AS table_name, + (SELECT reltuples::bigint FROM pg_class WHERE oid = (schemaname || '.' || tablename)::regclass) AS row_count, + pg_tables.schemaname, + pg_tables.tablename + FROM + pg_tables + WHERE + schemaname NOT IN ('pg_catalog', 'information_schema') + ), + column_info AS ( + SELECT + table_schema || '.' || table_name AS table_name, + string_agg(column_name, ',') AS columns + FROM + information_schema.columns + WHERE + table_schema NOT IN ('pg_catalog', 'information_schema') + GROUP BY + table_schema, table_name + ), + pk_info AS ( + SELECT + tc.table_schema || '.' || tc.table_name AS table_name, + string_agg(kcu.column_name, ',') AS pk_column + FROM + information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + WHERE + tc.constraint_type = 'PRIMARY KEY' + GROUP BY + tc.table_schema, tc.table_name + ), + fk_info AS ( + SELECT + tc.table_schema || '.' || tc.table_name AS table_name, + string_agg(kcu.column_name || '|' || ccu.table_schema || '.' || ccu.table_name, ',') AS fk_columns + FROM + information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage AS ccu + ON ccu.constraint_name = tc.constraint_name + WHERE + tc.constraint_type = 'FOREIGN KEY' + GROUP BY + tc.table_schema, tc.table_name + ) + SELECT + ti.table_name, + ti.row_count, + ci.columns, + COALESCE(pk.pk_column, '') AS pk_column, + COALESCE(fk.fk_columns, '') AS fk_columns + FROM + table_info ti + LEFT JOIN + column_info ci ON ti.table_name = ci.table_name + LEFT JOIN + pk_info pk ON ti.table_name = pk.table_name + LEFT JOIN + fk_info fk ON ti.table_name = fk.table_name + ORDER BY + pg_total_relation_size(ti.table_name) ASC; + )"; + auto result = this->execute_query(query); + vector
tables; + tables.reserve(result.size()); + + for (const auto& row : result) { + string table_name = row["table_name"].c_str(); + int row_count = row["row_count"].as(0); + string columns = row["columns"].c_str(); + string pk_column = row["pk_column"].c_str(); + string fk_columns = row["fk_columns"].c_str(); + + Table t; + t.name = table_name; + t.row_count = row_count; + t.primary_key = pk_column; + t.column_names = columns.empty() ? vector{} : Utils::split(columns, ','); + t.foreign_keys = fk_columns.empty() ? vector{} : Utils::split(fk_columns, ','); + + tables.push_back(move(t)); + } + this->tables_cache = tables; + return tables; +} + +void PostgresWrapper::map_table(const Table& table, + const vector& clauses, + const vector& skip_columns, + bool second_level) { + LOG_DEBUG("Mapping table: " << table.name); + + string where_clauses; + + for (size_t i = 0; i < clauses.size(); ++i) { + if (i > 0) where_clauses += " AND "; + where_clauses += clauses[i]; + } + + auto columns = build_columns_to_map(table, skip_columns); + + string cols_sql = Utils::join(columns, ','); + string base_query = "SELECT " + cols_sql + " FROM " + table.name; + if (!where_clauses.empty()) { + base_query += " WHERE " + where_clauses; + } + LOG_DEBUG("Base query: " << base_query); + this->fetch_rows_paginated(table, columns, base_query); + + if (second_level) { + for (const auto& fk : table.foreign_keys) { + auto parts = Utils::split(fk, '|'); + + if (parts.size() != 2) { + Utils::error("Invalid foreign key format: " + fk); + } + + string column = parts[0]; + string ref_table_name = parts[1]; + + // Collect distinct non-null foreign-key values + auto fk_ids = this->collect_fk_ids(table.name, column, where_clauses); + + if (fk_ids.empty()) continue; + + auto ref_table = this->get_table(ref_table_name); + auto ref_columns = this->build_columns_to_map(ref_table); + + string where_clause = ref_table.primary_key + " IN " + "(" + Utils::join(fk_ids, ',') + ")"; + + string cols_sql = Utils::join(ref_columns, ','); + string base_query = "SELECT " + cols_sql + " FROM " + ref_table.name; + if (!where_clause.empty()) { + base_query += " WHERE " + where_clause; + } + + this->fetch_rows_paginated(ref_table, ref_columns, base_query); + } + } +} + +vector PostgresWrapper::build_columns_to_map(const Table& table, + const vector& skip_columns) { + for (const auto& skipo_col : skip_columns) { + if (find(table.column_names.begin(), table.column_names.end(), skipo_col) == + table.column_names.end()) { + Utils::error("Skip column '" + skipo_col + "' not found in table '" + table.name + "'"); + } + } + + vector columns_to_process = table.column_names; + + vector non_primary_key_columns; + for (const auto& col : columns_to_process) { + if (col != table.primary_key) non_primary_key_columns.push_back(col); + } + + if (!skip_columns.empty()) { + vector filtered_columns; + for (const auto& col : non_primary_key_columns) { + if (find(skip_columns.begin(), skip_columns.end(), col) == skip_columns.end()) { + filtered_columns.push_back(col); + } + } + non_primary_key_columns = move(filtered_columns); + } + + vector columns_list; + columns_list.push_back(table.primary_key); + columns_list.insert( + columns_list.end(), non_primary_key_columns.begin(), non_primary_key_columns.end()); + + vector final_columns; + for (const auto& item : columns_list) { + if (!item.empty()) final_columns.push_back(item); + } + + return final_columns; +} + +vector PostgresWrapper::collect_fk_ids(const string& table_name, + const string& column_name, + const string& where_clause) { + vector ids; + + size_t offset = 0; + size_t limit = 10000; + + while (true) { + string query = "SELECT " + column_name + " FROM " + table_name + " WHERE " + where_clause + + " LIMIT " + to_string(limit) + " OFFSET " + to_string(offset) + ";"; + pqxx::result rows = this->execute_query(query); + + if (rows.empty()) break; + + for (const pqxx::row& row : rows) { + auto field = row[0]; + if (!field.is_null()) { + string value = field.c_str(); + ids.push_back(value); + } + } + + offset += limit; + } + return ids; +} + +void PostgresWrapper::map_sql_query(const string& virtual_name, const string& raw_query) { + map> table_columns_map = this->extract_aliases_from_query(raw_query); + + if (table_columns_map.empty()) { + Utils::error("No valid aliases found in query for " + virtual_name); + } + + map tables_metadata; + + // Search metadata (PK, FK, ...) of each referenced table + // and validate that each table has its PK included in the aliases + for (const auto& table_columns : table_columns_map) { + string table_name = table_columns.first; + vector columns = table_columns.second; + try { + tables_metadata[table_name] = this->get_table(table_name); + string pk = tables_metadata[table_name].primary_key; + if (find(columns.begin(), columns.end(), pk) == columns.end()) { + auto parts = Utils::split(table_name, '.'); + string schema = parts[0]; + string table = parts[1]; + Utils::error("Primary key '" + pk + "' of table '" + table_name + + "' must be included in SELECT aliases. Add: " + table + "." + pk + " AS " + + schema + "_" + table + "__" + pk); + } + } catch (const exception& e) { + Utils::error("Error retrieving metadata for table '" + table_name + "': " + e.what()); + } + } + + string base_query = Utils::trim(raw_query); + + if (!base_query.empty() && base_query.back() == ';') base_query.pop_back(); + + for (const auto& table_columns : table_columns_map) { + string table_name = table_columns.first; + vector columns = table_columns.second; + Table table = tables_metadata[table_name]; + this->fetch_rows_paginated(table, columns, base_query); + } +} + +map> PostgresWrapper::extract_aliases_from_query(const string& query) { + map> tables; + + regex alias_pattern(alias_pattern_regex, regex_constants::icase); + + auto matches_begin = sregex_iterator(query.begin(), query.end(), alias_pattern); + auto matches_end = sregex_iterator(); + + for (auto it = matches_begin; it != matches_end; ++it) { + smatch match = *it; + string table_part = match[1].str(); + string column_name = match[2].str(); + + string table_name; + + size_t dot_pos = table_part.find('.'); + if (dot_pos != string::npos) { + table_name = table_part; + } else { + size_t underscore_pos = table_part.find('_'); + if (underscore_pos != string::npos) { + table_name = + table_part.substr(0, underscore_pos) + "." + table_part.substr(underscore_pos + 1); + } else { + table_name = "public." + table_part; + } + } + + auto& columns = tables[table_name]; + if (find(columns.begin(), columns.end(), column_name) == columns.end()) { + columns.push_back(column_name); + } + } + + return tables; +} + +void PostgresWrapper::fetch_rows_paginated(const Table& table, + const vector& columns, + const string& query) { + size_t offset = 0; + size_t limit = 10000; + + while (true) { + string paginated_query = query + " LIMIT " + to_string(limit) + " OFFSET " + to_string(offset); + pqxx::result rows = this->execute_query(paginated_query); + + LOG_DEBUG("Executing paginated query: " << paginated_query); + LOG_DEBUG("Fetched " << rows.size() << " rows from table " << table.name); + + if (rows.empty()) break; + + for (const auto& row : rows) { + SqlRow sql_row = this->build_sql_row(row, table, columns); + + LOG_DEBUG("Built SqlRow for table " + << table.name << " with primary key: " + << (sql_row.primary_key ? sql_row.primary_key->value : "NULL")); + for (const auto& field : sql_row.fields) { + LOG_DEBUG(" Field: " << field.name << " = " << field.value); + } + + auto output = this->mapper->map(DbInput{sql_row}); + + if (this->mapper_type == MAPPER_TYPE::SQL2ATOMS) { + vector atoms = get>(output); + LOG_DEBUG("Atoms count: " << atoms.size()); + // WIP - send atoms to SharedQueue + + } else { + vector metta_expressions = get>(output); + LOG_DEBUG("Metta Expressions count: " << metta_expressions.size()); + // WIP - save metta expressions to file + } + + LOG_DEBUG("Mapper HandleTrie size: " << this->mapper->handle_trie_size()); + } + + offset += limit; + } +} + +SqlRow PostgresWrapper::build_sql_row(const pqxx::row& row, const Table& table, vector columns) { + SqlRow sql_row; + sql_row.table_name = table.name; + sql_row.primary_key = ColumnValue{columns[0], row[0].c_str()}; + + for (size_t i = 1; i < columns.size() && i < row.size(); i++) { + string col = columns[i]; + auto field = row[i]; + + if (field.is_null()) continue; + + string value = field.c_str(); + + // datetime → SKIP + // YYYY-MM-DD HH:MM:SS... + if (value.size() >= 19 && value[4] == '-' && value[7] == '-') continue; + + if (value.empty()) { + continue; + } else if (value.size() > MAX_VALUE_SIZE) { + continue; + } + + string column_name = col; + for (const auto& fk : table.foreign_keys) { + if (fk.find(col) != string::npos) { + column_name = fk; + break; + } + } + sql_row.add_field(column_name, value); + } + return sql_row; +} + +pqxx::result PostgresWrapper::execute_query(const string& query) { + if (!this->db_client || !this->db_client->is_open()) { + Utils::error("Database connection is not open."); + } + + try { + pqxx::work transaction(*this->db_client); + pqxx::result result = transaction.exec(query); + transaction.commit(); + return result; + } catch (const pqxx::sql_error& e) { + Utils::error("SQL error during query execution: " + string(e.what())); + } catch (const exception& e) { + Utils::error("Error during query execution: " + string(e.what())); + } + return pqxx::result{}; +} \ No newline at end of file diff --git a/src/db_adapter/postgres/PostgresWrapper.h b/src/db_adapter/postgres/PostgresWrapper.h new file mode 100644 index 000000000..1f98deb47 --- /dev/null +++ b/src/db_adapter/postgres/PostgresWrapper.h @@ -0,0 +1,80 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include "DBWrapper.h" + +#define MAX_VALUE_SIZE ((size_t) 1000) + +using namespace std; +using namespace atoms; +using namespace commons; + +namespace db_adapter { + +/** + * @class PostgresWrapper + * @brief Concrete implementation of SQLWrapper for PostgreSQL using libpqxx. + */ +class PostgresWrapper : public SQLWrapper { + public: + /** + * @brief Constructs a PostgresWrapper. + * + * @param host The hostname or IP address. + * @param port The port number. + * @param database The database name (default: "postgres"). + * @param user The username. + * @param password The password. + * @param mapper_type The strategy for mapping results. + */ + PostgresWrapper(const string& host, + int port, + const string& database = "postgres", + const string& user = "postgres", + const string& password = "postgres", + MAPPER_TYPE mapper_type = MAPPER_TYPE::SQL2ATOMS); + + ~PostgresWrapper() override; + + void disconnect() override; + Table get_table(const string& name) override; + vector
list_tables() override; + void map_table(const Table& table, + const vector& clauses, + const vector& skip_columns = {}, + bool second_level = false) override; + void map_sql_query(const string& virtual_name, const string& raw_query) override; + pqxx::result execute_query(const string& query); + + protected: + unique_ptr connect() override; + // Regex for parsing alias patterns (e.g., "AS public_feature__uniquename") + const string alias_pattern_regex = R"(\bAS\s+([a-zA-Z_][a-zA-Z0-9_]*)__([a-zA-Z_][a-zA-Z0-9_]*))"; + + private: + string host; + int port; + string database; + string user; + string password; + + // Store tables in cache to avoid repeated database queries. + optional> tables_cache; + vector build_columns_to_map(const Table& table, const vector& skip_columns = {}); + vector collect_fk_ids(const string& table_name, + const string& column_name, + const string& where_clause = ""); + map> extract_aliases_from_query(const string& query); + void fetch_rows_paginated(const Table& table, const vector& columns, const string& query); + SqlRow build_sql_row(const pqxx::row& row, const Table& table, vector columns); +}; + +} // namespace db_adapter \ No newline at end of file diff --git a/src/docker/Dockerfile b/src/docker/Dockerfile index 378b40b99..21f4dc3ac 100644 --- a/src/docker/Dockerfile +++ b/src/docker/Dockerfile @@ -20,7 +20,7 @@ RUN mkdir -p \ RUN yes | apt update -y \ && yes | apt install -y git build-essential curl protobuf-compiler python3 python3-pip cmake unzip uuid-runtime lcov rpm bc \ - libevent-dev libssl-dev pkg-config libncurses5 \ + libevent-dev libssl-dev pkg-config libncurses5 libpq-dev \ && yes | apt clean -y \ && rm -rf /var/lib/apt/lists/* @@ -62,6 +62,16 @@ RUN cd /tmp \ && ln -s /usr/local/include/mongocxx/v_noabi/mongocxx/* /usr/local/include/mongocxx/ \ && ldconfig +# Postgres client (libpqxx) +COPY src/assets/libpqxx-7.10.5.tar.gz /tmp +RUN cd /tmp \ + && tar xzvf libpqxx-7.10.5.tar.gz \ + && cd /tmp/libpqxx-7.10.5 \ + && cmake -S . -B build -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + && cmake --build build -j \ + && cmake --install build \ + && ldconfig + # cpp-httplib RUN cd /tmp \ && unzip -q cpp-httplib-master.zip \ diff --git a/src/tests/assets/postgres_schema.sql b/src/tests/assets/postgres_schema.sql new file mode 100644 index 000000000..7a9470401 --- /dev/null +++ b/src/tests/assets/postgres_schema.sql @@ -0,0 +1,197 @@ +-- ============================================================================= +-- Test Database Schema for PostgresWrapper Tests +-- ============================================================================= +-- +-- This creates 3 tables: +-- 1. organism - Referenced by feature (FK target) +-- 2. cvterm - Referenced by feature (FK target) - "controlled vocabulary term" +-- 3. feature - Main table with FKs to organism and cvterm +-- +-- To use: +-- 1. Create database: createdb postgres_wrapper_test +-- 2. Load schema: psql -d postgres_wrapper_test -f test_schema.sql +-- +-- Or with Docker: +-- docker run -d --name pg_test -e POSTGRES_PASSWORD=test -e POSTGRES_DB=postgres_wrapper_test -p 5433:5432 postgres:15 +-- docker exec -i pg_test psql -U postgres -d postgres_wrapper_test < test_schema.sql +-- ============================================================================= + +-- Drop tables if they exist (for re-running) +DROP TABLE IF EXISTS public.feature CASCADE; +DROP TABLE IF EXISTS public.organism CASCADE; +DROP TABLE IF EXISTS public.cvterm CASCADE; + +-- ============================================================================= +-- Table 1: organism +-- ============================================================================= +CREATE TABLE public.organism ( + organism_id SERIAL PRIMARY KEY, + genus VARCHAR(255) NOT NULL, + species VARCHAR(255) NOT NULL, + common_name VARCHAR(255), + abbreviation VARCHAR(255), + comment TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +COMMENT ON TABLE public.organism IS 'Stores organism/species information'; + +-- ============================================================================= +-- Table 2: cvterm (Controlled Vocabulary Term) +-- ============================================================================= +CREATE TABLE public.cvterm ( + cvterm_id SERIAL PRIMARY KEY, + name VARCHAR(255) NOT NULL, + definition TEXT, + is_obsolete BOOLEAN DEFAULT FALSE, + is_relationshiptype BOOLEAN DEFAULT FALSE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +COMMENT ON TABLE public.cvterm IS 'Stores controlled vocabulary terms (types)'; + +-- ============================================================================= +-- Table 3: feature (Main table with Foreign Keys) +-- ============================================================================= +CREATE TABLE public.feature ( + feature_id SERIAL PRIMARY KEY, + organism_id INTEGER NOT NULL REFERENCES public.organism(organism_id), + type_id INTEGER NOT NULL REFERENCES public.cvterm(cvterm_id), + name VARCHAR(255), + uniquename VARCHAR(255) NOT NULL, + residues TEXT, + seqlen INTEGER, + md5checksum VARCHAR(32), + is_analysis BOOLEAN DEFAULT FALSE, + is_obsolete BOOLEAN DEFAULT FALSE, + timeaccessioned TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + timelastmodified TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +COMMENT ON TABLE public.feature IS 'Main feature table with FKs to organism and cvterm'; + +-- Create indexes for foreign keys +CREATE INDEX idx_feature_organism_id ON public.feature(organism_id); +CREATE INDEX idx_feature_type_id ON public.feature(type_id); +CREATE INDEX idx_feature_uniquename ON public.feature(uniquename); + +-- ============================================================================= +-- Insert Test Data: Organisms +-- ============================================================================= +INSERT INTO public.organism (organism_id, genus, species, common_name, abbreviation, comment) VALUES + (1, 'Drosophila', 'melanogaster', 'fruit fly', 'Dmel', 'Model organism for genetics'), + (2, 'Homo', 'sapiens', 'human', 'Hsap', 'Human species'), + (3, 'Mus', 'musculus', 'mouse', 'Mmus', 'Laboratory mouse'), + (4, 'Saccharomyces', 'cerevisiae', 'yeast', 'Scer', 'Baker''s yeast'), + (5, 'Caenorhabditis', 'elegans', 'roundworm', 'Cele', 'Model organism for development'); + +-- ============================================================================= +-- Insert Test Data: CV Terms (Feature Types) +-- ============================================================================= +INSERT INTO public.cvterm (cvterm_id, name, definition, is_obsolete, is_relationshiptype) VALUES + (1, 'gene', 'A region of biological sequence that encodes a gene', FALSE, FALSE), + (2, 'mRNA', 'Messenger RNA', FALSE, FALSE), + (3, 'exon', 'A region of the transcript that is included in the mature RNA', FALSE, FALSE), + (4, 'intron', 'A region of the transcript that is not included in the mature RNA', FALSE, FALSE), + (5, 'protein', 'A sequence of amino acids', FALSE, FALSE), + (6, 'chromosome', 'A structure that contains genetic material', FALSE, FALSE), + (7, 'promoter', 'A regulatory region', FALSE, FALSE), + (8, 'obsolete_term', 'This term is obsolete', TRUE, FALSE), + (9, 'part_of', 'Relationship type: part of', FALSE, TRUE), + (10, 'derives_from', 'Relationship type: derives from', FALSE, TRUE); + +-- ============================================================================= +-- Insert Test Data: Features +-- ============================================================================= + +-- Drosophila melanogaster features (organism_id = 1) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen, is_analysis, is_obsolete) VALUES + (1, 1, 1, 'white', 'FBgn0003996', 'ATGCGATCGATCG', 13, FALSE, FALSE), + (2, 1, 1, 'yellow', 'FBgn0004034', 'GCTAGCTAGCTAG', 13, FALSE, FALSE), + (3, 1, 1, 'vermilion', 'FBgn0003979', NULL, NULL, FALSE, FALSE), + (4, 1, 2, 'white-RA', 'FBtr0070001', 'ATGCGATCG', 9, FALSE, FALSE), + (5, 1, 2, 'white-RB', 'FBtr0070002', 'ATGCGA', 6, FALSE, FALSE), + (6, 1, 3, 'white:1', 'FBexon0001', 'ATG', 3, FALSE, FALSE), + (7, 1, 3, 'white:2', 'FBexon0002', 'CGA', 3, FALSE, FALSE), + (8, 1, 5, 'white-PA', 'FBpp0070001', 'MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSH', 51, FALSE, FALSE), + (9, 1, 6, 'chr2L', 'FBchr0000001', NULL, 23513712, FALSE, FALSE), + (10, 1, 1, 'obsolete_gene', 'FBgn9999999', NULL, NULL, FALSE, TRUE); + +-- Human features (organism_id = 2) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen, is_analysis, is_obsolete) VALUES + (11, 2, 1, 'BRCA1', 'ENSG00000012048', 'ATGCGATCGATCGATCG', 17, FALSE, FALSE), + (12, 2, 1, 'TP53', 'ENSG00000141510', 'GCTAGCTAGCTAGCTAG', 17, FALSE, FALSE), + (13, 2, 2, 'BRCA1-201', 'ENST00000357654', 'ATGCGATCG', 9, FALSE, FALSE), + (14, 2, 5, 'BRCA1-P01', 'ENSP00000350283', 'MDLSALRVEEVQNVINAMQKILECPI', 26, FALSE, FALSE); + +-- Mouse features (organism_id = 3) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen, is_analysis, is_obsolete) VALUES + (15, 3, 1, 'Brca1', 'MGI:104537', 'ATGCGATCG', 9, FALSE, FALSE), + (16, 3, 1, 'Trp53', 'MGI:98834', 'GCTAGCTAG', 9, FALSE, FALSE); + +-- Yeast features (organism_id = 4) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen, is_analysis, is_obsolete) VALUES + (17, 4, 1, 'GAL4', 'YPL248C', 'ATGCGA', 6, FALSE, FALSE), + (18, 4, 1, 'ACT1', 'YFL039C', 'GCTAGC', 6, FALSE, FALSE); + +-- C. elegans features (organism_id = 5) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen, is_analysis, is_obsolete) VALUES + (19, 5, 1, 'unc-54', 'WBGene00006789', 'ATGCGATCGATCG', 13, FALSE, FALSE), + (20, 5, 1, 'dpy-10', 'WBGene00001072', 'GCTAGCTAGCTAG', 13, FALSE, FALSE); + +-- ============================================================================= +-- Special Test Cases +-- ============================================================================= + +-- Feature with NULL name (for testing NULL handling) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename) VALUES + (21, 1, 1, NULL, 'FBgn_null_name'); + +-- Feature with empty string name (for testing empty string handling) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename) VALUES + (22, 1, 1, '', 'FBgn_empty_name'); + +-- Feature with very long residues (> MAX_VALUE_SIZE = 1000) +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, residues, seqlen) VALUES + (23, 1, 1, 'long_residues_gene', 'FBgn_long', REPEAT('ATGC', 300), 1200); + +-- Feature with special characters in name +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename) VALUES + (24, 1, 1, 'gene''s "special" ', 'FBgn_special_chars'); + +-- Feature with unicode in name +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename) VALUES + (25, 1, 1, 'gène_français_日本語', 'FBgn_unicode'); + +-- Feature with analysis flag +INSERT INTO public.feature (feature_id, organism_id, type_id, name, uniquename, is_analysis) VALUES + (26, 1, 1, 'computed_gene', 'FBgn_analysis', TRUE); + +-- ============================================================================= +-- Reset sequences to continue after manual IDs +-- ============================================================================= +SELECT setval('public.organism_organism_id_seq', (SELECT MAX(organism_id) FROM public.organism)); +SELECT setval('public.cvterm_cvterm_id_seq', (SELECT MAX(cvterm_id) FROM public.cvterm)); +SELECT setval('public.feature_feature_id_seq', (SELECT MAX(feature_id) FROM public.feature)); + +-- ============================================================================= +-- Verify data loaded correctly +-- ============================================================================= +DO $$ +DECLARE + org_count INTEGER; + cvterm_count INTEGER; + feature_count INTEGER; +BEGIN + SELECT COUNT(*) INTO org_count FROM public.organism; + SELECT COUNT(*) INTO cvterm_count FROM public.cvterm; + SELECT COUNT(*) INTO feature_count FROM public.feature; + + RAISE NOTICE '==========================================='; + RAISE NOTICE 'Test database loaded successfully!'; + RAISE NOTICE '==========================================='; + RAISE NOTICE 'Organisms: % rows', org_count; + RAISE NOTICE 'CV Terms: % rows', cvterm_count; + RAISE NOTICE 'Features: % rows', feature_count; + RAISE NOTICE '==========================================='; +END $$; \ No newline at end of file diff --git a/src/tests/cpp/BUILD b/src/tests/cpp/BUILD index 1d27f686e..be2db5d53 100644 --- a/src/tests/cpp/BUILD +++ b/src/tests/cpp/BUILD @@ -787,3 +787,27 @@ cc_test( "@com_github_google_googletest//:gtest_main", ], ) + +cc_test( + name = "db_adapter_test", + size = "small", + srcs = [ + "db_adapter_test.cc", + ], + copts = [ + "-Iexternal/gtest/googletest/include", + "-Iexternal/gtest/googletest", + ], + linkopts = [ + "-L/usr/local/lib", + "-lpqxx", + "-lpq", + ], + linkstatic = 1, + deps = [ + "//db_adapter:db_adapter_lib", + "//tests/cpp/test_commons", + "@com_github_google_googletest//:gtest_main", + "@mbedtls", + ], +) diff --git a/src/tests/cpp/db_adapter_test.cc b/src/tests/cpp/db_adapter_test.cc new file mode 100644 index 000000000..7b3fb96ab --- /dev/null +++ b/src/tests/cpp/db_adapter_test.cc @@ -0,0 +1,618 @@ +#include + +#include +#include +#include +#include +#include + +#include "Atom.h" +#include "Logger.h" +#include "Node.h" +#include "PostgresWrapper.h" +#include "TestConfig.h" + +using namespace std; +using namespace db_adapter; +using namespace atoms; + +class PostgresWrapperTestEnvironment : public ::testing::Environment { + public: + void SetUp() override { TestConfig::load_environment(); } + + void TearDown() override {} +}; + +class PostgresWrapperTest : public ::testing::Test { + protected: + string TEST_HOST = "localhost"; + int TEST_PORT = 5433; + string TEST_DB = "postgres_wrapper_test"; + string TEST_USER = "postgres"; + string TEST_PASSWORD = "test"; + + string INVALID_HOST = "invalid.host"; + int INVALID_PORT = 99999; + string INVALID_DB = "database_xyz"; + + string FEATURE_TABLE = "public.feature"; + string ORGANISM_TABLE = "public.organism"; + string CVTERM_TABLE = "public.cvterm"; + string FEATURE_PK = "feature_id"; + string ORGANISM_PK = "organism_id"; + string CVTERM_PK = "cvterm_id"; + + int DROSOPHILA_ORGANISM_ID = 1; + int HUMAN_ORGANISM_ID = 2; + + int WHITE_GENE_ID = 1; + string WHITE_GENE_NAME = "white"; + string WHITE_GENE_UNIQUENAME = "FBgn0003996"; + + int TOTAL_ROWS_ORGANISMS = 5; + int TOTAL_ROWS_CVTERMS = 10; + int TOTAL_ROWS_FEATURES = 26; + + void SetUp() override {} + + void TearDown() override {} + + shared_ptr create_wrapper(MAPPER_TYPE mapper_type = MAPPER_TYPE::SQL2ATOMS) { + return make_shared( + TEST_HOST, TEST_PORT, TEST_DB, TEST_USER, TEST_PASSWORD, mapper_type); + } +}; + +TEST_F(PostgresWrapperTest, Connection) { + auto wrapper = create_wrapper(); + + auto result = wrapper->execute_query("SELECT 1"); + + EXPECT_FALSE(result.empty()); + EXPECT_EQ(result[0][0].as(), 1); + + EXPECT_THROW({ PostgresWrapper("invalid.host", TEST_PORT, TEST_DB, TEST_USER, TEST_PASSWORD); }, + std::runtime_error); + EXPECT_THROW({ PostgresWrapper(TEST_HOST, 99999, TEST_DB, TEST_USER, TEST_PASSWORD); }, + std::runtime_error); + EXPECT_THROW({ PostgresWrapper(TEST_HOST, TEST_PORT, "non_existent_db", TEST_USER, TEST_PASSWORD); }, + std::runtime_error); + + wrapper->disconnect(); + + EXPECT_THROW(wrapper->execute_query("SELECT 1"), std::runtime_error); +} + +TEST_F(PostgresWrapperTest, GetTable) { + auto wrapper = create_wrapper(); + auto tables = wrapper->list_tables(); + ASSERT_FALSE(tables.empty()); + + string target_name = tables[0].name; + Table t = wrapper->get_table(target_name); + + EXPECT_EQ(t.name, target_name); + EXPECT_EQ(t.primary_key, tables[0].primary_key); + + Table feature = wrapper->get_table("public.feature"); + + EXPECT_EQ(feature.name, "public.feature"); + EXPECT_EQ(feature.primary_key, "feature_id"); + + EXPECT_THROW(wrapper->get_table("fake_table_name"), std::runtime_error); +} + +TEST_F(PostgresWrapperTest, ListTables) { + auto wrapper = create_wrapper(); + + auto tables = wrapper->list_tables(); + + EXPECT_GE(tables.size(), 3); + + bool found_organism = false; + bool found_cvterm = false; + bool found_feature = false; + + for (const auto& table : tables) { + if (table.name == ORGANISM_TABLE) found_organism = true; + if (table.name == CVTERM_TABLE) found_cvterm = true; + if (table.name == FEATURE_TABLE) found_feature = true; + } + + EXPECT_TRUE(found_organism); + EXPECT_TRUE(found_cvterm); + EXPECT_TRUE(found_feature); + + vector
tables_cached = wrapper->list_tables(); + + ASSERT_EQ(tables_cached.size(), tables_cached.size()); + if (!tables_cached.empty()) { + EXPECT_EQ(tables_cached[0].name, tables_cached[0].name); + } +} + +TEST_F(PostgresWrapperTest, TablesStructure) { + auto wrapper = create_wrapper(); + + Table organism_table = wrapper->get_table(ORGANISM_TABLE); + + EXPECT_EQ(organism_table.name, ORGANISM_TABLE); + EXPECT_EQ(organism_table.primary_key, ORGANISM_PK); + EXPECT_TRUE(organism_table.foreign_keys.empty()); + + vector expected_cols = { + "organism_id", "genus", "species", "common_name", "abbreviation", "comment", "created_at"}; + for (const auto& expected : expected_cols) { + bool found = + find(organism_table.column_names.begin(), organism_table.column_names.end(), expected) != + organism_table.column_names.end(); + EXPECT_TRUE(found); + } + + Table cvterm_table = wrapper->get_table(CVTERM_TABLE); + + EXPECT_EQ(cvterm_table.name, CVTERM_TABLE); + EXPECT_EQ(cvterm_table.primary_key, CVTERM_PK); + EXPECT_TRUE(cvterm_table.foreign_keys.empty()); + + Table feature_table = wrapper->get_table(FEATURE_TABLE); + + EXPECT_EQ(feature_table.name, FEATURE_TABLE); + EXPECT_EQ(feature_table.primary_key, FEATURE_PK); + EXPECT_EQ(feature_table.foreign_keys.size(), 2); + + bool has_organism_fk = false; + bool has_type_fk = false; + for (const auto& fk : feature_table.foreign_keys) { + if (fk.find("organism_id") != string::npos && fk.find("public.organism") != string::npos) { + has_organism_fk = true; + } + if (fk.find("type_id") != string::npos && fk.find("public.cvterm") != string::npos) { + has_type_fk = true; + } + } + EXPECT_TRUE(has_organism_fk); + EXPECT_TRUE(has_type_fk); +} + +TEST_F(PostgresWrapperTest, CheckData) { + auto wrapper = create_wrapper(); + + auto result = wrapper->execute_query( + "SELECT organism_id, genus, species, common_name FROM organism WHERE organism_id = 1"); + + ASSERT_EQ(result.size(), 1); + EXPECT_EQ(result[0]["organism_id"].as(), 1); + EXPECT_EQ(result[0]["genus"].as(), "Drosophila"); + EXPECT_EQ(result[0]["species"].as(), "melanogaster"); + EXPECT_EQ(result[0]["common_name"].as(), "fruit fly"); + + auto result2 = + wrapper->execute_query("SELECT feature_id, name, uniquename FROM feature WHERE feature_id = " + + to_string(WHITE_GENE_ID)); + + ASSERT_EQ(result2.size(), 1); + EXPECT_EQ(result2[0]["feature_id"].as(), WHITE_GENE_ID); + EXPECT_EQ(result2[0]["name"].as(), WHITE_GENE_NAME); + EXPECT_EQ(result2[0]["uniquename"].as(), WHITE_GENE_UNIQUENAME); + + auto result3 = wrapper->execute_query("SELECT COUNT(*) FROM organism"); + + EXPECT_EQ(result3[0][0].as(), TOTAL_ROWS_ORGANISMS); + + auto result4 = wrapper->execute_query("SELECT COUNT(*) FROM cvterm"); + + EXPECT_EQ(result4[0][0].as(), TOTAL_ROWS_CVTERMS); + + auto result5 = wrapper->execute_query("SELECT COUNT(*) FROM feature"); + + EXPECT_EQ(result5[0][0].as(), TOTAL_ROWS_FEATURES); +} + +// map_table - SQL2ATOMS +TEST_F(PostgresWrapperTest, MapTablesFirstRowAtoms) { + auto wrapper = create_wrapper(); + + Table organism_table = wrapper->get_table(ORGANISM_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(organism_table, {"organism_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 34); + + Table feature_table = wrapper->get_table(FEATURE_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(feature_table, {"feature_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 81); + + Table cvterm_table = wrapper->get_table(CVTERM_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(cvterm_table, {"cvterm_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 101); +} + +TEST_F(PostgresWrapperTest, MapTableWithClausesAndSkipColumnsAtoms) { + auto wrapper = create_wrapper(); + + Table table = wrapper->get_table(FEATURE_TABLE); + vector clauses = {"organism_id = " + to_string(DROSOPHILA_ORGANISM_ID), "feature_id <= 5"}; + vector skip_columns = {"residues", "md5checksum", "seqlen"}; + + EXPECT_NO_THROW({ wrapper->map_table(table, clauses, skip_columns, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 114); +} + +TEST_F(PostgresWrapperTest, MapTableZeroRowsAtoms) { + auto wrapper = create_wrapper(); + + Table table = wrapper->get_table(FEATURE_TABLE); + vector clauses = {"feature_id = -999"}; + + EXPECT_NO_THROW({ wrapper->map_table(table, clauses, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapTableWithNonExistentSkipColumnAtoms) { + auto wrapper = create_wrapper(); + + Table table = wrapper->get_table(FEATURE_TABLE); + + vector clauses = {"feature_id < 10"}; + vector skip_columns = {"column_xyz"}; + + EXPECT_THROW({ wrapper->map_table(table, clauses, skip_columns, false); }, std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapTableWithInvalidClauseAtoms) { + auto wrapper = create_wrapper(); + + Table table = wrapper->get_table(FEATURE_TABLE); + + vector clauses = {"INVALID CLAUSE SYNTAX !!!"}; + + EXPECT_THROW({ wrapper->map_table(table, clauses, {}, false); }, std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +// map_table - SQL2METTA +TEST_F(PostgresWrapperTest, MapTablesFirstRowMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + Table organism_table = wrapper->get_table(ORGANISM_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(organism_table, {"organism_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 19); + + Table feature_table = wrapper->get_table(FEATURE_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(feature_table, {"feature_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 51); + + Table cvterm_table = wrapper->get_table(CVTERM_TABLE); + EXPECT_NO_THROW({ wrapper->map_table(cvterm_table, {"cvterm_id = 1"}, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 65); +} + +TEST_F(PostgresWrapperTest, MapTableWithClausesAndSkipColumnsMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + Table table = wrapper->get_table(FEATURE_TABLE); + vector clauses = {"organism_id = " + to_string(DROSOPHILA_ORGANISM_ID), "feature_id <= 5"}; + vector skip_columns = {"residues", "md5checksum", "seqlen"}; + + EXPECT_NO_THROW({ wrapper->map_table(table, clauses, skip_columns, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 86); +} + +TEST_F(PostgresWrapperTest, MapTableZeroRowsMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + Table table = wrapper->get_table(FEATURE_TABLE); + vector clauses = {"feature_id = -999"}; + + EXPECT_NO_THROW({ wrapper->map_table(table, clauses, {}, false); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapTableWithNonExistentSkipColumnMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + Table table = wrapper->get_table(FEATURE_TABLE); + + vector clauses = {"feature_id < 10"}; + vector skip_columns = {"column_xyz"}; + + EXPECT_THROW({ wrapper->map_table(table, clauses, skip_columns, false); }, std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapTableWithInvalidClauseMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + Table table = wrapper->get_table(FEATURE_TABLE); + + vector clauses = {"INVALID CLAUSE SYNTAX !!!"}; + + EXPECT_THROW({ wrapper->map_table(table, clauses, {}, false); }, std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +// map_sql_query - SQL2ATOMS +TEST_F(PostgresWrapperTest, MapSqlQueryFirstRowAtoms) { + auto wrapper = create_wrapper(); + + string query_organism = R"( + SELECT + o.organism_id AS public_organism__organism_id, + o.genus AS public_organism__genus, + o.species AS public_organism__species, + o.common_name AS public_organism__common_name, + o.abbreviation AS public_organism__abbreviation, + o.comment AS public_organism__comment + FROM organism AS o + WHERE o.organism_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_organism", query_organism); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 34); + + string query_feature = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.feature_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature", query_feature); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 81); + + string query_cvterm = R"( + SELECT + c.cvterm_id AS public_cvterm__cvterm_id, + c.name AS public_cvterm__name, + c.definition AS public_cvterm__definition, + c.is_obsolete AS public_cvterm__is_obsolete, + c.is_relationshiptype AS public_cvterm__is_relationshiptype + FROM cvterm AS c + WHERE c.cvterm_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_cvterm", query_cvterm); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 101); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithClausesAndSkipColumnsAtoms) { + auto wrapper = create_wrapper(); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.organism_id = )" + + to_string(DROSOPHILA_ORGANISM_ID) + R"( AND f.feature_id <= 5)"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature_clause_and_skip", query); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 114); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryZeroRowsAtoms) { + auto wrapper = create_wrapper(); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.feature_id = -999 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature_zero_rows", query); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithNonExistentSkipColumnAtoms) { + auto wrapper = create_wrapper(); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.column_xyz AS public_feature__column_xyz + FROM feature AS f + WHERE f.feature_id < 10 + )"; + + EXPECT_THROW({ wrapper->map_sql_query("test_feature_with_non_existent_column", query); }, + std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithInvalidClauseAtoms) { + auto wrapper = create_wrapper(); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE INVALID CLAUSE SYNTAX !!! + )"; + + EXPECT_THROW({ wrapper->map_sql_query("test_feature_with_invalid_clause", query); }, + std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +// map_sql_query - SQL2METTA +TEST_F(PostgresWrapperTest, MapSqlQueryFirstRowMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + string query_organism = R"( + SELECT + o.organism_id AS public_organism__organism_id, + o.genus AS public_organism__genus, + o.species AS public_organism__species, + o.common_name AS public_organism__common_name, + o.abbreviation AS public_organism__abbreviation, + o.comment AS public_organism__comment + FROM organism AS o + WHERE o.organism_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_organism", query_organism); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 19); + + string query_feature = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.feature_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature", query_feature); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 51); + + string query_cvterm = R"( + SELECT + c.cvterm_id AS public_cvterm__cvterm_id, + c.name AS public_cvterm__name, + c.definition AS public_cvterm__definition, + c.is_obsolete AS public_cvterm__is_obsolete, + c.is_relationshiptype AS public_cvterm__is_relationshiptype + FROM cvterm AS c + WHERE c.cvterm_id = 1 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_cvterm", query_cvterm); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 65); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithClausesAndSkipColumnsMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.organism_id = )" + + to_string(DROSOPHILA_ORGANISM_ID) + R"( AND f.feature_id <= 5)"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature_clause_and_skip", query); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 86); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryZeroRowsMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE f.feature_id = -999 + )"; + + EXPECT_NO_THROW({ wrapper->map_sql_query("test_feature_zero_rows", query); }); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithNonExistentSkipColumnMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.column_xyz AS public_feature__column_xyz + FROM feature AS f + WHERE f.feature_id < 10 + )"; + + EXPECT_THROW({ wrapper->map_sql_query("test_feature_with_non_existent_column", query); }, + std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +TEST_F(PostgresWrapperTest, MapSqlQueryWithInvalidClauseMetta) { + auto wrapper = create_wrapper(MAPPER_TYPE::SQL2METTA); + + string query = R"( + SELECT + f.feature_id AS public_feature__feature_id, + f.organism_id AS public_feature__organism_id, + f.type_id AS public_feature__type_id, + f.name AS public_feature__name, + f.uniquename AS public_feature__uniquename, + f.residues AS public_feature__residues, + f.md5checksum AS public_feature__md5checksum, + f.seqlen AS public_feature__seqlen, + f.is_analysis AS public_feature__is_analysis, + f.is_obsolete AS public_feature__is_obsolete + FROM feature AS f + WHERE INVALID CLAUSE SYNTAX !!! + )"; + + EXPECT_THROW({ wrapper->map_sql_query("test_feature_with_invalid_clause", query); }, + std::runtime_error); + EXPECT_EQ(wrapper->mapper_handle_trie_size(), 0); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + ::testing::AddGlobalTestEnvironment(new PostgresWrapperTestEnvironment); + return RUN_ALL_TESTS(); +}