From 3bf7c05513d4b804578ed46e1604eed4f0f9db89 Mon Sep 17 00:00:00 2001 From: Paulo Bernardo Date: Sun, 19 Oct 2025 15:09:34 -0300 Subject: [PATCH 1/2] RESET THIS COMMIT, THIS BRANCH IS IN DRAFT --- README.md | 2 + docs/contributing.md | 118 ++++ docs/opendbtdocs/index.html | 539 +++++++++++++++++- mkdocs.yml | 3 +- opendbt/catalog/CLASSES.md | 114 ++++ opendbt/catalog/__init__.py | 105 +++- opendbt/catalog/bk.py | 390 +++++++++++++ opendbt/dbt/docs/index.html | 538 ++++++++++++++++- tests/resources/dbtcore/dbt_project.yml | 4 - tests/resources/dbtcore/profiles.yml | 14 +- tests/resources/dbtstarrocks/.gitignore | 4 + tests/resources/dbtstarrocks/.sqlfluff | 53 ++ tests/resources/dbtstarrocks/dbt_project.yml | 18 + .../dbtstarrocks/models/my_core_table1.sql | 8 + .../models/my_executepython_model.py | 22 + .../models/my_first_dbt_model.sql | 12 + .../models/my_second_dbt_model.sql | 7 + .../resources/dbtstarrocks/models/schema.yml | 49 ++ tests/resources/dbtstarrocks/profiles.yml | 13 + 19 files changed, 1948 insertions(+), 65 deletions(-) create mode 100644 docs/contributing.md create mode 100644 opendbt/catalog/CLASSES.md create mode 100644 opendbt/catalog/bk.py create mode 100644 tests/resources/dbtstarrocks/.gitignore create mode 100644 tests/resources/dbtstarrocks/.sqlfluff create mode 100644 tests/resources/dbtstarrocks/dbt_project.yml create mode 100644 tests/resources/dbtstarrocks/models/my_core_table1.sql create mode 100644 tests/resources/dbtstarrocks/models/my_executepython_model.py create mode 100644 tests/resources/dbtstarrocks/models/my_first_dbt_model.sql create mode 100644 tests/resources/dbtstarrocks/models/my_second_dbt_model.sql create mode 100644 tests/resources/dbtstarrocks/models/schema.yml create mode 100644 tests/resources/dbtstarrocks/profiles.yml diff --git a/README.md b/README.md index abd0160..ec316a6 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ pip install https://github.com/memiiso/opendbt/archive/refs/tags/0.4.0.zip --upg The project completely open-source, using the Apache 2.0 license. opendbt still is a young project and there are things to improve. Please feel free to test it, give feedback, open feature requests or send pull requests. +Check out the [Contributing Guide](docs/contributing.md) for a full walkthrough on setting up your environment, +running tests, and submitting changes. ### Contributors diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..f24bc92 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,118 @@ +# Contributing Guide + +Thanks for your interest in improving **OpenDBT**! This guide explains how to set up a development +environment, run the test-suite, and submit a change. If you bump into anything that is unclear, feel free +to open an issue or start a discussion – feedback helps us make this guide better. + +--- + +## Before You Start + +- Make sure you have **Python 3.8+** available. +- Familiarise yourself with the project goals by reading `README.md` and browsing the existing docs. +- Check the issue tracker to see whether someone else is already working on the problem you care about. + +--- + +## Environment Setup + +```bash +git clone https://github.com//opendbt.git +cd opendbt +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -e ".[test,dev]" +``` + +The editable install gives you the command line entry point (`opendbt`) and all dependencies used in the +test suite (`.[test]`) and documentation tooling (`.[dev]`). 
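A quick smoke test confirms the editable install resolves to your working copy (a minimal sketch; it assumes only that the install above succeeded):

```python
# Verify the editable install points at the cloned repository, not a released wheel.
import opendbt

print(opendbt.__file__)  # expected: a path inside your opendbt checkout
```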
+ +If you rely on optional integrations (for example, Airflow), install the relevant extras as well: + +```bash +pip install -e ".[airflow]" +``` + +--- + +## Development Workflow + +1. Create a topic branch off `main`: + ```bash + git checkout -b feat/ + ``` +2. Make your changes in small, logical commits. Keep commit messages focused on the “what” and “why”. +3. Add or update tests that cover the behaviour you changed. +4. Run the test-suite (see below) and ensure it passes locally before opening a pull request. +5. Rebase on top of the latest `main` before submitting so the history stays clean. + +--- + +## Running Tests + +The project uses `pytest` to run a suite of unit and integration tests that wrap dbt invocations. + +```bash +pytest +``` + +Useful options: + +- Run a single test file: `pytest tests/test_catalog.py` +- Run a single test case: `pytest tests/test_catalog.py::TestCatalog::test_export` +- Show `print` output during tests: `pytest -s` + +Some tests rely on Docker or external services. If you run into failures that look infrastructure related, +double-check the `tests/resources` configuration and ensure supporting services are available. + +Linting is provided via `pylint`: + +```bash +pylint opendbt +``` + +--- + +## Working With Documentation + +All documentation lives inside the `docs/` directory and is served with MkDocs. + +- Preview: `mkdocs serve` (the site will be available at http://127.0.0.1:8000/) +- Build static site: `mkdocs build` + +If you add a new Markdown page, remember to update `mkdocs.yml` so the page appears in the navigation, +and keep pages written in clear, concise English. + +--- + +## Manual Verification + +Depending on your change, you may want to run additional manual checks: + +- Validate `opendbt docs generate` on one of the example projects under `tests/resources`. +- For catalog-related updates, regenerate docs in `tests/resources/dbtcore` and open `target/index.html` + with a simple `python -m http.server`. +- When modifying packaging metadata, run `python -m build` to ensure wheels and source distributions + can be created successfully. + +--- + +## Pull Request Checklist + +Before you submit your PR: + +- [ ] Tests are green locally (`pytest`). +- [ ] New or updated behaviour is covered by tests. +- [ ] Documentation and examples reflect the change. +- [ ] `mkdocs serve` works if docs were touched. +- [ ] No unrelated formatting changes were introduced. +- [ ] Each commit tells a coherent story (squash if needed). + +Open the pull request against the `main` branch and provide context: + +1. What problem does the change solve? +2. How does it work? +3. Any trade-offs or follow-up work the reviewers should be aware of? + +Reviewers might request modifications – this is normal. Once everything looks good, the PR will be merged +and your work will become part of OpenDBT. Thank you for contributing! 🎉 diff --git a/docs/opendbtdocs/index.html b/docs/opendbtdocs/index.html index 184387b..b4d94f6 100644 --- a/docs/opendbtdocs/index.html +++ b/docs/opendbtdocs/index.html @@ -639,6 +639,13 @@

> Lineage + @@ -930,8 +937,95 @@

Statistics

Rendering lineage graph...

-
- Error rendering lineage graph. +
+ Error rendering lineage graph. +
+
+ + +
+
+
+

AI Assistant for {{ selectedItem.name }}

+

Enter your OPENAI_KEY (it is saved only in your browser) and describe what you want, for example: "improve the current documentation" or "create unit tests".

+
+
+ + +

The key is kept in your browser's localStorage. Use a user with limited permissions.

+
+
+
+ + +
+
+ +
+
+
+
+
+
+ {{ message.role === 'assistant' ? 'Assistant' : 'You' }}
+
+ {{ new Date(message.timestamp).toLocaleString() }}
+
+
+
+
+
+ {{ activeAiSession.error }} +
+
+ + +

Tips: "Improve the current documentation", "Create unit tests for the macros", "Suggest additional validations for the critical columns".

+
+
+
+ The response takes into account the context of the current model, such as columns and compiled SQL. +
+
@@ -1072,7 +1166,7 @@

Welcome to DBT Docs

- \ No newline at end of file + diff --git a/mkdocs.yml b/mkdocs.yml index 3ae1de1..dd06fa9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -18,6 +18,7 @@ nav: - Python(Local) Model: python_integration.md - DLT Model: dlt_integration.md - Examples: examples.md + - Contributing: contributing.md - Data Catalog: catalog.md - Data Catalog (Demo): opendbtdocs/index.html @@ -37,4 +38,4 @@ markdown_extensions: - attr_list - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji - emoji_generator: !!python/name:material.extensions.emoji.to_svg \ No newline at end of file + emoji_generator: !!python/name:material.extensions.emoji.to_svg diff --git a/opendbt/catalog/CLASSES.md b/opendbt/catalog/CLASSES.md new file mode 100644 index 0000000..080864c --- /dev/null +++ b/opendbt/catalog/CLASSES.md @@ -0,0 +1,114 @@ +# Documentation for the `opendbt.opendbt.catalog` module + +This document summarizes the classes defined in `__init__.py`, explaining their responsibilities, main attributes, and how they relate within the column lineage catalog generation flow. + +## Overview of relationships + +``` +OpenDbtCatalog +└── OpenDbtNode* + ├── OpenDbtTableRef + └── OpenDbtColumn* + ├── transformations + └── depends_on → OpenDbtColumn (from other OpenDbtNode) +``` + +* `OpenDbtCatalog` serves as the entry point: it loads the dbt manifest and catalog, instantiates `OpenDbtNode` for each relevant resource, and coordinates column-lineage computation. +* Each `OpenDbtNode` represents a dbt resource (model, seed, snapshot, or source) and aggregates: + + * An `OpenDbtTableRef`, used to build fully qualified names (FQN). + * A set of `OpenDbtColumn` objects, which store metadata, types, transformations, and column-to-column dependencies. +* `Dialect` is an auxiliary enumerator used by both `OpenDbtTableRef` and `OpenDbtNode` to adjust behavior depending on the SQL engine. + +## `Dialect` + +Enum listing supported SQL dialects (`postgres`, `snowflake`, `bigquery`, etc.). + +* **Responsibility:** provide a canonical set of values to identify the adapter in use. +* **Usage:** `OpenDbtTableRef` checks if the dialect is `starrocks` to decide whether to include the database name in the FQN. `OpenDbtNode` derives the dialect directly from the manifest and passes it to SQLGlot utilities. + +## `OpenDbtTableRef` + +Immutable dataclass encapsulating a table’s identity. + +* **Attributes:** `database`, `schema`, `table`, `dialect`. +* **Main methods:** + + * `table_fqn()`: returns the fully qualified name, respecting dialect-specific nuances. + * `_generate_db_structure()`: builds the base dictionary (database/schema) used in the FQN. +* **Relationships:** + + * Instantiated by `OpenDbtNode` when loading each resource. + * Used by `OpenDbtColumn` to generate `column_fqn` and `table_fqn`. + * Referenced in dependency objects (`depends_on`) to trace a column’s origin. + +## `OpenDbtColumn` + +Simple class representing a catalog column with extended metadata. + +* **Initialization:** receives an `OpenDbtTableRef` and a raw dictionary from the manifest/catalog. During construction: + + * Normalizes `name` and `type`. + * Computes `column_fqn`, `table_fqn`, and `table_relative_fqn`. + * Initializes `transformations` and `depends_on` lists. +* **Exposed properties:** `name`, `type`, `transformations`, `depends_on`. +* **List usage:** + + * `transformations`: SQL fragments (strings) describing how the column was derived. 
+ * `depends_on`: list of `OpenDbtColumn` instances that serve as data sources (each may include a `model_id` when mapped to another node). +* **Relationships:** created and managed exclusively by `OpenDbtNode`; exported to the final JSON by `OpenDbtCatalog`. + +## `OpenDbtNode` + +Subclass of `OpenDbtLogger` that encapsulates a dbt resource enriched with catalog data. + +* **Construction:** receives `manifest_node`, `catalog_node`, and `dialect`. + + * Merges metadata and stats. + * Creates an `OpenDbtTableRef` and a dictionary of `OpenDbtColumn` objects. + * Stores `parent_nodes` to facilitate later compositions. +* **Main methods/properties:** + + * `columns`: mapping `name → OpenDbtColumn`. + * `depends_on`: list of parent IDs (from the manifest). + * `populate_lineage(tables2nodes)`: uses SQLGlot to compute column-to-column lineage, filling in `transformations` and `depends_on`. + * `db_schema_dict()` / `parent_db_schema_dict()`: builds SQLGlot-compatible structures to resolve names in complex queries. + * `compiled_code`: retrieves compiled SQL (ignores Python models). +* **Relationships:** + + * Created by `OpenDbtCatalog` for each relevant node/source. + * Bridges manifest dependencies (`depends_on`) and actual instances (`parent_nodes`), stored in `OpenDbtCatalog.nodes`. + * Converts table IDs (`tables2nodes`) into real references during lineage population. + +## `OpenDbtCatalog` + +Orchestrator responsible for merging the manifest and catalog and producing the enriched JSON (`catalogl.json`). + +* **Initialization:** loads JSON files, stores `catalog_file` (output), and keeps the default dialect. +* **Main methods:** + + * `nodes`: builds (once) all `OpenDbtNode` and source instances and caches them. + + * Updates each node’s `parent_nodes` based on `depends_on`. + * `tables2nodes`: mapping `table_fqn → node_id` used during lineage computation. + * `table(fqn)` / `node(id)`: convenient accessors to loaded structures. + * `export()`: iterates over all `OpenDbtNode` instances, calls `populate_lineage`, filters relevant fields, and writes the final JSON. +* **Relationships:** + + * The only class that instantiates `OpenDbtNode`. + * Provides SQLGlot schema context via node methods. + * Uses `tables2nodes` to link derived columns across tables/models. + +## Summary flow + +1. `OpenDbtCatalog` reads the input files and builds `OpenDbtNode` objects (models and sources). +2. For each node, `OpenDbtNode`: + + * Constructs an `OpenDbtTableRef`. + * Prepares `OpenDbtColumn` objects for all known columns. + * Expands `parent_nodes` based on `depends_on`. +3. `OpenDbtCatalog.export()` calls `populate_lineage` on each node: + + * SQLGlot parses the SQL (`compiled_code`) and returns a dependency tree. + * Each `OpenDbtColumn` receives its transformations and references to source columns (new instances linked via `tables2nodes`). +4. The final catalog (`catalogl.json`) aggregates metadata, stats, columns, and computed lineage. 
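To make the summary flow concrete, here is a minimal sketch of driving catalog generation end to end. It assumes dbt has already produced `manifest.json` and `catalog.json` (for example via `dbt docs generate`); the `target/` path below is illustrative:

```python
from pathlib import Path

from opendbt.catalog import OpenDbtCatalog

# Illustrative location of the dbt artifacts for one of the example projects.
target_dir = Path("tests/resources/dbtcore/target")

catalog = OpenDbtCatalog(manifest_path=target_dir / "manifest.json",
                         catalog_path=target_dir / "catalog.json")

# Accessing `nodes` merges manifest and catalog entries and wires up parent nodes;
# export() populates column lineage and writes catalogl.json next to manifest.json.
catalog.export()
```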
diff --git a/opendbt/catalog/__init__.py b/opendbt/catalog/__init__.py index e3f05f6..405f8a4 100644 --- a/opendbt/catalog/__init__.py +++ b/opendbt/catalog/__init__.py @@ -1,3 +1,4 @@ +from enum import Enum import json from dataclasses import dataclass from pathlib import Path @@ -11,19 +12,45 @@ from opendbt.logger import OpenDbtLogger from opendbt.utils import Utils +class Dialect(Enum): + POSTGRES = "postgres" + SNOWFLAKE = "snowflake" + BIGQUERY = "bigquery" + REDSHIFT = "redshift" + MYSQL = "mysql" + SQLITE = "sqlite" + ORACLE = "oracle" + SQLSERVER = "sqlserver" + TRINO = "trino" + DATABRICKS = "databricks" + ATHENA = "athena" + STARROCKS = "starrocks" + @dataclass(frozen=True) class OpenDbtTableRef: database: str schema: str table: str + dialect: str = "default" def __str__(self) -> str: return self.table_fqn() def table_fqn(self) -> str: - return ".".join([self.database, self.schema, self.table]) + db_structure = self._generate_db_structure() + return ".".join([*db_structure.values(), self.table]) + + def _generate_db_structure(self): + + db_structure = {} + if self.dialect != Dialect.STARROCKS.value: + db_structure["database"] = self.database + db_structure["schema"] = self.schema + return db_structure + db_structure["schema"] = self.schema + return db_structure class OpenDbtColumn: """Represents a column within a dbt node, including lineage info.""" @@ -36,8 +63,8 @@ def __init__(self, table_ref: OpenDbtTableRef, data: dict): self.data["column_fqn"] = f"{self.table_ref.table_fqn()}.{self.name}".lower() self.data["table_fqn"] = self.table_ref.table_fqn().lower() self.data["table_relative_fqn"] = f"{self.table_ref.schema}.{self.table_ref.table}" - self.data["transformations"] = [] - self.data["depends_on"] = [] + self.data["transformations"] = self.data.get("transformations", []) + self.data["depends_on"] = self.data.get("depends_on", []) @property def transformations(self): @@ -73,12 +100,14 @@ def __init__(self, manifest_node: dict, catalog_node: dict, dialect: str): dict2=catalog_node.get("stats", {})) self.table_ref = OpenDbtTableRef(database=self.node.get("database", "$database"), schema=self.node.get("schema", "$schema"), - table=self.node.get("name", "$name")) + table=self.node.get("name", "$name"), + dialect=dialect) self.node["columns"]: Dict[str, OpenDbtColumn] = self.__columns( catalog_cols=catalog_node.get("columns", {}) ) self.dialect = dialect self.parent_nodes: Dict[str, OpenDbtNode] = {} + self._lineage_populated = False def __columns(self, catalog_cols: dict) -> Dict[str, OpenDbtColumn]: combined = Utils.merge_dicts(dict1=self.node.get("columns", {}), @@ -143,7 +172,7 @@ def _sqlglot_column_ref(self, node): return OpenDbtColumn(table_ref=table_ref, data={"name": column_name}) - def populate_lineage(self, tables2nodes: dict): + def populate_lineage(self, tables2nodes: dict, force: bool = False): """ Calculates the column-level lineage for the node. @@ -156,6 +185,13 @@ def populate_lineage(self, tables2nodes: dict): - 'transformation': A string containing the SQL expression that transforms the source(s) into the target column. 
""" + if self._lineage_populated and not force: + return self.columns + + for column in self.columns.values(): + column.transformations.clear() + column.depends_on.clear() + sqlglot_column_lineage_map = self.sqlglot_column_lineage_map() # pylint: disable=too-many-nested-blocks for column_name, node in sqlglot_column_lineage_map.items(): @@ -185,7 +221,15 @@ def populate_lineage(self, tables2nodes: dict): None) if parent_model_id: parent_column.data["model_id"] = parent_model_id - self.columns[column_name].depends_on.append(parent_column) + parent_dict = parent_column.to_dict().copy() + self.columns[column_name].depends_on.append({ + "column": parent_dict["name"], + "column_fqn": parent_dict["column_fqn"], + "table_fqn": parent_dict["table_fqn"], + "table_relative_fqn": parent_dict["table_relative_fqn"], + "model_id": parent_dict.get("model_id"), + "type": parent_dict.get("type"), + }) except Exception as e: self.log.error( f"Unexpected error processing lineage source node for column " @@ -202,8 +246,35 @@ def populate_lineage(self, tables2nodes: dict): ) if self.columns[column_name].transformations: self.columns[column_name].transformations.reverse() + self._lineage_populated = True + self.node["column_lineage"] = self.column_lineage() return self.columns + def column_lineage(self) -> dict: + lineage_map = {} + for column_name, column in self.columns.items(): + column_dict = column.to_dict() + lineage_map[column_name] = { + "column_fqn": column_dict.get("column_fqn"), + "table_fqn": column_dict.get("table_fqn"), + "transformations": list(column.transformations), + "depends_on": [] + } + for dependency in column.depends_on: + if isinstance(dependency, OpenDbtColumn): + dependency_dict = dependency.to_dict() + else: + dependency_dict = dependency + lineage_map[column_name]["depends_on"].append({ + "column": dependency_dict.get("column") or dependency_dict.get("name"), + "column_fqn": dependency_dict.get("column_fqn"), + "table_fqn": dependency_dict.get("table_fqn"), + "table_relative_fqn": dependency_dict.get("table_relative_fqn"), + "model_id": dependency_dict.get("model_id"), + "type": dependency_dict.get("type"), + }) + return lineage_map + def sqlglot_column_lineage_map(self): if not self.compiled_code: if self.resource_type == "model": @@ -249,12 +320,11 @@ def parent_db_schema_dict(self): def db_schema_dict(self, include_parents=True) -> dict: db_structure = {} - db, schema, table = self.table_ref.database, self.table_ref.schema, self.table_ref.table - db_structure[db] = {} - db_structure[db][schema] = {} - db_structure[db][schema][table] = {} + schema, table = self.table_ref.schema, self.table_ref.table + db_structure[schema] = {} + db_structure[schema][table] = {} for col_name, col in self.columns.items(): - db_structure[db][schema][table][col_name] = col.type + db_structure[schema][table][col_name] = col.type if include_parents is False: return db_structure @@ -305,9 +375,9 @@ def tables2nodes(self) -> dict: def export(self): self.log.info("Generating catalogl.json data with column level lineage.") catalog = self.catalog - catalog["nodes"] = {} - catalog["sources"] = {} - keys_to_export = {"metadata", "stats", "columns"} + catalog["nodes"] = self.catalog.get("nodes", {}) + catalog["sources"] = self.catalog.get("sources", {}) + keys_to_export = {"metadata", "stats", "columns", "column_lineage"} for model_id, model in tqdm.tqdm(self.nodes.items()): model.populate_lineage(self.tables2nodes) node_dict = {key: model.node[key] for key in keys_to_export if key in model.node} @@ -360,4 
+430,11 @@ def nodes(self): else: self.log.warning(f"Parent model {parent_node_id} not found in catalog") + self._tables2nodes = {node.table_fqn.strip().lower(): key for key, node in self._nodes.items()} + for node_id, node in self._nodes.items(): + try: + node.populate_lineage(self._tables2nodes) + except Exception as e: + self.log.warning(f"Could not populate lineage for '{node_id}': {e}") + return self._nodes diff --git a/opendbt/catalog/bk.py b/opendbt/catalog/bk.py new file mode 100644 index 0000000..963a700 --- /dev/null +++ b/opendbt/catalog/bk.py @@ -0,0 +1,390 @@ +from enum import Enum +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Optional + +import sqlglot +import tqdm +from sqlglot import Expression +from sqlglot.lineage import lineage, SqlglotError, exp + +from opendbt.logger import OpenDbtLogger +from opendbt.utils import Utils + +class Dialect(Enum): + POSTGRES = "postgres" + SNOWFLAKE = "snowflake" + BIGQUERY = "bigquery" + REDSHIFT = "redshift" + MYSQL = "mysql" + SQLITE = "sqlite" + ORACLE = "oracle" + SQLSERVER = "sqlserver" + TRINO = "trino" + DATABRICKS = "databricks" + ATHENA = "athena" + STARROCKS = "starrocks" + + +@dataclass(frozen=True) +class OpenDbtTableRef: + database: str + schema: str + table: str + dialect: str = "default" + + def __str__(self) -> str: + return self.table_fqn() + + def table_fqn(self) -> str: + db_structure = self._generate_db_structure() + return ".".join([*db_structure.values(), self.table]) + + def _generate_db_structure(self): + + db_structure = {} + if self.dialect != Dialect.STARROCKS.value: + db_structure["database"] = self.database + db_structure["schema"] = self.schema + return db_structure + + db_structure["schema"] = self.schema + return db_structure + +class OpenDbtColumn: + """Represents a column within a dbt node, including lineage info.""" + + def __init__(self, table_ref: OpenDbtTableRef, data: dict): + self.table_ref: OpenDbtTableRef = table_ref + self.data: dict = data or {} + self.data["name"] = self.data["name"] if "name" in self.data else "unknown-column" + self.data["type"] = self.data["type"] if "type" in self.data else "unknown" + self.data["column_fqn"] = f"{self.table_ref.table_fqn()}.{self.name}".lower() + self.data["table_fqn"] = self.table_ref.table_fqn().lower() + self.data["table_relative_fqn"] = f"{self.table_ref.schema}.{self.table_ref.table}" + self.data["transformations"] = [] + self.data["depends_on"] = [] + + @property + def transformations(self): + return self.data["transformations"] + + @property + def depends_on(self): + return self.data["depends_on"] + + # @property + # def column_fqn(self): + # return self.data["column_fqn"] + + @property + def type(self): + return self.data["type"] + + @property + def name(self): + return self.data["name"] + + def to_dict(self): + return self.data + + +class OpenDbtNode(OpenDbtLogger): + def __init__(self, manifest_node: dict, catalog_node: dict, dialect: str): + self.node: dict = manifest_node + # Enrich node with catalog information + self.node["metadata"] = Utils.merge_dicts(dict1=self.node.get("metadata", {}), + dict2=catalog_node.get("metadata", {})) + self.node["stats"] = Utils.merge_dicts(dict1=self.node.get("stats", {}), + dict2=catalog_node.get("stats", {})) + self.table_ref = OpenDbtTableRef(database=self.node.get("database", "$database"), + schema=self.node.get("schema", "$schema"), + table=self.node.get("name", "$name"), + dialect=dialect) + self.node["columns"]: Dict[str, OpenDbtColumn] = 
self.__columns( + catalog_cols=catalog_node.get("columns", {}) + ) + self.dialect = dialect + self.parent_nodes: Dict[str, OpenDbtNode] = {} + + def __columns(self, catalog_cols: dict) -> Dict[str, OpenDbtColumn]: + combined = Utils.merge_dicts(dict1=self.node.get("columns", {}), + dict2=catalog_cols.get("columns", {})) + cols = {} + for col_name, col_data in combined.items(): + col_name: str + cols[col_name.strip().lower()] = OpenDbtColumn(table_ref=self.table_ref, data=col_data) + return cols + + def to_dict(self): + return self.node + + @property + def columns(self) -> Dict[str, OpenDbtColumn]: + return self.node.get("columns", {}) + + @property + def depends_on(self) -> list: + return self.node.get("depends_on", {}).get("nodes", []) + + def column_types(self): + return {name: data.type for name, data in self.columns.items()} + + @property + def unique_id(self): + return self.node['unique_id'] + + @property + def resource_type(self): + return self.node['resource_type'] + + @property + def table_fqn(self): + return self.table_ref.table_fqn() + + @property + def is_python_model(self) -> bool: + return self.node.get("language") == "python" + + @property + def compiled_code(self) -> Optional[str]: + """ + Returns the compiled SQL code for this node, if available. + Returns None if the node is not a model, is a Python model, + or has no compiled code defined in the manifest. + """ + if self.resource_type != "model" or self.is_python_model: + return None + + return self.node.get("compiled_code", None) + + @property + def column_names(self) -> list[str]: + return [item.name.lower() for item in self.columns.values()] + + def _sqlglot_column_ref(self, node): + if node.source.key != "table": + raise ValueError(f"Node source is not a table, but {node.source.key}") + column_name = node.name.split(".")[-1].lower() + table_ref = OpenDbtTableRef(database=node.source.catalog, schema=node.source.db, table=node.source.name) + + return OpenDbtColumn(table_ref=table_ref, data={"name": column_name}) + + def populate_lineage(self, tables2nodes: dict): + """ + Calculates the column-level lineage for the node. + + Returns: + dict: A dictionary where keys are lowercase column names of the current node, + and values are lists of dictionaries. Each inner dictionary represents + a source column and contains: + - 'parent_column': The name of the source column (lowercase). + - 'parent_table': An OpenDbtTableRef object for the source table. + - 'transformation': A string containing the SQL expression that + transforms the source(s) into the target column. + """ + sqlglot_column_lineage_map = self.sqlglot_column_lineage_map() + # pylint: disable=too-many-nested-blocks + for column_name, node in sqlglot_column_lineage_map.items(): + column_name = column_name.strip().lower() + if column_name not in self.columns: + self.columns[column_name] = OpenDbtColumn(table_ref=self.table_ref, data={"name": column_name}) + + # Handle cases where lineage couldn't be determined or returned an unexpected format. + # sqlglot.lineage might return an empty list or other non-Node types on failure/no lineage. 
+ if not node or not isinstance(node, sqlglot.lineage.Node): + self.log.debug( + f"No lineage node or invalid format for column '{column_name}' in model '{self.unique_id}'.") + continue # Skip to the next column + + try: + for n in node.walk(): + try: + transf_sql = n.expression.sql(dialect=self.dialect) + if transf_sql: + self.columns[column_name].transformations.append(transf_sql) + except: + pass + if n.source and isinstance(n.source, exp.Table): # More specific check + try: + parent_column = self._sqlglot_column_ref(n) + parent_model_id = tables2nodes.get(parent_column.table_ref.table_fqn().strip().lower(), + None) + if parent_model_id: + parent_column.data["model_id"] = parent_model_id + self.columns[column_name].depends_on.append(parent_column) + except Exception as e: + self.log.error( + f"Unexpected error processing lineage source node for column " + f"'{column_name}' in model '{self.unique_id}': {e}" + ) + except Exception as e: + self.log.warning( + f"Error walking lineage tree for column '{column_name}' in model '{self.unique_id}': {e}") + + if not self.columns[column_name].depends_on: + self.log.debug( + f"No source table/column found during lineage walk for column '{column_name}' " + f"in model '{self.unique_id}'." + ) + if self.columns[column_name].transformations: + self.columns[column_name].transformations.reverse() + return self.columns + + def sqlglot_column_lineage_map(self): + if not self.compiled_code: + if self.resource_type == "model": + self.log.warning(f"Compiled code not found for model {self.unique_id}") + return {} + + selected_columns = self.column_names + lineage_data = {} + if not selected_columns: + try: + sql: Expression = sqlglot.parse_one(sql=self.compiled_code, dialect=self.dialect) + selected_columns = [] + for column in sql.expressions: + if isinstance(column, (exp.Column, exp.Alias)): + selected_columns.append(column.alias_or_name.lower()) + + except Exception as e: + self.log.warning(f"Error parsing SQL for model {self.unique_id}: {str(e)}") + return {} + + for column_name in selected_columns: + lineage_data[column_name] = [] + try: + sqlglot_lineage = lineage(column=column_name, + sql=self.compiled_code, + schema=self.db_schema_dict(), + dialect=self.dialect) + lineage_data[column_name] = sqlglot_lineage + except SqlglotError as e: + self.log.warning(f"Error processing model {self.unique_id}, column {column_name}: {e}") + except Exception as e: + self.log.debug(f"Unexpected error processing model {self.unique_id}, column {column_name}: {e}") + + return lineage_data + + def parent_db_schema_dict(self): + db_structure = {} + for parent_key, parent_node in self.parent_nodes.items(): + parent_db_schema_dict = parent_node.db_schema_dict(include_parents=False) + db_structure = Utils.merge_dicts(db_structure, parent_db_schema_dict) + + return db_structure + + def db_schema_dict(self, include_parents=True) -> dict: + db_structure = {} + schema, table = self.table_ref.schema, self.table_ref.table + db_structure[schema] = {} + db_structure[schema][table] = {} + for col_name, col in self.columns.items(): + db_structure[schema][table][col_name] = col.type + + if include_parents is False: + return db_structure + + parent_db_schema_dict = self.parent_db_schema_dict() + db_structure = Utils.merge_dicts(db_structure, parent_db_schema_dict) + + return db_structure + + +class OpenDbtCatalog(OpenDbtLogger): + def __init__(self, manifest_path: Path, catalog_path: Path): + self.manifest: dict = json.loads(manifest_path.read_text()) + self.catalog_file = 
manifest_path.parent.joinpath("catalogl.json") + if catalog_path.exists(): + self.catalog: dict = json.loads(catalog_path.read_text()) + else: + self.catalog: dict = {} + + self._nodes: Optional[Dict[str, OpenDbtNode]] = None + self._tables2nodes = None + self.dialect = self.manifest["metadata"]["adapter_type"] + + def table(self, table_fqn: str) -> OpenDbtNode: + if table_fqn in self.tables2nodes: + node_id = self.tables2nodes[table_fqn] + try: + return self.node(node_id=node_id) + except: + raise Exception(f"Given table {table_fqn}, node: {node_id} not found in catalog") + + raise Exception(f"Given table {table_fqn} not found in catalog") + + def node(self, node_id: str) -> OpenDbtNode: + if node_id in self.nodes: + return self.nodes.get(node_id) + + raise Exception(f"Given node {node_id} not found in catalog") + + @property + def tables2nodes(self) -> dict: + if not self._tables2nodes: + self._tables2nodes = {} + self._tables2nodes = {node.table_fqn.strip().lower(): key for key, node in self.nodes.items()} + + return self._tables2nodes + + def export(self): + self.log.info("Generating catalogl.json data with column level lineage.") + catalog = self.catalog + catalog["nodes"] = self.catalog.get("nodes", {}) + catalog["sources"] = self.catalog.get("sources", {}) + keys_to_export = {"metadata", "stats", "columns"} + for model_id, model in tqdm.tqdm(self.nodes.items()): + model.populate_lineage(self.tables2nodes) + node_dict = {key: model.node[key] for key in keys_to_export if key in model.node} + catalog["nodes"][model_id] = node_dict + + self.catalog_file.unlink(missing_ok=True) + self.catalog_file.write_text(json.dumps(obj=catalog, default=lambda obj: obj.to_dict())) + + @property + def nodes(self): + # pylint: disable=(too-many-locals) + if not self._nodes: + self._nodes = {} + manifest_nodes = self.manifest.get("nodes", {}) + catalog_nodes = self.catalog.get("nodes", {}) + + for node_id, manifest_node_data in manifest_nodes.items(): + if manifest_node_data.get("resource_type") in ["model", "seed", "snapshot"]: + # Find corresponding catalog data, default to empty dict if not found + catalog_node_data = catalog_nodes.get(node_id, {}) + try: + merged_node = OpenDbtNode(manifest_node=manifest_node_data, + catalog_node=catalog_node_data, + dialect=self.dialect) + self._nodes[node_id] = merged_node + except Exception as e: + self.log.warning(f"Could not create MergedDBTNode for node '{node_id}' {str(e)}") + + manifest_sources = self.manifest.get("sources", {}) + catalog_sources = self.catalog.get("sources", {}) + for source_id, manifest_source_data in manifest_sources.items(): + catalog_source_data = catalog_sources.get(source_id, {}) + try: + merged_source = OpenDbtNode(manifest_node=manifest_source_data, + catalog_node=catalog_source_data, + dialect=self.dialect) + self._nodes[source_id] = merged_source + except Exception as e: + self.log.warning(f"Could not create MergedDBTNode for source '{source_id}': {e}", + exc_info=True) # Add traceback + + self.log.info(f"Loaded {len(self._nodes)} merged nodes and sources.") + + # update parent nodes for each node + for node_id, node in self._nodes.items(): + for parent_node_id in node.depends_on: + if parent_node_id in self.nodes: + parent_node_obj = self.nodes[parent_node_id] + node.parent_nodes[parent_node_id] = parent_node_obj + else: + self.log.warning(f"Parent model {parent_node_id} not found in catalog") + + return self._nodes diff --git a/opendbt/dbt/docs/index.html b/opendbt/dbt/docs/index.html index 3731bb0..fce909b 100644 --- 
a/opendbt/dbt/docs/index.html +++ b/opendbt/dbt/docs/index.html @@ -645,6 +645,13 @@

> Lineage + @@ -948,8 +955,95 @@

Configuration

Rendering lineage graph...

-
- Error rendering lineage graph. +
+ Error rendering lineage graph. +
+
+ + +
+
+
+

AI Assistant for {{ selectedItem.name }}

+

Enter your OPENAI_KEY (stored only in your browser) and describe what you want, for example: "improve the current documentation" or "create unit tests".

+
+
+ + +

Keep only a key with restricted permissions. The value is stored in localStorage.

+
+
+
+ + +
+
+ +
+
+
+
+
+
+ {{ message.role === 'assistant' ? 'Assistant' : 'You' }}
+
+ {{ new Date(message.timestamp).toLocaleString() }}
+
+
+
+
+
+ {{ activeAiSession.error }} +
+
+ + +

Suggestions: "Improve the current documentation", "Create unit tests", "Suggest validation queries".

+
+
+
+ The response uses the context of the current model, including columns and compiled SQL. +
+
@@ -972,7 +1066,7 @@

Welcome to DBT Docs

- \ No newline at end of file + diff --git a/tests/resources/dbtcore/dbt_project.yml b/tests/resources/dbtcore/dbt_project.yml index c43062e..34f4326 100644 --- a/tests/resources/dbtcore/dbt_project.yml +++ b/tests/resources/dbtcore/dbt_project.yml @@ -16,7 +16,3 @@ clean-targets: models: dbtcore: +materialized: table - -vars: - dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom - dbt_callbacks: opendbt.examples.email_dbt_test_callback \ No newline at end of file diff --git a/tests/resources/dbtcore/profiles.yml b/tests/resources/dbtcore/profiles.yml index 79861c9..abb1c6c 100644 --- a/tests/resources/dbtcore/profiles.yml +++ b/tests/resources/dbtcore/profiles.yml @@ -1,9 +1,13 @@ dbtcore: outputs: dev: - type: duckdb - schema: core - path: ./../dev.duckdb - threads: 1 - + auth_plugin: '' + host: localhost + password: '' + port: 9030 + schema: dbt + type: starrocks + username: root + version: '' target: dev + diff --git a/tests/resources/dbtstarrocks/.gitignore b/tests/resources/dbtstarrocks/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null +++ b/tests/resources/dbtstarrocks/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/tests/resources/dbtstarrocks/.sqlfluff b/tests/resources/dbtstarrocks/.sqlfluff new file mode 100644 index 0000000..b6b3e3d --- /dev/null +++ b/tests/resources/dbtstarrocks/.sqlfluff @@ -0,0 +1,53 @@ +[sqlfluff] +templater = dbt +dialect = duckdb +# This change (from jinja to dbt templater) will make linting slower +# because linting will first compile dbt code into data warehouse code. +runaway_limit = 1000 +max_line_length = 180 +indent_unit = space + +[sqlfluff:indentation] +tab_space_size = 4 + +[sqlfluff:layout:type:comma] +spacing_before = touch +line_position = trailing + +# For rule specific configuration, use dots between the names exactly +# as you would in .sqlfluff. In the background, SQLFluff will unpack the +# configuration paths accordingly. +[tool.sqlfluff.rules.capitalisation.keywords] +capitalisation_policy = "upper" + +# The default configuration for capitalisation rules is "consistent" +# which will auto-detect the setting from the rest of the file. This +# is less desirable in a new project and you may find this (slightly +# more strict) setting more useful. +# Typically we find users rely on syntax highlighting rather than +# capitalisation to distinguish between keywords and identifiers. +# Clearly, if your organisation has already settled on uppercase +# formatting for any of these syntax elements then set them to "upper". 
+# See https://stackoverflow.com/questions/608196/why-should-i-capitalize-my-sql-keywords-is-there-a-good-reason +[sqlfluff:rules:capitalisation.keywords] +capitalisation_policy = upper +[sqlfluff:rules:capitalisation.identifiers] +capitalisation_policy = upper +[sqlfluff:rules:capitalisation.functions] +extended_capitalisation_policy = upper +# [sqlfluff:rules:capitalisation.literals] +# capitalisation_policy = lower +[sqlfluff:rules:capitalisation.types] +extended_capitalisation_policy = upper + +[sqlfluff:rules:aliasing.table] +aliasing = explicit + +[sqlfluff:rules:aliasing.column] +aliasing = explicit + +[sqlfluff:rules:aliasing.expression] +allow_scalar = False + +[sqlfluff:rules:ambiguous.column_references] # Number in group by +group_by_and_order_by_style = implicit \ No newline at end of file diff --git a/tests/resources/dbtstarrocks/dbt_project.yml b/tests/resources/dbtstarrocks/dbt_project.yml new file mode 100644 index 0000000..34f4326 --- /dev/null +++ b/tests/resources/dbtstarrocks/dbt_project.yml @@ -0,0 +1,18 @@ +name: 'dbtcore' +version: '1.0.0' + +profile: 'dbtcore' + +# include opendbt macros +macro-paths: [ "macros", "../../../opendbt/macros/" ] +# use opendbt index.html for docs +docs-paths: [ "../../../opendbt/docs/" ] + +clean-targets: + - "target" + - "dbt_packages" + - "logs" + +models: + dbtcore: + +materialized: table diff --git a/tests/resources/dbtstarrocks/models/my_core_table1.sql b/tests/resources/dbtstarrocks/models/my_core_table1.sql new file mode 100644 index 0000000..c00e8ab --- /dev/null +++ b/tests/resources/dbtstarrocks/models/my_core_table1.sql @@ -0,0 +1,8 @@ +with source_data as ( + select 1 as id, 'row1' as row_data + union all + select 2 as id, 'row1' as row_data +) + +SELECT * +FROM source_data \ No newline at end of file diff --git a/tests/resources/dbtstarrocks/models/my_executepython_model.py b/tests/resources/dbtstarrocks/models/my_executepython_model.py new file mode 100644 index 0000000..8b67828 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/my_executepython_model.py @@ -0,0 +1,22 @@ +import os +import platform + +from dbt import version + + +def print_info(): + _str = f"name:{os.name}, system:{platform.system()} release:{platform.release()}" + _str += f"\npython version:{platform.python_version()}, dbt:{version.__version__}" + print(_str) + + +def model(dbt, connection: "Connection"): + dbt.config(materialized="executepython") + print("==================================================") + print("========IM LOCALLY EXECUTED PYTHON MODEL==========") + print("==================================================") + print_info() + print("==================================================") + print("===============MAKE DBT GREAT AGAIN===============") + print("==================================================") + return None diff --git a/tests/resources/dbtstarrocks/models/my_first_dbt_model.sql b/tests/resources/dbtstarrocks/models/my_first_dbt_model.sql new file mode 100644 index 0000000..cc63f3b --- /dev/null +++ b/tests/resources/dbtstarrocks/models/my_first_dbt_model.sql @@ -0,0 +1,12 @@ +{{ config(materialized='table', table_type='DUPLICATE') }} + +with source_data as ( + select 1 as id, 'test-value' as data_value, 'test-value' as column_3 + union all + select 1 as id, 'test-value' as data_value, 'test-value' as column_3 + union all + select 2 as id, 'test-value' as data_value, 'test-value' as column_3 +) +SELECT * +FROM source_data +-- where id is not null diff --git a/tests/resources/dbtstarrocks/models/my_second_dbt_model.sql 
b/tests/resources/dbtstarrocks/models/my_second_dbt_model.sql new file mode 100644 index 0000000..8d5ec50 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/my_second_dbt_model.sql @@ -0,0 +1,7 @@ +SELECT + t1.id AS pk_id, + t1.data_value AS data_value1, + CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2 +FROM {{ ref('my_first_dbt_model') }} AS t1 +LEFT JOIN {{ ref('my_core_table1') }} AS t2 ON t1.id = t2.id +WHERE t1.id IN (1, 2) diff --git a/tests/resources/dbtstarrocks/models/schema.yml b/tests/resources/dbtstarrocks/models/schema.yml new file mode 100644 index 0000000..6e215d0 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/schema.yml @@ -0,0 +1,49 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: > + # A starter dbt model + + this is a __sample__ model used as an example + columns: + - name: data_value + - name: column_3 + - name: id + description: "The **primary key** for this table" + tests: + - unique: + config: + severity: error + error_if: ">1000" + warn_if: ">0" + - not_null: + config: + severity: error + error_if: ">1000" + warn_if: ">0" + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: pk_id + description: "The primary key for this table" + data_tests: + - unique + - not_null + - name: data_value1 + - name: data_value2 + - name: event_tstamp + - name: my_core_table1 + columns: + - name: id + - name: row_data + - name: my_executedlt_model + columns: + - name: event_id + - name: event_tstamp + - name: my_executepython_model + columns: + - name: event_id + - name: event_tstamp \ No newline at end of file diff --git a/tests/resources/dbtstarrocks/profiles.yml b/tests/resources/dbtstarrocks/profiles.yml new file mode 100644 index 0000000..abb1c6c --- /dev/null +++ b/tests/resources/dbtstarrocks/profiles.yml @@ -0,0 +1,13 @@ +dbtcore: + outputs: + dev: + auth_plugin: '' + host: localhost + password: '' + port: 9030 + schema: dbt + type: starrocks + username: root + version: '' + target: dev + From dd14e7580583bef8d8ca9e866d6c95e610562017 Mon Sep 17 00:00:00 2001 From: lorena Date: Wed, 22 Oct 2025 14:46:25 +0100 Subject: [PATCH 2/2] Commit Lorena --- .gitignore | 1 + docs/contributing.md | 6 +---- opendbt/catalog/__init__.py | 2 +- .../dbtstarrocks/models/incremental_model.sql | 24 +++++++++++++++++++ .../dbtstarrocks/models/microbatch_model.sql | 22 +++++++++++++++++ .../dbtstarrocks/models/raw_events_model.sql | 24 +++++++++++++++++++ .../dbtstarrocks/models/ref_my_1.sql | 7 ++++++ 7 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 tests/resources/dbtstarrocks/models/incremental_model.sql create mode 100644 tests/resources/dbtstarrocks/models/microbatch_model.sql create mode 100644 tests/resources/dbtstarrocks/models/raw_events_model.sql create mode 100644 tests/resources/dbtstarrocks/models/ref_my_1.sql diff --git a/.gitignore b/.gitignore index 67c7aba..56bc11e 100644 --- a/.gitignore +++ b/.gitignore @@ -215,6 +215,7 @@ venv/ ENV/ env.bak/ venv.bak/ +myvenv/ # Spyder project settings .spyderproject diff --git a/docs/contributing.md b/docs/contributing.md index f24bc92..485db20 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -21,7 +21,7 @@ git clone https://github.com//opendbt.git cd opendbt python -m venv .venv source .venv/bin/activate # Windows: .venv\Scripts\activate -pip install -e ".[test,dev]" +pip install -e . 
``` The editable install gives you the command line entry point (`opendbt`) and all dependencies used in the @@ -29,10 +29,6 @@ test suite (`.[test]`) and documentation tooling (`.[dev]`). If you rely on optional integrations (for example, Airflow), install the relevant extras as well: -```bash -pip install -e ".[airflow]" -``` - --- ## Development Workflow diff --git a/opendbt/catalog/__init__.py b/opendbt/catalog/__init__.py index 405f8a4..ec476e0 100644 --- a/opendbt/catalog/__init__.py +++ b/opendbt/catalog/__init__.py @@ -111,7 +111,7 @@ def __init__(self, manifest_node: dict, catalog_node: dict, dialect: str): def __columns(self, catalog_cols: dict) -> Dict[str, OpenDbtColumn]: combined = Utils.merge_dicts(dict1=self.node.get("columns", {}), - dict2=catalog_cols.get("columns", {})) + dict2=catalog_cols) cols = {} for col_name, col_data in combined.items(): col_name: str diff --git a/tests/resources/dbtstarrocks/models/incremental_model.sql b/tests/resources/dbtstarrocks/models/incremental_model.sql new file mode 100644 index 0000000..29742f5 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/incremental_model.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='incremental', + unique_key='event_hash', + event_time='event_date', + tags=['bronze'], + ) +}} + + +with + source_data as ( + select md5(concat(cast(event_date as string), event_payload)) as event_hash, + event_date, + event_payload + from {{ ref('raw_events_model') }} + + {% if is_incremental() %} + where event_date > (select max(event_date) from {{ this }}) + {% endif %} +) + +select event_hash, event_date, event_payload +from source_data \ No newline at end of file diff --git a/tests/resources/dbtstarrocks/models/microbatch_model.sql b/tests/resources/dbtstarrocks/models/microbatch_model.sql new file mode 100644 index 0000000..02c7703 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/microbatch_model.sql @@ -0,0 +1,22 @@ +{{ + config( + materialized='incremental', + incremental_strategy='microbatch', + unique_key='event_hash', + event_time='event_date', + begin='2025-10-08', + batch_size='day', + ) +}} + +with source_data as ( + select md5(concat(cast(event_date as string), event_payload)) as event_hash, + event_date, event_payload + from {{ ref('incremental_model') }} +) + +select + event_hash, + event_date, + event_payload as payload +from source_data \ No newline at end of file diff --git a/tests/resources/dbtstarrocks/models/raw_events_model.sql b/tests/resources/dbtstarrocks/models/raw_events_model.sql new file mode 100644 index 0000000..9b6f7a7 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/raw_events_model.sql @@ -0,0 +1,24 @@ +{{ + config( + materialized='table' + ) +}} + +SELECT DATE '2025-10-01' AS event_date, '{"user": "carol", "action": "login"}' AS event_payload +UNION ALL SELECT DATE '2025-09-30', '{"user": "carol", "action": "logout"}' +UNION ALL SELECT DATE '2025-09-30', '{"user": "carol", "action": "login"}' +UNION ALL SELECT DATE '2025-09-29', '{"user": "david", "action": "logout"}' +UNION ALL SELECT DATE '2025-09-28', '{"user": "david", "action": "login"}' +UNION ALL SELECT DATE '2025-09-27', '{"user": "david", "action": "logout"}' +UNION ALL SELECT DATE '2025-09-26', '{"user": "bob", "action": "login"}' +UNION ALL SELECT DATE '2025-09-26', '{"user": "bob", "action": "logout"}' +UNION ALL SELECT DATE '2025-10-06', '{"user": "lorena", "action": "login"}' +UNION ALL SELECT DATE '2025-10-07', '{"user": "lorena", "action": "logout"}' +UNION ALL SELECT DATE '2025-10-05', '{"user": "lorena", 
"action": "login"}' +UNION ALL SELECT DATE '2025-10-05', '{"user": "lorena", "action": "logout"}' +UNION ALL SELECT DATE '2025-10-08', '{"user": "maria", "action": "login"}' +UNION ALL SELECT DATE '2025-10-08', '{"user": "maria", "action": "logout"}' +UNION ALL SELECT DATE '2025-10-09', '{"user": "mariana", "action": "login"}' +UNION ALL SELECT DATE '2025-10-10', '{"user": "mariana", "action": "logout"}' +UNION ALL SELECT DATE '2025-10-15', '{"user": "pedro", "action": "login"}' +UNION ALL SELECT DATE '2025-10-19', '{"user": "pedro", "action": "logout"}'; diff --git a/tests/resources/dbtstarrocks/models/ref_my_1.sql b/tests/resources/dbtstarrocks/models/ref_my_1.sql new file mode 100644 index 0000000..9587299 --- /dev/null +++ b/tests/resources/dbtstarrocks/models/ref_my_1.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='table' + ) +}} + +select * from {{ ref('my_first_dbt_model') }} \ No newline at end of file