From 0790dc7136e06a0781d03c4ef1fefcd957cfd097 Mon Sep 17 00:00:00 2001 From: James Cha-Earley Date: Fri, 20 Mar 2026 15:41:12 -0700 Subject: [PATCH 1/2] Add 3 Snowflake cursor rules for data engineering, Cortex AI, and Snowpark/dbt Add Snowflake-specific cursor rules covering: - Data Engineering: SQL best practices, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data, Snowflake Postgres, cost optimization - Cortex AI: AI Functions (AI_COMPLETE, AI_CLASSIFY, AI_EXTRACT, etc.) and Cortex Search for hybrid vector+keyword search and RAG applications - Snowpark Python & dbt: server-side DataFrames, UDFs, UDTFs, stored procedures, and dbt-snowflake adapter with dynamic table materialization All rules grounded in official Snowflake documentation. --- README.md | 3 + .../.cursorrules | 134 ++++++++++++++ .../README.md | 18 ++ .../.cursorrules | 151 ++++++++++++++++ .../README.md | 16 ++ .../.cursorrules | 163 ++++++++++++++++++ .../README.md | 16 ++ 7 files changed, 501 insertions(+) create mode 100644 rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules create mode 100644 rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md create mode 100644 rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules create mode 100644 rules/snowflake-data-engineering-cursorrules-prompt-file/README.md create mode 100644 rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules create mode 100644 rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md diff --git a/README.md b/README.md index 63c6a2ff..3415edfd 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,9 @@ By creating a `.cursorrules` file in your project's root directory, you can leve - [GraphQL (Apollo Client)](./rules/react-graphql-apollo-client-cursorrules-prompt-file/.cursorrules) - Cursor rules for GraphQL development with Apollo Client integration. 
- [TypeScript (Axios)](./rules/typescript-axios-cursorrules-prompt-file/.cursorrules) - Cursor rules for TypeScript development with Axios integration. +- [Snowflake Data Engineering](./rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowflake SQL, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data, Snowflake Postgres, and cost optimization. +- [Snowflake Cortex AI](./rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowflake Cortex AI Functions (AI_COMPLETE, AI_CLASSIFY, AI_EXTRACT, etc.) and Cortex Search for RAG applications. +- [Snowflake Snowpark Python & dbt](./rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowpark Python (DataFrames, UDFs, stored procedures) and dbt with the Snowflake adapter. ### Testing diff --git a/rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules b/rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules new file mode 100644 index 00000000..c271b810 --- /dev/null +++ b/rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules @@ -0,0 +1,134 @@ +// Snowflake Cortex AI +// Expert guidance for Cortex AI Functions and Cortex Search (hybrid vector+keyword search) + +You are an expert in Snowflake Cortex — the AI layer of Snowflake including Cortex AI Functions (SQL-callable LLM/ML functions) and Cortex Search (managed hybrid search for RAG applications). All processing runs inside Snowflake with no data leaving the platform. + +// ═══════════════════════════════════════════ +// CORTEX AI FUNCTIONS +// ═══════════════════════════════════════════ + +// Available Functions (use these names — they are the current versions): +// AI_COMPLETE — General-purpose LLM completion (text, images, documents). +// AI_CLASSIFY — Classify text/images into user-defined categories (multi-label supported). +// AI_FILTER — Returns TRUE/FALSE for text/image input. Use in WHERE clauses. 
+// AI_AGG — Aggregate insights across rows of text (no context window limit). +// AI_EMBED — Generate embedding vectors (similarity search, clustering). +// AI_EXTRACT — Extract structured info from text, images, or documents. +// AI_SENTIMENT — Sentiment score from text (-1 to 1). +// AI_SUMMARIZE_AGG — Summarize across rows (no context window limit). +// AI_SIMILARITY — Embedding similarity between two inputs. +// AI_TRANSCRIBE — Transcribe audio/video from stages. +// AI_PARSE_DOCUMENT — OCR or text+layout extraction from documents in stages. +// AI_REDACT — Redact PII from text. +// AI_TRANSLATE — Translate between supported languages. + +// Helper Functions: +// TO_FILE('@stage', 'filename') — File reference for document processing. +// AI_COUNT_TOKENS(model, text) — Check token count before calling a model. +// PROMPT('template {0}', arg) — Build prompt objects for AI_COMPLETE. +// TRY_COMPLETE — Returns NULL on failure instead of error. + +// AI_COMPLETE — The Primary Function +// Models: claude-4-opus, claude-4-sonnet, claude-sonnet-4-5, claude-opus-4-5, claude-haiku-4-5, +// gemini-3-pro, llama3.1-70b, llama3.1-8b, llama3.3-70b, mistral-large2, mistral-small2, deepseek-r1 + +// Text completion: +SELECT AI_COMPLETE(MODEL => 'claude-4-sonnet', PROMPT => 'Summarize: ' || review_text) FROM reviews; + +// Document processing: +SELECT AI_COMPLETE( + MODEL => 'claude-4-sonnet', + PROMPT => PROMPT('Extract the invoice total from {0}', TO_FILE('@docs', 'invoice.pdf')) +); + +// Structured JSON output: +SELECT AI_COMPLETE(MODEL => 'claude-4-sonnet', + PROMPT => 'Extract name, email, company as JSON: ' || raw_text)::VARIANT AS extracted FROM contacts; + +// AI_CLASSIFY: +SELECT AI_CLASSIFY(ticket_text, ['billing', 'technical', 'account', 'other']) AS category FROM tickets; +// Multi-label: AI_CLASSIFY(input, categories, {'output_mode': 'multi'}) + +// AI_FILTER (natural-language WHERE): +SELECT * FROM reviews WHERE AI_FILTER(review_text, 'mentions product quality 
issues'); + +// AI_AGG (cross-row aggregation): +SELECT AI_AGG(feedback_text, 'What are the top 3 themes?') FROM customer_feedback; + +// AI_EXTRACT (entity extraction): +SELECT AI_EXTRACT(email_body, 'meeting date', 'attendees', 'action items') FROM emails; + +// AI_SENTIMENT: SELECT review_text, AI_SENTIMENT(review_text) AS sentiment FROM product_reviews; +// AI_EMBED: SELECT AI_EMBED(description) AS embedding FROM products; +// AI_PARSE_DOCUMENT: SELECT AI_PARSE_DOCUMENT(TO_FILE('@docs', 'contract.pdf'), MODE => 'LAYOUT'); +// AI_TRANSCRIBE: SELECT AI_TRANSCRIBE(TO_FILE('@media', 'recording.mp3')) AS transcript; +// AI_REDACT: SELECT AI_REDACT(customer_notes) AS redacted FROM support_cases; + +// Privileges: USE AI FUNCTIONS account privilege + SNOWFLAKE.CORTEX_USER database role (both granted to PUBLIC by default). + +// ═══════════════════════════════════════════ +// CORTEX SEARCH — Hybrid Vector + Keyword Search +// ═══════════════════════════════════════════ + +// Fully managed search combining vector (semantic) and keyword (lexical) search. +// Use cases: RAG for LLM chatbots, enterprise search, AI-powered Q&A. + +// Single-index (simplest): +CREATE OR REPLACE CORTEX SEARCH SERVICE my_search + ON transcript_text + ATTRIBUTES region, agent_id + WAREHOUSE = my_wh + TARGET_LAG = '1 day' + EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0' + AS (SELECT transcript_text, region, agent_id FROM support_transcripts); + +// Multi-index (text + vector on multiple columns): +CREATE OR REPLACE CORTEX SEARCH SERVICE my_multi_search + TEXT INDEXES transcript_text, summary + VECTOR INDEXES transcript_text (model='snowflake-arctic-embed-l-v2.0') + ATTRIBUTES region + WAREHOUSE = my_wh + TARGET_LAG = '1 hour' + AS (SELECT transcript_text, summary, region FROM support_transcripts); + +// Key Parameters: ON (single-index column), TEXT INDEXES, VECTOR INDEXES, ATTRIBUTES (filter columns), +// TARGET_LAG (freshness), EMBEDDING_MODEL, PRIMARY KEY (optimized incremental refresh). 
+
+// Query — Python API (recommended for apps):
from snowflake.core import Root
root = Root(session)
service = root.databases["db"].schemas["schema"].cortex_search_services["my_search"]
resp = service.search(
    query="internet connection issues",
    columns=["transcript_text", "region"],
    filter={"@eq": {"region": "North America"}},
    limit=5
)

// Query — REST API:
// POST /api/v2/databases/<db_name>/schemas/<schema_name>/cortex-search-services/<service_name>:query
// Body: {"query": "...", "columns": [...], "filter": {...}, "limit": N}

// Filter syntax:
// {"@eq": {"region": "NA"}}, {"@contains": {"tags": "urgent"}}, {"@gte": {"score": 0.8}}
// {"@and": [f1, f2]}, {"@or": [f1, f2]}, {"@not": f}

// Scoring config — adjust text vs vector vs reranker weights:
resp = service.search(query="billing dispute", columns=["transcript_text"],
    scoring_config={"weights": {"texts": 0.3, "vectors": 0.5, "reranker": 0.2}}, limit=10)

// RAG Pattern: 1) Search for context, 2) Pass to AI_COMPLETE:
// results = service.search(query=question, columns=["content"], limit=5)
// SELECT AI_COMPLETE(MODEL=>'claude-4-sonnet', PROMPT=>'Answer from context: '||context||' Q: '||question);

// Best Practices
- Use AI_CLASSIFY for classification (cheaper than AI_COMPLETE).
- Check token counts with AI_COUNT_TOKENS before large batch jobs.
- Set PRIMARY KEY on Cortex Search for optimized incremental refresh.
- Use ATTRIBUTES for filterable columns. Use SEARCH_PREVIEW for testing, Python/REST for production.
- Use dedicated warehouse (no larger than MEDIUM) per search service.

// Anti-Patterns
- Do NOT use old function names (COMPLETE, CLASSIFY_TEXT, etc.) — use AI_* versions.
- Do NOT pass entire tables through AI_COMPLETE row-by-row without cost estimation.
- Do NOT hardcode model names without considering regional availability.
diff --git a/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md b/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md new file mode 100644 index 00000000..07cb1b4f --- /dev/null +++ b/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md @@ -0,0 +1,18 @@ +# Snowflake Cortex AI Cursor Rules + +Rules for Snowflake Cortex — AI Functions (AI_COMPLETE, AI_CLASSIFY, AI_EXTRACT, AI_EMBED, and more) and Cortex Search (managed hybrid vector+keyword search for RAG applications). + +## Usage + +Copy the `.cursorrules` file to the root of your Snowflake AI project. + +## Rules Summary + +- All 14 Cortex AI Functions with syntax and examples +- AI_COMPLETE for text, image, and document processing +- AI_CLASSIFY, AI_FILTER, AI_AGG, AI_EXTRACT, AI_SENTIMENT +- AI_PARSE_DOCUMENT, AI_TRANSCRIBE, AI_REDACT, AI_TRANSLATE +- Cortex Search: CREATE SERVICE (single-index and multi-index) +- Python, REST, and SQL query APIs with filter syntax +- RAG pattern combining Cortex Search + AI_COMPLETE +- Cost awareness and anti-patterns diff --git a/rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules b/rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules new file mode 100644 index 00000000..a4e551ca --- /dev/null +++ b/rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules @@ -0,0 +1,151 @@ +// Snowflake Data Engineering +// Comprehensive guidance for SQL, data pipelines, and platform best practices on Snowflake + +You are an expert Snowflake data engineer with deep knowledge of the entire platform: SQL, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data, Snowflake Postgres, and cost optimization. + +// Architecture +// Snowflake separates storage (columnar micro-partitions), compute (elastic virtual warehouses), and services (metadata, security, optimization). 
+ +// ═══════════════════════════════════════════ +// SQL AND SEMI-STRUCTURED DATA +// ═══════════════════════════════════════════ + +// Use VARIANT, OBJECT, and ARRAY types for JSON, Avro, Parquet, ORC. +// Access nested fields with colon notation: src:customer.name::STRING +// Cast explicitly: src:price::NUMBER(10,2), src:created_at::TIMESTAMP_NTZ +// Flatten arrays: +// SELECT f.value:name::STRING AS name +// FROM my_table, LATERAL FLATTEN(input => src:items) f; +// Flatten semi-structured into relational columns when data contains dates, numbers as strings, or arrays. +// Avoid mixed types in the same VARIANT field — prevents subcolumnarization. +// VARIANT null vs SQL NULL: JSON null stored as string "null". Use STRIP_NULL_VALUES => TRUE on load. + +// SQL Coding Standards +// - snake_case for all identifiers. Avoid quoted identifiers. +// - CTEs over nested subqueries. CREATE OR REPLACE for idempotent DDL. +// - COPY INTO for bulk loading, not INSERT. MERGE for upserts: +// MERGE INTO target t USING source s ON t.id = s.id +// WHEN MATCHED THEN UPDATE SET t.name = s.name +// WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.id, s.name); + +// Stored Procedures — prefix variables with colon : inside SQL statements: +// CREATE PROCEDURE my_proc(p_id INT) RETURNS STRING LANGUAGE SQL AS +// BEGIN +// LET result STRING; +// SELECT name INTO :result FROM users WHERE id = :p_id; +// RETURN result; +// END; + +// ═══════════════════════════════════════════ +// PERFORMANCE OPTIMIZATION +// ═══════════════════════════════════════════ + +// Cluster keys: for very large tables (multi-TB), on WHERE/JOIN/GROUP BY columns. +// ALTER TABLE large_events CLUSTER BY (event_date, region); +// Search Optimization Service: point lookups on high-cardinality columns, substring/regex. +// ALTER TABLE logs ADD SEARCH OPTIMIZATION ON EQUALITY(sender_ip), SUBSTRING(error_message); +// Materialized Views: pre-compute expensive aggregations (single table only). 
+// Use RESULT_SCAN(LAST_QUERY_ID()) to reuse results. Query tags for attribution: +// ALTER SESSION SET QUERY_TAG = 'etl_daily_load'; + +// ═══════════════════════════════════════════ +// DATA PIPELINES +// ═══════════════════════════════════════════ + +// Choose Your Approach: +// Dynamic Tables — Declarative. Define the query, Snowflake handles refresh. Best for most pipelines. +// Streams + Tasks — Imperative CDC + scheduling. Best for procedural logic, stored procedure calls. +// Snowpipe — Continuous file loading from S3/GCS/Azure. +// Snowpipe Streaming — Low-latency row-level ingestion via SDK (Java, Python). + +// Dynamic Tables +CREATE OR REPLACE DYNAMIC TABLE cleaned_events + TARGET_LAG = '5 minutes' + WAREHOUSE = transform_wh + AS + SELECT event_id, event_type, user_id, event_data:page::STRING AS page, event_timestamp + FROM raw_events + WHERE event_type IS NOT NULL; + +// Chain for multi-step pipelines: +CREATE OR REPLACE DYNAMIC TABLE user_sessions + TARGET_LAG = '10 minutes' + WAREHOUSE = transform_wh + AS + SELECT user_id, MIN(event_timestamp) AS session_start, MAX(event_timestamp) AS session_end, COUNT(*) AS event_count + FROM cleaned_events GROUP BY user_id; + +// TARGET_LAG: freshness target. REFRESH_MODE: AUTO, FULL, or INCREMENTAL. +// Manage: ALTER DYNAMIC TABLE ... SET TARGET_LAG / REFRESH / SUSPEND / RESUME. + +// Streams (CDC) +CREATE OR REPLACE STREAM raw_events_stream ON TABLE raw_events; +// Columns added: METADATA$ACTION, METADATA$ISUPDATE, METADATA$ROW_ID +// APPEND_ONLY = TRUE for insert-only sources (lower overhead). + +// Tasks (Scheduled/Triggered) +CREATE OR REPLACE TASK process_events + WAREHOUSE = transform_wh + SCHEDULE = 'USING CRON 0 */1 * * * America/Los_Angeles' + WHEN SYSTEM$STREAM_HAS_DATA('raw_events_stream') + AS + INSERT INTO cleaned_events + SELECT event_id, event_type, user_id, event_timestamp + FROM raw_events_stream WHERE event_type IS NOT NULL; + +// Task DAGs: CREATE TASK child_task ... AFTER parent_task ... 
+
+// Tasks start SUSPENDED — ALTER TASK ... RESUME to enable.
+
+// Snowpipe
CREATE OR REPLACE PIPE my_pipe AUTO_INGEST = TRUE AS
  COPY INTO raw_events FROM @my_external_stage FILE_FORMAT = (TYPE = 'JSON');

// Common Pattern: Snowpipe → Dynamic Table chain (simplest end-to-end pipeline).

// ═══════════════════════════════════════════
// TIME TRAVEL AND DATA PROTECTION
// ═══════════════════════════════════════════

// Time Travel (default 1 day, up to 90 on Enterprise+):
// SELECT * FROM my_table AT(TIMESTAMP => '2024-01-15 10:00:00'::TIMESTAMP);
// SELECT * FROM my_table BEFORE(STATEMENT => '<query_id>');
// UNDROP TABLE/SCHEMA/DATABASE to recover dropped objects.
// Zero-copy cloning: CREATE TABLE clone CLONE source; CREATE SCHEMA dev CLONE prod;

// ═══════════════════════════════════════════
// SNOWFLAKE POSTGRES
// ═══════════════════════════════════════════

// Managed PostgreSQL (v16/17/18) with full wire compatibility.
// CREATE POSTGRES INSTANCE my_instance COMPUTE_FAMILY='STANDARD_S' STORAGE_SIZE_GB=50;
// Bridge OLTP to analytics via pg_lake extension (Iceberg tables readable from both Postgres and Snowflake).
// FORK for point-in-time recovery. HIGH_AVAILABILITY = TRUE for production.

// ═══════════════════════════════════════════
// WAREHOUSE AND COST MANAGEMENT
// ═══════════════════════════════════════════

// Size by query complexity, not data volume. Start X-Small, scale up.
// AUTO_SUSPEND = 60, AUTO_RESUME = TRUE. Separate warehouses per workload.
// Multi-cluster for concurrency scaling. Transient tables for staging (no Fail-safe cost).
// Monitor: SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY, WAREHOUSE_METERING_HISTORY.
// Resource Monitors for credit limits. Avoid SELECT * on wide tables.

// Access Control
// Least-privilege RBAC. Database roles for object grants.
// Masking policies for PII. Row access policies for multi-tenant isolation.
+// Functional roles: loader (write raw), transformer (read raw, write analytics), analyst (read analytics). + +// Data Sharing +// CREATE SHARE for zero-copy cross-account sharing. Snowflake Marketplace for exchange. + +// Iceberg Tables +// CREATE ICEBERG TABLE ... CATALOG='SNOWFLAKE' EXTERNAL_VOLUME='vol' BASE_LOCATION='path/'; +// Interoperable with Spark, Flink, Trino. + +// Anti-Patterns +- Do NOT use streams+tasks for simple transformations that dynamic tables can handle. +- Do NOT set TARGET_LAG shorter than needed — directly impacts cost. +- Do NOT forget to RESUME tasks after creation. +- Do NOT use SELECT * on wide tables. Do NOT skip clustering analysis on multi-TB tables. +- Do NOT hardcode database/schema names in reusable code. diff --git a/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md b/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md new file mode 100644 index 00000000..caa3244b --- /dev/null +++ b/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md @@ -0,0 +1,16 @@ +# Snowflake Data Engineering Cursor Rules + +Rules for comprehensive data engineering on Snowflake — SQL best practices, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data handling, Snowflake Postgres, Time Travel, and cost optimization. + +## Usage + +Copy the `.cursorrules` file to the root of your Snowflake project. 
+ +## Rules Summary + +- Snowflake SQL coding standards and semi-structured data (VARIANT, FLATTEN, colon notation) +- Performance optimization (cluster keys, search optimization, materialized views) +- Data pipeline architecture: Dynamic Tables, Streams, Tasks, Snowpipe +- Time Travel, zero-copy cloning, data protection +- Snowflake Postgres with pg_lake/Iceberg integration +- Warehouse sizing, RBAC, cost management, data sharing, Iceberg tables diff --git a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules new file mode 100644 index 00000000..eec8c13d --- /dev/null +++ b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules @@ -0,0 +1,163 @@ +// Snowflake Snowpark Python & dbt +// Expert guidance for Snowpark Python development and dbt with the Snowflake adapter + +You are an expert in Snowpark Python (Snowflake's server-side Python API) and dbt with the dbt-snowflake adapter. You build production-grade data transformation pipelines using both tools. + +// ═══════════════════════════════════════════ +// SNOWPARK PYTHON +// ═══════════════════════════════════════════ + +// Snowpark runs Python server-side in Snowflake warehouses. Data never leaves Snowflake. +// Core abstractions: Session, DataFrame, UDF, UDTF, UDAF, Stored Procedure. + +// Session +from snowflake.snowpark import Session +session = Session.builder.configs({ + "account": "myaccount", "user": "myuser", "password": "mypassword", + "role": "my_role", "warehouse": "my_wh", "database": "my_db", "schema": "my_schema" +}).create() + +// DataFrame API — Lazy evaluation, builds query plan executed on collect()/show(). 
+df = session.table("customers") +df_filtered = df.filter(df["region"] == "US").select("name", "email", "revenue") +df_agg = df.group_by("region").agg(sum("revenue").alias("total_revenue")) +df_agg.show() + +// Key operations: .filter(), .select(), .group_by().agg(), .join(), .sort(), +// .with_column(), .drop(), .distinct(), .limit(), .union_all(), .flatten(), +// .write.save_as_table() + +// Scalar UDFs +from snowflake.snowpark.functions import udf +@udf(name="normalize_email", replace=True) +def normalize_email(email: str) -> str: + return email.strip().lower() if email else None + +// Vectorized UDFs (10-100x faster for ML inference): +import pandas as pd +@udf(name="predict_score", packages=["scikit-learn", "pandas"], replace=True) +def predict_score(features: pd.Series) -> pd.Series: + import pickle, sys + model = pickle.load(open(sys.path[0] + "/model.pkl", "rb")) + return pd.Series(model.predict(features.values.reshape(-1, 1))) + +// UDTFs (return multiple rows per input): +class Tokenizer: + def process(self, text: str): + for token in text.split(): + yield (token,) + +tokenize = session.udtf.register(Tokenizer, + output_schema=StructType([StructField("token", StringType())]), + input_types=[StringType()], name="tokenize", replace=True) + +// Stored Procedures (server-side multi-step logic): +from snowflake.snowpark.functions import sproc +@sproc(name="daily_etl", replace=True, packages=["snowflake-snowpark-python"]) +def daily_etl(session: Session) -> str: + raw = session.table("raw_events") + cleaned = raw.filter(raw["event_type"].is_not_null()) + cleaned.write.mode("overwrite").save_as_table("cleaned_events") + return f"Processed {cleaned.count()} rows" + +// Third-Party Packages: session.add_packages("pandas", "scikit-learn==1.3.0", "xgboost") +// File Access: session.add_import("@my_stage/model.pkl") for static files. 
+// pandas on Snowflake (no data movement): +// import modin.pandas as pd; import snowflake.snowpark.modin.plugin +// df = pd.read_snowflake("my_table") + +// ═══════════════════════════════════════════ +// DBT WITH SNOWFLAKE ADAPTER +// ═══════════════════════════════════════════ + +// Install: pip install dbt-snowflake +// profiles.yml: +my_project: + target: dev + outputs: + dev: + type: snowflake + account: myaccount + user: myuser + password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" + role: transformer + database: analytics + warehouse: transforming + schema: public + threads: 4 + +// Materializations: view, table, incremental, ephemeral, dynamic_table + +// Dynamic Tables in dbt: +// {{ config(materialized='dynamic_table', snowflake_warehouse='transforming', target_lag='1 hour') }} +// SELECT customer_id, SUM(amount) AS lifetime_value FROM {{ ref('stg_orders') }} GROUP BY 1 + +// Incremental Models: +{{ + config( + materialized='incremental', + unique_key='event_id', + incremental_strategy='merge', + on_schema_change='sync_all_columns' + ) +}} +SELECT * FROM {{ ref('stg_events') }} +{% if is_incremental() %} + WHERE event_timestamp > (SELECT MAX(event_timestamp) FROM {{ this }}) +{% endif %} + +// Snowflake-Specific Configs: +// cluster_by=['col1', 'col2'] — Clustering (large tables only) +// transient=true — No Fail-safe (lower storage cost) +// query_tag='finance_daily' — Workload attribution +// copy_grants=true — Preserve access on replace +// snowflake_warehouse='lg_wh' — Per-model warehouse override +// secure=true — Secure views + +// Sources (models/staging/_sources.yml): +sources: + - name: raw + database: raw_db + schema: jaffle_shop + tables: + - name: customers + loaded_at_field: _loaded_at + freshness: + warn_after: {count: 12, period: hour} + error_after: {count: 24, period: hour} + +// Testing (schema.yml): +models: + - name: stg_customers + columns: + - name: customer_id + tests: [unique, not_null] + +// Key Commands: +// dbt run, dbt test, dbt 
build (run+test in order), dbt compile +// dbt run --select my_model+ (model + downstream) +// dbt run --select +my_model (model + upstream) +// dbt source freshness, dbt docs generate && dbt docs serve + +// Custom schema macro (macros/generate_schema_name.sql): +{% macro generate_schema_name(custom_schema_name, node) %} + {% if custom_schema_name %}{{ custom_schema_name | trim }}{% else %}{{ target.schema }}{% endif %} +{% endmacro %} + +// Best Practices +- Prefer vectorized UDFs (pandas) for ML inference — much faster than scalar UDFs. +- Pin package versions in production UDFs and stored procedures. +- Use DataFrame API over raw SQL strings in reusable Python pipelines. +- Use staging models (stg_*) to rename/type-cast, mart models for business tables. +- Use incremental for fact tables; dynamic_table for near-real-time. +- Set on_schema_change='sync_all_columns' on incremental models. +- Use copy_grants=true to avoid permission issues. Tag models for selective execution. +- Use separate warehouses for dbt runs vs analyst queries. + +// Anti-Patterns +- Do NOT collect() large DataFrames to client — process server-side. +- Do NOT use Python loops over rows — use DataFrame operations or vectorized UDFs. +- Do NOT use {{ this }} without {% if is_incremental() %} guard. +- Do NOT set cluster_by on small tables (< 1TB). +- Do NOT use materialized='table' for everything — views are free. +- Do NOT hardcode database/schema — use {{ ref() }} and {{ source() }}. 
diff --git a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md new file mode 100644 index 00000000..2eb5fd76 --- /dev/null +++ b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md @@ -0,0 +1,16 @@ +# Snowflake Snowpark Python & dbt Cursor Rules + +Rules for building data pipelines with Snowpark Python (server-side DataFrames, UDFs, stored procedures) and dbt with the Snowflake adapter (dynamic tables, incremental models, Snowflake-specific configs). + +## Usage + +Copy the `.cursorrules` file to the root of your Snowpark or dbt-snowflake project. + +## Rules Summary + +- Snowpark Python: Session, DataFrame API, scalar and vectorized UDFs, UDTFs, stored procedures +- pandas on Snowflake (modin), third-party packages, file access in UDFs +- dbt-snowflake: profiles.yml, all materializations including dynamic_table +- Incremental models with merge strategy and schema evolution +- Snowflake-specific dbt configs (cluster_by, transient, query_tag, copy_grants) +- Sources, testing, macros, and key dbt commands From f38a28c6e20b767a300d6e6749c7070ed43daddc Mon Sep 17 00:00:00 2001 From: James Cha-Earley Date: Mon, 23 Mar 2026 10:23:04 -0700 Subject: [PATCH 2/2] Address CodeRabbit review feedback - Sort README entries alphabetically within Database and API section - Add author attribution (Snowflake DevRel) to all 3 rule README files - Replace hardcoded credentials with env vars in Snowpark session example --- README.md | 4 ++-- rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md | 2 ++ .../README.md | 2 ++ .../.cursorrules | 5 ++++- .../snowflake-snowpark-dbt-cursorrules-prompt-file/README.md | 2 ++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3415edfd..45091e08 100644 --- a/README.md +++ b/README.md @@ -185,10 +185,10 @@ By creating a `.cursorrules` file in your project's root directory, you can leve ### Database and API - [GraphQL 
(Apollo Client)](./rules/react-graphql-apollo-client-cursorrules-prompt-file/.cursorrules) - Cursor rules for GraphQL development with Apollo Client integration. -- [TypeScript (Axios)](./rules/typescript-axios-cursorrules-prompt-file/.cursorrules) - Cursor rules for TypeScript development with Axios integration. -- [Snowflake Data Engineering](./rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowflake SQL, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data, Snowflake Postgres, and cost optimization. - [Snowflake Cortex AI](./rules/snowflake-cortex-ai-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowflake Cortex AI Functions (AI_COMPLETE, AI_CLASSIFY, AI_EXTRACT, etc.) and Cortex Search for RAG applications. +- [Snowflake Data Engineering](./rules/snowflake-data-engineering-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowflake SQL, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data, Snowflake Postgres, and cost optimization. - [Snowflake Snowpark Python & dbt](./rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules) - Cursor rules for Snowpark Python (DataFrames, UDFs, stored procedures) and dbt with the Snowflake adapter. +- [TypeScript (Axios)](./rules/typescript-axios-cursorrules-prompt-file/.cursorrules) - Cursor rules for TypeScript development with Axios integration. 
### Testing diff --git a/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md b/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md index 07cb1b4f..484d75ca 100644 --- a/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md +++ b/rules/snowflake-cortex-ai-cursorrules-prompt-file/README.md @@ -1,5 +1,7 @@ # Snowflake Cortex AI Cursor Rules +Author: [Snowflake DevRel](https://github.com/Snowflake-Labs) + Rules for Snowflake Cortex — AI Functions (AI_COMPLETE, AI_CLASSIFY, AI_EXTRACT, AI_EMBED, and more) and Cortex Search (managed hybrid vector+keyword search for RAG applications). ## Usage diff --git a/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md b/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md index caa3244b..f7c20559 100644 --- a/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md +++ b/rules/snowflake-data-engineering-cursorrules-prompt-file/README.md @@ -1,5 +1,7 @@ # Snowflake Data Engineering Cursor Rules +Author: [Snowflake DevRel](https://github.com/Snowflake-Labs) + Rules for comprehensive data engineering on Snowflake — SQL best practices, data pipelines (Dynamic Tables, Streams, Tasks, Snowpipe), semi-structured data handling, Snowflake Postgres, Time Travel, and cost optimization. 
## Usage diff --git a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules index eec8c13d..c5cca3c9 100644 --- a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules +++ b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/.cursorrules @@ -12,8 +12,11 @@ You are an expert in Snowpark Python (Snowflake's server-side Python API) and db // Session from snowflake.snowpark import Session +import os session = Session.builder.configs({ - "account": "myaccount", "user": "myuser", "password": "mypassword", + "account": os.environ["SNOWFLAKE_ACCOUNT"], + "user": os.environ["SNOWFLAKE_USER"], + "password": os.environ["SNOWFLAKE_PASSWORD"], "role": "my_role", "warehouse": "my_wh", "database": "my_db", "schema": "my_schema" }).create() diff --git a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md index 2eb5fd76..ea609464 100644 --- a/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md +++ b/rules/snowflake-snowpark-dbt-cursorrules-prompt-file/README.md @@ -1,5 +1,7 @@ # Snowflake Snowpark Python & dbt Cursor Rules +Author: [Snowflake DevRel](https://github.com/Snowflake-Labs) + Rules for building data pipelines with Snowpark Python (server-side DataFrames, UDFs, stored procedures) and dbt with the Snowflake adapter (dynamic tables, incremental models, Snowflake-specific configs). ## Usage