diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 82cd106..c6bb4fa 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -47,6 +47,36 @@ "source": "./plugins/amazon-location-service", "tags": ["aws", "location", "maps", "geospatial"], "version": "1.0.0" + }, + { + "category": "database", + "description": "Build with Aurora DSQL - manage schemas, execute queries, handle migrations, and develop applications with a serverless, distributed SQL database with PostgreSQL compatibility.", + "keywords": [ + "aws", + "aws agent skills", + "amazon", + "aurora", + "dsql", + "aurora-dsql", + "database", + "distributed-sql", + "distributed", + "distributed-database", + "serverless", + "serverless-database", + "postgresql", + "postgres", + "sql", + "schema", + "migration", + "multi-tenant", + "iam-auth", + "mcp" + ], + "name": "aurora-dsql", + "source": "./plugins/aurora-dsql", + "tags": ["aws", "database", "dsql", "aurora", "distributed-sql", "serverless", "postgresql"], + "version": "1.0.0" } ] } diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0a361aa..aa4a9e2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -31,6 +31,7 @@ tools/ @awslabs/agent-plugins-admins ## Plugins (alphabetically listed) plugins/deploy-on-aws @awslabs/agent-plugins-admins @awslabs/agent-plugins-maintainers @awslabs/agent-plugins-deploy-on-aws +plugins/aurora-dsql @awslabs/agent-plugins-admins @awslabs/agent-plugins-maintainers @awslabs/agent-plugins-dsql ## File must end with CODEOWNERS file diff --git a/plugins/aurora-dsql/.claude-plugin/plugin.json b/plugins/aurora-dsql/.claude-plugin/plugin.json new file mode 100644 index 0000000..73b741b --- /dev/null +++ b/plugins/aurora-dsql/.claude-plugin/plugin.json @@ -0,0 +1,25 @@ +{ + "author": { + "name": "Amazon Web Services" + }, + "description": "Build with Aurora DSQL - manage schemas, execute queries, handle migrations, and develop applications with DSQL-specific 
requirements. Aurora DSQL is a serverless, distributed SQL database with PostgreSQL compatibility.", + "homepage": "https://github.com/awslabs/agent-plugins", + "keywords": [ + "aws", + "aurora", + "dsql", + "database", + "distributed-sql", + "serverless", + "postgresql", + "postgres", + "sql", + "schema", + "migration", + "iam-auth" + ], + "license": "Apache-2.0", + "name": "aurora-dsql", + "repository": "https://github.com/awslabs/agent-plugins", + "version": "1.0.0" +} diff --git a/plugins/aurora-dsql/.mcp.json b/plugins/aurora-dsql/.mcp.json new file mode 100644 index 0000000..3597099 --- /dev/null +++ b/plugins/aurora-dsql/.mcp.json @@ -0,0 +1,13 @@ +{ + "mcpServers": { + "aurora-dsql": { + "command": "uvx", + "args": [ + "awslabs.aurora-dsql-mcp-server@latest" + ], + "env": { + "FASTMCP_LOG_LEVEL": "ERROR" + } + } + } +} diff --git a/plugins/aurora-dsql/hooks/hooks.json b/plugins/aurora-dsql/hooks/hooks.json new file mode 100644 index 0000000..40bdb63 --- /dev/null +++ b/plugins/aurora-dsql/hooks/hooks.json @@ -0,0 +1,15 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "mcp__aurora-dsql.*__transact", + "hooks": [ + { + "type": "prompt", + "prompt": "A DSQL transact operation completed. Verify the result: if it was a DDL change, confirm the schema looks correct using get_schema. If it was a DML change, confirm the affected row count is expected." + } + ] + } + ] + } +} diff --git a/plugins/aurora-dsql/scripts/README.md b/plugins/aurora-dsql/scripts/README.md new file mode 100644 index 0000000..fa4345c --- /dev/null +++ b/plugins/aurora-dsql/scripts/README.md @@ -0,0 +1,199 @@ +# Aurora DSQL Scripts + +Bash scripts for common Aurora DSQL cluster management and connection operations. +These scripts can be executed directly, used as agent tools, or configured as hooks. 
+ +## Prerequisites + +- AWS CLI configured with credentials (`aws configure`) +- `psql` client installed (for psql-connect.sh) +- `jq` installed (for JSON parsing) +- Appropriate IAM permissions: + - `dsql:CreateCluster` (for create-cluster.sh) + - `dsql:DeleteCluster` (for delete-cluster.sh) + - `dsql:GetCluster` (for cluster-info.sh) + - `dsql:ListClusters` (for list-clusters.sh) + - `dsql:DbConnect` or `dsql:DbConnectAdmin` (for psql-connect.sh) + +## Using Scripts as Tools + +Agents can execute these scripts directly via shell tool calls. Each script supports `--help` for usage: + +```bash +# List available clusters +./scripts/list-clusters.sh --region us-east-1 + +# Get cluster details +./scripts/cluster-info.sh abc123def456 + +# Connect and run a query +./scripts/psql-connect.sh --command "SELECT COUNT(*) FROM entities" +``` + +## Plugin Hooks + +This plugin ships a default `PostToolUse` hook in `hooks/hooks.json` that prompts schema/row verification after `transact` operations. The hook fires automatically — no user configuration required. + +### Adding Custom Hooks + +Add additional hooks to `.claude/settings.json` or override the defaults: + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "mcp__aurora-dsql.*__transact", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/scripts/cluster-info.sh $CLUSTER --region $REGION 2>/dev/null || true" + } + ] + } + ] + } +} +``` + +--- + +## Available Scripts + +### create-cluster.sh + +Create a new Aurora DSQL cluster. + +```bash +./scripts/create-cluster.sh --created-by claude-opus-4-6 +./scripts/create-cluster.sh --created-by claude-opus-4-6 --region us-east-1 +./scripts/create-cluster.sh --created-by claude-opus-4-6 --region us-west-2 --tags Environment=dev,Project=myapp +``` + +**Output:** Cluster identifier, endpoint, and ARN. Exports environment variables for use with other scripts. + +--- + +### delete-cluster.sh + +Delete an existing Aurora DSQL cluster. 
+ +```bash +./scripts/delete-cluster.sh abc123def456 +./scripts/delete-cluster.sh abc123def456 --region us-west-2 +./scripts/delete-cluster.sh abc123def456 --force +``` + +**Note:** Deletion is permanent and cannot be undone. + +--- + +### psql-connect.sh + +Connect to Aurora DSQL using psql with automatic IAM authentication. + +```bash +export CLUSTER=abc123def456 +export REGION=us-east-1 +./scripts/psql-connect.sh + +./scripts/psql-connect.sh abc123def456 --region us-west-2 +./scripts/psql-connect.sh --user myuser +./scripts/psql-connect.sh --command "SELECT * FROM entities LIMIT 5" +./scripts/psql-connect.sh --admin +``` + +**Features:** + +- Automatically generates IAM auth token (valid for 15 minutes) +- Supports both interactive sessions and command execution +- Uses `admin` user by default (override with `--user` or `$DB_USER`) + +--- + +### list-clusters.sh + +List all Aurora DSQL clusters in a region. + +```bash +./scripts/list-clusters.sh +./scripts/list-clusters.sh --region us-west-2 +``` + +--- + +### cluster-info.sh + +Get detailed information about a specific cluster. + +```bash +./scripts/cluster-info.sh abc123def456 +./scripts/cluster-info.sh abc123def456 --region us-west-2 +``` + +**Output:** JSON with cluster identifier, endpoint, ARN, status, and creation time. + +--- + +### loader.sh + +Install and run Aurora DSQL Loader for bulk data loading from S3. 
+ +```bash +./scripts/loader.sh --source-uri s3://my-bucket/data.parquet --table analytics_data +./scripts/loader.sh --source-uri s3://bucket/data.csv --table my_table --if-not-exists +./scripts/loader.sh --source-uri s3://bucket/data.csv --table my_table --dry-run +./scripts/loader.sh --install-only +``` + +**Features:** + +- Platform detection (Linux/macOS, x86_64/aarch64) +- Binary validation and secure downloads +- Resume interrupted loads with `--resume-job-id` +- Dry run validation with `--dry-run` + +--- + +## Environment Variables + +Scripts respect these environment variables: + +- `CLUSTER` - Default cluster identifier +- `REGION` - Default AWS region +- `AWS_REGION` - Fallback AWS region if `REGION` not set +- `DB_USER` - Default database user (defaults to 'admin') +- `AWS_PROFILE` - AWS CLI profile to use + +## Quick Start Workflow + +```bash +# 1. Create a cluster +./scripts/create-cluster.sh --created-by claude-opus-4-6 --region us-east-1 + +# Copy the export commands from output +export CLUSTER=abc123def456 +export REGION=us-east-1 + +# 2. Connect with psql +./scripts/psql-connect.sh + +# 3. Inside psql, create a table +CREATE TABLE entities ( + entity_id VARCHAR(255) PRIMARY KEY, + tenant_id VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL +); + +# 4. Exit psql and run a query from command line +./scripts/psql-connect.sh --command "SELECT * FROM information_schema.tables WHERE table_schema='public'" + +# 5. When done, delete the cluster +./scripts/delete-cluster.sh $CLUSTER +``` + +## Notes + +- **Token Expiry:** IAM auth tokens expire after 15 minutes. +- **Connection Limit:** DSQL supports up to 10,000 concurrent connections per cluster. +- **Database Name:** Always use `postgres` (only database available in DSQL). 
diff --git a/plugins/aurora-dsql/scripts/cluster-info.sh b/plugins/aurora-dsql/scripts/cluster-info.sh new file mode 100755 index 0000000..1e792cf --- /dev/null +++ b/plugins/aurora-dsql/scripts/cluster-info.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -euo pipefail + +# cluster-info.sh - Get detailed information about a DSQL cluster +# +# Usage: ./cluster-info.sh CLUSTER_IDENTIFIER [--region REGION] +# +# Examples: +# ./cluster-info.sh abc123def456 +# ./cluster-info.sh abc123def456 --region us-west-2 + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 CLUSTER_IDENTIFIER [--region REGION]" + echo "" + echo "Get detailed information about an Aurora DSQL cluster." 
+ echo "" + echo "Arguments:" + echo " CLUSTER_IDENTIFIER The cluster identifier" + echo "" + echo "Options:" + echo " --region REGION AWS region (default: \$AWS_REGION or us-east-1)" + exit 1 +fi + +CLUSTER_ID="$1" +shift + +REGION="${AWS_REGION:-us-east-1}" + +# Parse remaining arguments +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +echo "Fetching cluster information for: $CLUSTER_ID" +echo "" + +# Get cluster details +aws dsql get-cluster \ + --identifier "$CLUSTER_ID" \ + --region "$REGION" \ + --output json | jq '{ + identifier: .identifier, + endpoint: .endpoint, + arn: .arn, + status: .status, + creationTime: .creationTime + }' + +echo "" +ENDPOINT="${CLUSTER_ID}.dsql.${REGION}.on.aws" +echo "To connect with psql:" +echo "export CLUSTER=$CLUSTER_ID" +echo "export REGION=$REGION" +echo "./scripts/psql-connect.sh" diff --git a/plugins/aurora-dsql/scripts/create-cluster.sh b/plugins/aurora-dsql/scripts/create-cluster.sh new file mode 100755 index 0000000..f0490e4 --- /dev/null +++ b/plugins/aurora-dsql/scripts/create-cluster.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -euo pipefail + +# create-cluster.sh - Create an Aurora DSQL cluster +# +# Usage: ./create-cluster.sh --created-by MODEL_ID [--region REGION] [--tags KEY=VALUE,...] 
+# +# Examples: +# ./create-cluster.sh --created-by claude-opus-4-6 +# ./create-cluster.sh --created-by claude-opus-4-6 --region us-east-1 +# ./create-cluster.sh --created-by claude-opus-4-6 --region us-west-2 --tags Environment=dev,Project=myapp + +REGION="${AWS_REGION:-us-east-1}" +TAGS="" +CREATED_BY="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + --tags) + TAGS="$2" + shift 2 + ;; + --created-by) + CREATED_BY="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [--region REGION] [--tags KEY=VALUE,...]" + echo "" + echo "Creates an Aurora DSQL cluster in the specified region." + echo "" + echo "Options:" + echo " --region REGION AWS region (default: \$AWS_REGION or us-east-1)" + echo " --tags TAGS Comma-separated tags (e.g., Env=dev,Project=app)" + echo " --created-by ID Model/agent identifier added as a 'created_by' cluster tag" + echo " -h, --help Show this help message" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +echo "Creating Aurora DSQL cluster in $REGION..." + +# Prepend created_by tag if --created-by was provided +if [[ -n "$CREATED_BY" ]]; then + # Validate: allow only alphanumeric, hyphens, underscores, and dots (e.g. claude-opus-4-6) + if [[ ! "$CREATED_BY" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: --created-by must contain only alphanumeric characters, hyphens, underscores, and dots." 
>&2 + exit 1 + fi + if [[ -n "$TAGS" ]]; then + TAGS="created_by=${CREATED_BY},${TAGS}" + else + TAGS="created_by=${CREATED_BY}" + fi +fi + +# Build the AWS CLI command as an array to avoid eval and shell injection +CMD=(aws dsql create-cluster --region "$REGION") + +# Add tags if provided +if [[ -n "$TAGS" ]]; then + # Convert comma-separated tags to JSON format using jq for safe escaping + TAG_JSON=$(printf '%s\n' "$TAGS" | tr ',' '\n' | jq -Rn ' + [inputs | split("=") | {(.[0]): .[1:] | join("=")}] | add // {} + ') + CMD+=(--tags "$TAG_JSON") +fi + +# Execute the command directly (no eval) +"${CMD[@]}" > /tmp/dsql-cluster-create.json + +# Extract cluster identifier and endpoint +CLUSTER_ID=$(jq -r '.identifier' /tmp/dsql-cluster-create.json) +CLUSTER_ENDPOINT="${CLUSTER_ID}.dsql.${REGION}.on.aws" +CLUSTER_ARN=$(jq -r '.arn' /tmp/dsql-cluster-create.json) + +echo "" +echo "✓ Cluster created successfully!" +echo "" +echo "Cluster Identifier: $CLUSTER_ID" +echo "Cluster Endpoint: $CLUSTER_ENDPOINT" +echo "Cluster ARN: $CLUSTER_ARN" +echo "Region: $REGION" +echo "" +echo "Export these environment variables to use with MCP:" +echo "" +echo "export CLUSTER=$CLUSTER_ID" +echo "export REGION=$REGION" +echo "" +echo "To connect with psql:" +echo "./scripts/psql-connect.sh" + +# Clean up temp file +rm /tmp/dsql-cluster-create.json diff --git a/plugins/aurora-dsql/scripts/delete-cluster.sh b/plugins/aurora-dsql/scripts/delete-cluster.sh new file mode 100755 index 0000000..96eb9e6 --- /dev/null +++ b/plugins/aurora-dsql/scripts/delete-cluster.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -euo pipefail + +# delete-cluster.sh - Delete an Aurora DSQL cluster +# +# Usage: ./delete-cluster.sh CLUSTER_IDENTIFIER [--region REGION] [--force] +# +# Examples: +# ./delete-cluster.sh abc123def456 +# ./delete-cluster.sh abc123def456 --region us-west-2 +# ./delete-cluster.sh abc123def456 --force + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 CLUSTER_IDENTIFIER [--region REGION] [--force]" + echo "" + echo "Deletes an Aurora DSQL cluster." + echo "" + echo "Arguments:" + echo " CLUSTER_IDENTIFIER The cluster identifier to delete" + echo "" + echo "Options:" + echo " --region REGION AWS region (default: \$AWS_REGION or us-east-1)" + echo " --force Skip confirmation prompt" + exit 1 +fi + +CLUSTER_ID="$1" +shift + +REGION="${AWS_REGION:-us-east-1}" +FORCE=false + +# Parse remaining arguments +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + --force) + FORCE=true + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +# Confirmation prompt unless --force is used +if [[ "$FORCE" != "true" ]]; then + echo "⚠️ WARNING: This will permanently delete cluster: $CLUSTER_ID" + echo "" + read -p "Are you sure you want to continue? (type 'yes' to confirm): " CONFIRM + + if [[ "$CONFIRM" != "yes" ]]; then + echo "Deletion cancelled." + exit 0 + fi +fi + +echo "Deleting Aurora DSQL cluster: $CLUSTER_ID in $REGION..." + +# Delete the cluster +aws dsql delete-cluster \ + --identifier "$CLUSTER_ID" \ + --region "$REGION" + +echo "" +echo "✓ Cluster deletion initiated!" 
+echo "" +echo "Note: The cluster may take a few minutes to fully delete." +echo "Check status with: aws dsql get-cluster --identifier $CLUSTER_ID --region $REGION" diff --git a/plugins/aurora-dsql/scripts/list-clusters.sh b/plugins/aurora-dsql/scripts/list-clusters.sh new file mode 100755 index 0000000..7c126b3 --- /dev/null +++ b/plugins/aurora-dsql/scripts/list-clusters.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -euo pipefail + +# list-clusters.sh - List all Aurora DSQL clusters +# +# Usage: ./list-clusters.sh [--region REGION] +# +# Examples: +# ./list-clusters.sh +# ./list-clusters.sh --region us-west-2 + +REGION="${AWS_REGION:-us-east-1}" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [--region REGION]" + echo "" + echo "List all Aurora DSQL clusters in the specified region." + echo "" + echo "Options:" + echo " --region REGION AWS region (default: \$AWS_REGION or us-east-1)" + echo " -h, --help Show this help message" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +echo "Listing Aurora DSQL clusters in $REGION..." 
+echo "" + +# List clusters +aws dsql list-clusters --region "$REGION" --output table + +echo "" +echo "To get details about a cluster:" +echo "./scripts/cluster-info.sh CLUSTER_IDENTIFIER" diff --git a/plugins/aurora-dsql/scripts/loader.sh b/plugins/aurora-dsql/scripts/loader.sh new file mode 100755 index 0000000..635ee7d --- /dev/null +++ b/plugins/aurora-dsql/scripts/loader.sh @@ -0,0 +1,410 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -euo pipefail + +# loader.sh - Install and run Aurora DSQL Loader to load data from S3 +# +# Usage: ./loader.sh [CLUSTER_ID] --source-uri S3_URI --table TABLE [OPTIONS] +# +# Examples: +# ./loader.sh --source-uri s3://my-bucket/data.parquet --table analytics_data +# ./loader.sh abc123def456 --source-uri s3://bucket/data.csv --table my_table --region us-west-2 +# ./loader.sh --source-uri s3://bucket/data.csv --table my_table --if-not-exists +# ./loader.sh --source-uri s3://bucket/data.csv --table my_table --resume-job-id abc-123-def-456 +# ./loader.sh --install-only + +CLUSTER_ID="${CLUSTER:-}" +REGION="${REGION:-${AWS_REGION:-us-east-1}}" +SOURCE_URI="" +TABLE="" +RESUME_JOB_ID="" +MANIFEST_DIR="" +IF_NOT_EXISTS=false +DRY_RUN=false +INSTALL_ONLY=false +LOADER_VERSION="latest" + +# Installation directory +INSTALL_DIR="${HOME}/.local/bin" +LOADER_BIN="${INSTALL_DIR}/aurora-dsql-loader" + +show_help() { + cat << EOF +Usage: $0 [CLUSTER_ID] --source-uri S3_URI --table TABLE [OPTIONS] + +Install and run Aurora DSQL Loader to load data from S3 into Aurora DSQL. 
+ +Arguments: + CLUSTER_ID Cluster identifier (default: \$CLUSTER env var) + +Required Options: + --source-uri URI Source data URI (S3 path or local file) + --table TABLE Target table name + +Options: + --region REGION AWS region (default: \$REGION or \$AWS_REGION or us-east-1) + --resume-job-id ID Resume a previously interrupted load job + --manifest-dir DIR Directory for load manifest storage + --if-not-exists Auto-create table if it doesn't exist + --dry-run Validate without loading data + --install-only Only install the loader, don't run it + --version VERSION Loader version to install (default: latest) + -h, --help Show this help message + +Environment Variables: + CLUSTER Default cluster identifier + REGION Default AWS region + AWS_REGION Fallback AWS region + +Examples: + # Basic load from S3 + ./loader.sh --source-uri s3://my-bucket/data.parquet --table analytics_data + + # Load with auto-table creation + ./loader.sh --source-uri s3://bucket/data.csv --table my_table --if-not-exists + + # Resume a failed load (requires manifest-dir from original load) + ./loader.sh --source-uri s3://bucket/data.csv --table my_table --resume-job-id abc-123 --manifest-dir /path/to/manifest + + # Dry run to validate + ./loader.sh --source-uri s3://bucket/data.csv --table my_table --dry-run + +For more information, see: https://github.com/aws-samples/aurora-dsql-loader +EOF +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --source-uri) + SOURCE_URI="$2" + shift 2 + ;; + --table) + TABLE="$2" + shift 2 + ;; + --region) + REGION="$2" + shift 2 + ;; + --resume-job-id) + RESUME_JOB_ID="$2" + shift 2 + ;; + --manifest-dir) + MANIFEST_DIR="$2" + shift 2 + ;; + --if-not-exists) + IF_NOT_EXISTS=true + shift + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --install-only) + INSTALL_ONLY=true + shift + ;; + -h|--help) + show_help + exit 0 + ;; + -*) + echo "Unknown option: $1" + echo "Use --help for usage information." 
+ exit 1 + ;; + *) + CLUSTER_ID="$1" + shift + ;; + esac +done + +# Detect OS and architecture for GitHub release asset naming +detect_platform() { + local os arch + os="$(uname -s)" + arch="$(uname -m)" + + case "$os" in + Linux) + os="unknown-linux-gnu" + ;; + Darwin) + os="apple-darwin" + ;; + *) + echo "Error: Unsupported operating system: $os" >&2 + exit 1 + ;; + esac + + case "$arch" in + x86_64|amd64) + arch="x86_64" + ;; + aarch64|arm64) + arch="aarch64" + ;; + *) + echo "Error: Unsupported architecture: $arch" >&2 + exit 1 + ;; + esac + + echo "${arch}-${os}" +} + +# Minimum expected binary size in bytes (1 MB) to detect truncated or corrupt downloads +MIN_BINARY_SIZE=1048576 + +# Allowed download URL domain patterns +ALLOWED_DOWNLOAD_DOMAINS="^https://github\.com/aws-samples/aurora-dsql-loader/|^https://objects\.githubusercontent\.com/" + +# Validate that a downloaded file is a real executable binary, not an error page or corrupt file +validate_binary() { + local file_path="$1" + + # Check minimum file size + local file_size + file_size=$(wc -c < "$file_path") + if [[ "$file_size" -lt "$MIN_BINARY_SIZE" ]]; then + echo "Error: Downloaded file is too small (${file_size} bytes). Expected at least ${MIN_BINARY_SIZE} bytes." >&2 + echo "This may indicate a corrupt or incomplete download." >&2 + return 1 + fi + + # Verify the file is an actual binary (ELF on Linux, Mach-O on macOS), not an HTML error page + local file_type + file_type=$(file "$file_path") + if echo "$file_type" | grep -qiE "HTML|text|ASCII|XML|JSON"; then + echo "Error: Downloaded file appears to be text, not a binary executable." >&2 + echo "File type: $file_type" >&2 + echo "This may indicate the download URL returned an error page." >&2 + return 1 + fi + + local os + os="$(uname -s)" + case "$os" in + Linux) + if ! echo "$file_type" | grep -q "ELF"; then + echo "Error: Downloaded file is not a valid Linux ELF binary." 
>&2 + echo "File type: $file_type" >&2 + return 1 + fi + ;; + Darwin) + if ! echo "$file_type" | grep -qE "Mach-O|universal binary"; then + echo "Error: Downloaded file is not a valid macOS Mach-O binary." >&2 + echo "File type: $file_type" >&2 + return 1 + fi + ;; + esac + + return 0 +} + +# Install the loader if not present +install_loader() { + if [[ -x "$LOADER_BIN" ]]; then + echo "Aurora DSQL Loader already installed at $LOADER_BIN" >&2 + "$LOADER_BIN" --help 2>/dev/null || true + return 0 + fi + + echo "Installing Aurora DSQL Loader..." >&2 + + # Create install directory + mkdir -p "$INSTALL_DIR" + + local platform release_url download_url + platform="$(detect_platform)" + + # Get the download URL from GitHub releases + if [[ "$LOADER_VERSION" == "latest" ]]; then + release_url="https://api.github.com/repos/aws-samples/aurora-dsql-loader/releases/latest" + else + release_url="https://api.github.com/repos/aws-samples/aurora-dsql-loader/releases/tags/${LOADER_VERSION}" + fi + + echo "Fetching release information from GitHub..." >&2 + + # Extract the download URL for the appropriate platform + # Use --proto =https to enforce HTTPS-only and --fail to error on HTTP failures + local release_json + release_json=$(curl --proto "=https" --fail --show-error -sL "$release_url") || { + echo "Error: Failed to fetch release information from GitHub." >&2 + exit 1 + } + + download_url=$(echo "$release_json" | grep -o "https://[^\"]*aurora-dsql-loader-${platform}[^\"]*" | head -1) + + if [[ -z "$download_url" ]]; then + echo "Error: Could not find download URL for platform: $platform" >&2 + echo "You may need to build from source. See: https://github.com/aws-samples/aurora-dsql-loader" >&2 + exit 1 + fi + + # Validate the download URL points to an expected GitHub domain + if ! echo "$download_url" | grep -qE "$ALLOWED_DOWNLOAD_DOMAINS"; then + echo "Error: Download URL points to an unexpected domain." 
>&2 + echo "URL: $download_url" >&2 + echo "Expected: github.com/aws-samples/aurora-dsql-loader or objects.githubusercontent.com" >&2 + exit 1 + fi + + echo "Downloading from: $download_url" >&2 + + # Download with HTTPS enforcement and HTTP error detection + local temp_file + temp_file=$(mktemp) + trap "rm -f '$temp_file'" EXIT + + if ! curl --proto "=https" --fail --show-error -L "$download_url" -o "$temp_file"; then + echo "Error: Failed to download loader" >&2 + exit 1 + fi + + # Check if it's a tar.gz or direct binary + if file "$temp_file" | grep -q "gzip"; then + # Extract to a temporary directory first to avoid contaminating INSTALL_DIR on failure + local temp_extract_dir + temp_extract_dir=$(mktemp -d) + trap "rm -f '$temp_file'; rm -rf '$temp_extract_dir'" EXIT + + tar -xzf "$temp_file" -C "$temp_extract_dir" + + # Find the extracted binary + local extracted_bin + extracted_bin=$(find "$temp_extract_dir" -name "aurora-dsql-loader*" -type f 2>/dev/null | head -1) + if [[ -z "$extracted_bin" ]]; then + extracted_bin=$(find "$temp_extract_dir" -name "aurora-dsql-loader" -type f 2>/dev/null | head -1) + fi + + if [[ -z "$extracted_bin" ]]; then + echo "Error: Could not find aurora-dsql-loader binary in the downloaded archive." >&2 + exit 1 + fi + + chmod +x "$extracted_bin" + + # Validate the extracted binary before moving it into place + if ! validate_binary "$extracted_bin"; then + echo "Error: Binary validation failed. Aborting installation." >&2 + exit 1 + fi + + mv "$extracted_bin" "$LOADER_BIN" + rm -rf "$temp_extract_dir" + else + chmod +x "$temp_file" + + # Validate the binary before moving it into place + if ! validate_binary "$temp_file"; then + echo "Error: Binary validation failed. Aborting installation." 
>&2 + exit 1 + fi + + mv "$temp_file" "$LOADER_BIN" + trap - EXIT + fi + + echo "Aurora DSQL Loader installed successfully at $LOADER_BIN" >&2 + "$LOADER_BIN" --version 2>/dev/null || true + + # Check if install dir is in PATH + if [[ ":$PATH:" != *":$INSTALL_DIR:"* ]]; then + echo "" >&2 + echo "Note: $INSTALL_DIR is not in your PATH." >&2 + echo "Add it with: export PATH=\"\$PATH:$INSTALL_DIR\"" >&2 + fi +} + +# Main execution +main() { + # Always ensure loader is installed + install_loader + + if [[ "$INSTALL_ONLY" == "true" ]]; then + exit 0 + fi + + # Validate required parameters for load operation + if [[ -z "$SOURCE_URI" ]]; then + echo "Error: --source-uri is required" >&2 + echo "Use --help for usage information." >&2 + exit 1 + fi + + if [[ -z "$TABLE" ]]; then + echo "Error: --table is required" >&2 + echo "Use --help for usage information." >&2 + exit 1 + fi + + if [[ -z "$CLUSTER_ID" ]]; then + echo "Error: CLUSTER_ID is required. Set \$CLUSTER env var or pass as argument." >&2 + echo "" >&2 + echo "Usage: $0 CLUSTER_ID --source-uri URI --table TABLE [options]" >&2 + echo " or: export CLUSTER=abc123 && $0 --source-uri URI --table TABLE [options]" >&2 + exit 1 + fi + + # Build endpoint + local endpoint="${CLUSTER_ID}.dsql.${REGION}.on.aws" + + echo "Loading data into Aurora DSQL..." 
>&2 + echo " Endpoint: $endpoint" >&2 + echo " Source: $SOURCE_URI" >&2 + echo " Table: $TABLE" >&2 + [[ -n "$RESUME_JOB_ID" ]] && echo " Resume Job: $RESUME_JOB_ID" >&2 + [[ -n "$MANIFEST_DIR" ]] && echo " Manifest: $MANIFEST_DIR" >&2 + [[ "$IF_NOT_EXISTS" == "true" ]] && echo " Auto-create table if not exists" >&2 + [[ "$DRY_RUN" == "true" ]] && echo " DRY RUN MODE" >&2 + echo "" >&2 + + # Build the command + local cmd=("$LOADER_BIN" "load") + cmd+=("--endpoint" "$endpoint") + cmd+=("--source-uri" "$SOURCE_URI") + cmd+=("--table" "$TABLE") + + if [[ -n "$RESUME_JOB_ID" ]]; then + cmd+=("--resume-job-id" "$RESUME_JOB_ID") + fi + + if [[ -n "$MANIFEST_DIR" ]]; then + cmd+=("--manifest-dir" "$MANIFEST_DIR") + fi + + if [[ "$IF_NOT_EXISTS" == "true" ]]; then + cmd+=("--if-not-exists") + fi + + if [[ "$DRY_RUN" == "true" ]]; then + cmd+=("--dry-run") + fi + + # Execute the loader + "${cmd[@]}" +} + +main diff --git a/plugins/aurora-dsql/scripts/psql-connect.sh b/plugins/aurora-dsql/scripts/psql-connect.sh new file mode 100755 index 0000000..fcccfd6 --- /dev/null +++ b/plugins/aurora-dsql/scripts/psql-connect.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -euo pipefail + +# psql-connect.sh - Connect to Aurora DSQL using psql with IAM auth +# +# Usage: ./psql-connect.sh [CLUSTER_ID] [--region REGION] [--user USER] [--admin] [--ai-model MODEL_ID] [--command "SQL"] +# +# Examples: +# ./psql-connect.sh --ai-model claude-opus-4-6 +# ./psql-connect.sh abc123def456 --ai-model claude-opus-4-6 --region us-west-2 +# ./psql-connect.sh --ai-model claude-opus-4-6 --admin +# ./psql-connect.sh --ai-model claude-opus-4-6 --command "SELECT * FROM entities LIMIT 5" + +CLUSTER_ID="${CLUSTER:-}" +REGION="${REGION:-${AWS_REGION:-us-east-1}}" +USER="${DB_USER:-admin}" +ADMIN=false +COMMAND="" +AI_MODEL="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + --user) + USER="$2" + shift 2 + ;; + --admin) + ADMIN=true + shift + ;; + --command|-c) + COMMAND="$2" + shift 2 + ;; + --ai-model) + AI_MODEL="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [CLUSTER_ID] [--region REGION] [--user USER] [--admin] [--command SQL]" + echo "" + echo "Connect to Aurora DSQL using psql with IAM authentication." + echo "" + echo "Arguments:" + echo " CLUSTER_ID Cluster identifier (default: \$CLUSTER env var)" + echo "" + echo "Options:" + echo " --region REGION AWS region (default: \$REGION or \$AWS_REGION or us-east-1)" + echo " --user USER Database user (default: \$DB_USER or 'admin')" + echo " --admin Generate admin token (uses generate-db-connect-admin-auth-token)" + echo " --command SQL Execute SQL command and exit" + echo " --ai-model ID AI model identifier appended to application_name (e.g. 
claude-opus-4-6)" + echo " -h, --help Show this help message" + echo "" + echo "Environment Variables:" + echo " CLUSTER Default cluster identifier" + echo " REGION Default AWS region" + echo " DB_USER Default database user" + exit 0 + ;; + -*) + echo "Unknown option: $1" + exit 1 + ;; + *) + CLUSTER_ID="$1" + shift + ;; + esac +done + +# Validate cluster ID +if [[ -z "$CLUSTER_ID" ]]; then + echo "Error: CLUSTER_ID is required. Set \$CLUSTER env var or pass as argument." + echo "" + echo "Usage: $0 CLUSTER_ID [options]" + echo " or: export CLUSTER=abc123 && $0 [options]" + exit 1 +fi + +# Build endpoint +ENDPOINT="${CLUSTER_ID}.dsql.${REGION}.on.aws" + +# Generate auth token +echo "Generating IAM auth token for $ENDPOINT..." >&2 + +if [[ "$ADMIN" == "true" ]]; then + TOKEN=$(aws dsql generate-db-connect-admin-auth-token \ + --hostname "$ENDPOINT" \ + --region "$REGION") +else + TOKEN=$(aws dsql generate-db-connect-auth-token \ + --hostname "$ENDPOINT" \ + --region "$REGION") +fi + +# Check if token generation was successful +if [[ -z "$TOKEN" ]]; then + echo "Error: Failed to generate auth token. Check your AWS credentials." >&2 + exit 1 +fi + +echo "Connecting to $ENDPOINT as $USER..." >&2 +echo "" >&2 + +# Set application_name with AI model identifier if provided +PGAPPNAME="dsql-skill" +if [[ -n "$AI_MODEL" ]]; then + # Validate: allow only alphanumeric, hyphens, underscores, and dots + if [[ ! "$AI_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: --ai-model must contain only alphanumeric characters, hyphens, underscores, and dots." 
>&2 + exit 1 + fi + PGAPPNAME="dsql-skill/${AI_MODEL}" +fi +export PGAPPNAME + +# Connect with psql +if [[ -n "$COMMAND" ]]; then + # Execute command and exit + PGPASSWORD="$TOKEN" psql \ + -h "$ENDPOINT" \ + -U "$USER" \ + -d postgres \ + -c "$COMMAND" +else + # Interactive session + PGPASSWORD="$TOKEN" psql \ + -h "$ENDPOINT" \ + -U "$USER" \ + -d postgres +fi diff --git a/plugins/aurora-dsql/skills/aurora-dsql/SKILL.md b/plugins/aurora-dsql/skills/aurora-dsql/SKILL.md new file mode 100644 index 0000000..2f22d98 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/SKILL.md @@ -0,0 +1,258 @@ +--- +name: aurora-dsql +description: > + Build with Aurora DSQL - manage schemas, execute queries, and handle migrations with DSQL-specific requirements. + Use when developing a scalable or distributed database/application or user requests DSQL, Aurora DSQL, or distributed SQL. + Triggers on: "use DSQL", "create DSQL table", "migrate to DSQL", "DSQL schema", "distributed SQL database", + "serverless database", "Aurora DSQL", "PostgreSQL-compatible distributed". +license: Apache-2.0 +metadata: + tags: aws, aurora, dsql, distributed-sql, distributed, distributed-database, database, serverless, serverless-database, postgresql, postgres, sql, schema, migration, multi-tenant, iam-auth, aurora-dsql, mcp +--- + +# Amazon Aurora DSQL Skill + +Aurora DSQL is a serverless, PostgreSQL-compatible distributed SQL database. This skill provides direct database interaction via MCP tools, schema management, migration support, and multi-tenant patterns. 
+ +**Key capabilities:** + +- Direct query execution via MCP tools +- Schema management with DSQL constraints +- Migration support and safe schema evolution +- Multi-tenant isolation patterns +- IAM-based authentication + +--- + +## Reference Files + +Load these files as needed for detailed guidance: + +### [development-guide.md](references/development-guide.md) + +**When:** ALWAYS load before implementing schema changes or database operations +**Contains:** DDL rules, connection patterns, transaction limits, security best practices + +### MCP: + +#### [mcp-setup.md](mcp/mcp-setup.md) + +**When:** Always load for guidance using or updating the DSQL MCP server +**Contains:** Instructions for setting up the DSQL MCP server with 2 configuration options as +sampled in [.mcp.json](../../.mcp.json) + +1. Documentation-Tools Only +2. Database Operations (requires a cluster endpoint) + +#### [mcp-tools.md](mcp/mcp-tools.md) + +**When:** Load when you need detailed MCP tool syntax and examples +**Contains:** Tool parameters, detailed examples, usage patterns + +### [language.md](references/language.md) + +**When:** MUST load when making language-specific implementation choices +**Contains:** Driver selection, framework patterns, connection code for Python/JS/Go/Java/Rust + +### [dsql-examples.md](references/dsql-examples.md) + +**When:** Load when looking for specific implementation examples +**Contains:** Code examples, repository patterns, multi-tenant implementations + +### [troubleshooting.md](references/troubleshooting.md) + +**When:** Load when debugging errors or unexpected behavior +**Contains:** Common pitfalls, error messages, solutions + +### [onboarding.md](references/onboarding.md) + +**When:** User explicitly requests to "Get started with DSQL" or similar phrase +**Contains:** Interactive step-by-step guide for new users + +### [access-control.md](references/access-control.md) + +**When:** MUST load when creating database roles, granting permissions, setting 
up schemas for applications, or handling sensitive data +**Contains:** Scoped role setup, IAM-to-database role mapping, schema separation for sensitive data, role design patterns + +### DDL Migrations (modular): + +#### [ddl-migrations/overview.md](references/ddl-migrations/overview.md) + +**When:** MUST load when performing DROP COLUMN, RENAME COLUMN, ALTER COLUMN TYPE, or DROP CONSTRAINT +**Contains:** Table recreation pattern overview, transaction rules, common verify & swap pattern + +#### [ddl-migrations/column-operations.md](references/ddl-migrations/column-operations.md) + +**When:** Load for DROP COLUMN, ALTER COLUMN TYPE, SET/DROP NOT NULL, SET/DROP DEFAULT migrations +**Contains:** Step-by-step migration patterns for column-level changes + +#### [ddl-migrations/constraint-operations.md](references/ddl-migrations/constraint-operations.md) + +**When:** Load for ADD/DROP CONSTRAINT, MODIFY PRIMARY KEY, column split/merge migrations +**Contains:** Step-by-step migration patterns for constraint and structural changes + +#### [ddl-migrations/batched-migration.md](references/ddl-migrations/batched-migration.md) + +**When:** Load when migrating tables exceeding 3,000 rows +**Contains:** OFFSET-based and cursor-based batching patterns, progress tracking, error handling + +### MySQL Migrations (modular): + +#### [mysql-migrations/type-mapping.md](references/mysql-migrations/type-mapping.md) + +**When:** MUST load when migrating MySQL schemas to DSQL +**Contains:** MySQL data type mappings, feature alternatives, DDL operation mapping + +#### [mysql-migrations/ddl-operations.md](references/mysql-migrations/ddl-operations.md) + +**When:** Load when translating MySQL DDL operations to DSQL equivalents +**Contains:** ALTER COLUMN, DROP COLUMN, AUTO_INCREMENT, ENUM, SET, FOREIGN KEY migration patterns + +#### [mysql-migrations/full-example.md](references/mysql-migrations/full-example.md) + +**When:** Load when migrating a complete MySQL table to DSQL +**Contains:** 
End-to-end MySQL CREATE TABLE migration example with decision summary + +--- + +## MCP Tools Available + +The `aurora-dsql` MCP server provides these tools: + +**Database Operations:** + +1. **readonly_query** - Execute SELECT queries (returns list of dicts) +2. **transact** - Execute DDL/DML statements in transaction (takes list of SQL statements) +3. **get_schema** - Get table structure for a specific table + +**Documentation & Knowledge:** + +1. **dsql_search_documentation** - Search Aurora DSQL documentation +2. **dsql_read_documentation** - Read specific documentation pages +3. **dsql_recommend** - Get DSQL best practice recommendations + +**Note:** There is no `list_tables` tool. Use `readonly_query` with information_schema. + +See [mcp-setup.md](mcp/mcp-setup.md) for detailed setup instructions. +See [mcp-tools.md](mcp/mcp-tools.md) for detailed usage and examples. + +--- + +## CLI Scripts Available + +Bash scripts in [scripts/](../../scripts/) for cluster management (create, delete, list, cluster info), psql connection, and bulk data loading from local/s3 csv/tsv/parquet files. +See [scripts/README.md](../../scripts/README.md) for usage and hook configuration. + +--- + +## Quick Start + +### 1. List tables and explore schema + +``` +Use readonly_query with information_schema to list tables +Use get_schema to understand table structure +``` + +### 2. Query data + +``` +Use readonly_query for SELECT queries +Always include tenant_id in WHERE clause for multi-tenant apps +Validate inputs carefully (no parameterized queries available) +``` + +### 3. Execute schema changes + +``` +Use transact tool with list of SQL statements +Follow one-DDL-per-transaction rule +Always use CREATE INDEX ASYNC in separate transaction +``` + +--- + +## Common Workflows + +### Workflow 1: Create Multi-Tenant Schema + +1. Create main table with tenant_id column using transact +2. Create async index on tenant_id in separate transact call +3. 
Create composite indexes for common query patterns (separate transact calls) +4. Verify schema with get_schema + +- MUST include tenant_id in all tables +- MUST use `CREATE INDEX ASYNC` exclusively +- MUST issue each DDL in its own transact call: `transact(["CREATE TABLE ..."])` +- MUST store arrays/JSON as TEXT + +### Workflow 2: Safe Data Migration + +1. Add column using transact: `transact(["ALTER TABLE ... ADD COLUMN ..."])` +2. Populate existing rows with UPDATE in separate transact calls (batched under 3,000 rows) +3. Verify migration with readonly_query using COUNT +4. Create async index for new column using transact if needed + +- MUST add column first, populate later +- MUST issue ADD COLUMN with only name and type; apply DEFAULT via separate UPDATE +- MUST batch updates under 3,000 rows in separate transact calls +- MUST issue each ALTER TABLE in its own transaction + +### Workflow 3: Application-Layer Referential Integrity + +**INSERT:** MUST validate parent exists with readonly_query → throw error if not found → insert child with transact. + +**DELETE:** MUST check dependents with readonly_query COUNT → return error if dependents exist → delete with transact if safe. + +### Workflow 4: Query with Tenant Isolation + +1. ALWAYS include tenant_id in WHERE clause +2. MUST validate and sanitize tenant_id input (no parameterized queries!) +3. MUST use readonly_query with validated tenant_id + +- MUST validate ALL inputs before building SQL (SQL injection risk!) +- MUST reject cross-tenant access at application layer +- SHOULD use allowlists or regex validation for tenant IDs + +### Workflow 5: Set Up Scoped Database Roles + +MUST load [access-control.md](references/access-control.md) for role setup, IAM mapping, and schema permissions. + +### Workflow 6: Table Recreation DDL Migration + +MUST load [ddl-migrations/overview.md](references/ddl-migrations/overview.md) for steps, rules, and verify & swap pattern. 
+ +### Workflow 7: MySQL to DSQL Schema Migration + +MUST load [mysql-migrations/type-mapping.md](references/mysql-migrations/type-mapping.md) for type mappings, feature alternatives, and migration steps. + +--- + +## Best Practices + +- **SHOULD read guidelines first** - Check [development-guide.md](references/development-guide.md) before making schema changes +- **SHOULD use preferred language patterns** - Check [language.md](references/language.md) +- **SHOULD Execute queries directly** - PREFER MCP tools for ad-hoc queries +- **REQUIRED: Follow DDL Guidelines** - Refer to [DDL Rules](references/development-guide.md#schema-ddl-rules) +- **SHALL repeatedly generate fresh tokens** - Refer to [Connection Limits](references/development-guide.md#connection-rules) +- **ALWAYS use ASYNC indexes** - `CREATE INDEX ASYNC` is mandatory +- **MUST Serialize arrays/JSON as TEXT** - Store arrays/JSON as TEXT (comma separated, JSON.stringify) +- **ALWAYS Batch under 3,000 rows** - maintain transaction limits +- **REQUIRED: Sanitize SQL inputs with allowlists, regex, and quote escaping** - See [Input Validation](mcp/mcp-tools.md#input-validation-critical) +- **MUST follow correct Application Layer Patterns** - when multi-tenant isolation or application referential integrity are required; refer to [Application Layer Patterns](references/development-guide.md#application-layer-patterns) +- **REQUIRED use DELETE for truncation** - DELETE is the only supported operation for truncation +- **SHOULD test any migrations** - Verify DDL on dev clusters before production +- **Plan for Horizontal Scale** - DSQL is designed to optimize for massive scales without latency drops; refer to [Horizontal Scaling](references/development-guide.md#horizontal-scaling-best-practice) +- **SHOULD use connection pooling in production applications** - Refer to [Connection Pooling](references/development-guide.md#connection-pooling-recommended) +- **SHOULD debug with the troubleshooting guide** - Always refer 
to the resources and guidelines in [troubleshooting.md](references/troubleshooting.md) +- **ALWAYS use scoped roles for applications** - Create database roles with `dsql:DbConnect`; refer to [Access Control](references/access-control.md) + +--- + +## Additional Resources + +- [Aurora DSQL Documentation](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/) +- [Code Samples Repository](https://github.com/aws-samples/aurora-dsql-samples) +- [PostgreSQL Compatibility](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility.html) +- [IAM Authentication Guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/using-database-and-iam-roles.html) +- [CloudFormation Resource](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-dsql-cluster.html) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-setup.md b/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-setup.md new file mode 100644 index 0000000..2fe4520 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-setup.md @@ -0,0 +1,375 @@ +## Plugin Default Configuration + +The plugin ships with a documentation-only `.mcp.json` at the plugin root (no cluster endpoint, no `--allow-writes`). This means the MCP server provides DSQL documentation search, reading, and recommendations out of the box without requiring any cluster connection. + +To enable database operations (queries, schema exploration, DDL, DML), users must update the plugin's `.mcp.json` with their cluster details. + +### Default Documentation-Only Config + +The plugin's `.mcp.json` is pre-configured as follows: + +```json +{ + "mcpServers": { + "aurora-dsql": { + "command": "uvx", + "args": ["awslabs.aurora-dsql-mcp-server@latest"], + "env": { "FASTMCP_LOG_LEVEL": "ERROR" } + } + } +} +``` + +To upgrade to full database operations, add `--cluster_endpoint`, `--region`, `--database_user`, and optionally `--allow-writes` to the args array in the plugin's `.mcp.json`. 
+ +--- + +# MCP Server Setup Instructions + +## Prerequisites: + +```bash +uv --version +``` + +**If missing:** + +- Install from: [Astral](https://docs.astral.sh/uv/getting-started/installation/) + +## General MCP Configuration: + +Add the following configuration after checking if the user wants documentation-only functionality +or database operation support too. + +### Documentation-Only Configuration + +```json +{ + "mcpServers": { + "awslabs.aurora-dsql-mcp-server": { + "command": "uvx", + "args": [ + "awslabs.aurora-dsql-mcp-server@latest" + ], + "env": { + "FASTMCP_LOG_LEVEL": "ERROR" + }, + "disabled": false, + "autoApprove": [] + } + } +} +``` + +### Database Operation Support Configuration + +```json +{ + "mcpServers": { + "aurora-dsql": { + "command": "uvx", + "args": [ + "awslabs.aurora-dsql-mcp-server@latest", + "--cluster_endpoint", + "[your dsql cluster endpoint, e.g. abcdefghijklmnopqrst234567.dsql.us-east-1.on.aws]", + "--region", + "[your dsql cluster region, e.g. us-east-1]", + "--database_user", + "[your dsql username, e.g. admin]", + "--profile", + "[your aws profile name, eg. default]", + "--allow-writes" + ], + "env": { + "FASTMCP_LOG_LEVEL": "ERROR", + "REGION": "[your dsql cluster region, eg. us-east-1, only when necessary]", + "AWS_PROFILE": "[your aws profile name, eg. default]" + }, + "disabled": false, + "autoApprove": [] + } + } +} +``` + +### Optional Arguments and Environment Variables: + +The following args and environment variables are not required, but may be required if the user +has custom AWS configurations or would like to allow/disallow the MCP server mutating their database. + +- Arg: `--profile` or Env: `"AWS_PROFILE"` only need + to be configured for non-default values. +- Env: `"REGION"` when the cluster region management is + distinct from user's primary region in project/application. +- Arg: `--allow-writes` based on how permissive the user wants + to be for the MCP server. 
Always ask the user if writes + should be allowed. + +## Coding Assistant - Custom Instructions + +Before proceeding, identify which coding assistant you are adding the MCP server to and +navigate to those custom instructions. + +1. [Claude Code](#claude-code) +2. [Gemini](#gemini) +3. [Codex](#codex) + +## == STOP READING HERE AND PROCEED TO CORRECT SECTION == + +## Claude Code + +**Check if MCP server is configured:** +Look for `aurora-dsql` in MCP settings in either `~/.claude.json` or in a `.mcp.json` +file in the project root. + +**If not configured, offer to set up:** + +Edit the appropriate MCP settings file as outlined below. + +### Claude Code CLI + +Check if the Claude CLI is installed: + +```bash +claude --version +``` + +If present, prefer [default installation](#default-installation---claude-code-cli-command). +If missing, prefer [alternative installation](#alternative-directly-editupdate-the-json-configurations) + +### Setup Instructions: + +#### Choosing the Right Scope + +Claude Code offers 3 different scopes: local (default), project, and user and details which scope to +choose based on credential sensitivity and need to share. _**What scope does the user prefer?**_ + +1. **Local-scoped** servers represent the default configuration level and are stored in + `~/.claude.json` under your project's path. They're **both** private to you and only accessible + within the current project directory. This is the default `scope` when creating MCP servers. +2. **Project-scoped** servers **enable team collaboration** while still only being accessible in a + project directory. Project-scoped servers add a `.mcp.json` file at your project's root directory. + This file is designed to be checked into version control, ensuring all team members have access + to the same MCP tools and services. When you add a project-scoped server, Claude Code automatically + creates or updates this file with the appropriate configuration structure. +3. 
**User-scoped** servers are stored in `~/.claude.json` and are available across all projects on + your machine while remaining **private to your user account.** + +#### Default Installation - Claude Code CLI Command + +Use the Claude Code CLI. + +```bash +claude mcp add aurora-dsql \ + --scope $SCOPE \ + --env FASTMCP_LOG_LEVEL="ERROR" \ + -- uvx "awslabs.aurora-dsql-mcp-server@latest" \ + --cluster_endpoint "[dsql-cluster-id].dsql.[region].on.aws" \ + --region "[dsql cluster region, eg. us-east-1]" \ + --database_user "[your-username]" +``` + +**Does the user want to allow writes?** +Add the additional argument flag. + +```bash +--allow-writes +``` + +##### **Troubleshooting: Using Claude Code with Bedrock on a different AWS Account** + +If Claude Code is configured with a Bedrock AWS account or profile that is distinct from the profile +needed to connect to your dsql cluster, additional environment variables are required: + +``` +--env AWS_PROFILE="[dsql profile, eg. default]" \ +--env AWS_REGION="[dsql cluster region, eg. us-east-1]" \ +``` + +#### Alternative: Directly edit/update the JSON Configurations + +You can also directly configure the MCP adding the [provided MCP json configuration](#general-mcp-configuration) +to the (new or existing) relevant json file and field by scope. + +##### Local + +Update `~/.claude.json` within the project-specific `mcpServers` field: + +``` +{ + "projects": { + "/path/to/project": { + "mcpServers": {} + } + } +} +``` + +##### Project + +Add/update the `.mcp.json` file in the project root with the specified MCP configuration, +([sample file](../../../.mcp.json)) + +##### User + +Update `~/.claude.json` at a top-level `mcpServers` field: + +``` +{ + "mcpServers": {} +} +``` + +### Verification + +After setup, verify the MCP server status. You may need to restart your Claude Code session. You should see the `amazon-aurora-dsql` server listed with its current status. 
+ +``` +claude mcp list +``` + +## Gemini + +**Check if the MCP server is configured:** +Look for the `aurora-dsql` MCP server: + +Gemini CLI command: + +```bash +gemini mcp list +``` + +### Setup Instructions: + +#### Choosing the Right Scope + +Gemini offers 2 scopes: project (default) and user. _**What scope does the user prefer?**_ + +1. **Project-Scoped** servers are only accessible from the project's root directory and added to + the project configuration: `.gemini/settings.json`. Useful for project-specific tools that should + stay within the codebase. +2. **User-Scoped** servers are accessible from all projects you work on with the Gemini CLI and + added to global configuration: `~/.gemini/settings.json` + +#### Default Installation - Gemini CLI Command + +Using the Gemini CLI. + +```bash +gemini mcp add \ + --scope $SCOPE \ + --env FASTMCP_LOG_LEVEL="ERROR" \ + aurora-dsql \ + uvx "awslabs.aurora-dsql-mcp-server@latest" \ + -- \ + --cluster_endpoint "[dsql-cluster-id].dsql.[region].on.aws" \ + --region "[dsql cluster region, eg. us-east-1]" \ + --database_user "[your-username]" +``` + +#### Alternative: Directly edit/update the JSON Configurations + +You can also directly configure the MCP adding the [provided MCP json configuration](#general-mcp-configuration) +to `.gemini/settings.json` (project scope) or `~/.gemini/settings.json` + +``` +{ + ...other fields... + "mcpServers": { + } +} +``` + +#### Troubleshooting and Optional Arguments + +**Does the user want to allow writes?** +Add the additional argument flag. + +```bash +--allow-writes +``` + +**Are there multiple AWS credentials configured in the application or environment?** +Add environment variables for AWS Profile and Region for the DSQL cluster to the command. + +```bash +--env AWS_PROFILE="[dsql profile, eg. default]" \ +--env AWS_REGION="[dsql cluster region, eg. us-east-1]" \ +``` + +### Verification + +Restart Gemini CLI. 
+ +```bash +gemini mcp list +``` + +Should see `aurora-dsql` with a `Connected` status. + +## Codex + +**Check if the MCP server is configured:** + +Look for `aurora-dsql` in the TUI + +```bash +/mcp +``` + +### Setup Instructions + +#### Default Installation - Codex CLI + +Using the Codex CLI: + +```bash +codex mcp add aurora-dsql \ + --env FASTMCP_LOG_LEVEL="ERROR" \ + -- uvx "awslabs.aurora-dsql-mcp-server@latest" \ + --cluster_endpoint "[dsql-cluster-id].dsql.[region].on.aws" \ + --region "[dsql cluster region, eg. us-east-1]" \ + --database_user "[your-username]" +``` + +#### Alternative: Directly modifying `config.toml` + +For more fine grained control over MCP server options, you can manually edit the `~/.codex/config.toml` +configuration file. Each MCP server is configured with a `[mcp_servers.]` table in the +config file. + +``` +[mcp_servers.amazon-aurora-dsql] +command = "uvx" +args = [ + "awslabs.aurora-dsql-mcp-server@latest", + "--cluster_endpoint", ".dsql..on.aws", + "--region", "", + "--database_user", "" +] + +[mcp_servers.amazon-aurora-dsql.env] +FASTMCP_LOG_LEVEL = "ERROR" +``` + +#### Troubleshooting and Optional Arguments + +**Does the user want to allow writes?** +Add the additional argument flag. + +```bash +--allow-writes +``` + +**Are there multiple AWS credentials configured in the application or environment?** +Add environment variables for AWS Profile and Region for the DSQL cluster to the command. + +``` +AWS_PROFILE = "[dsql profile, eg. default]" \ +AWS_REGION = "[dsql cluster region, eg. 
us-east-1]" \ +``` + +## Additional Documentation + +- [MCP Server Setup Guide](https://awslabs.github.io/mcp/servers/aurora-dsql-mcp-server) +- [DSQL MCP User Guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/SECTION_aurora-dsql-mcp-server.html) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-tools.md b/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-tools.md new file mode 100644 index 0000000..c8aa7c9 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/mcp/mcp-tools.md @@ -0,0 +1,345 @@ +# Aurora DSQL MCP Tools Reference + +Detailed reference for the aurora-dsql MCP server tools based on the actual implementation. + +## MCP Server Configuration + +**Package:** `awslabs.aurora-dsql-mcp-server@latest` +**Connection:** uvx-based MCP server +**Authentication:** AWS IAM credentials with automatic token generation + +**Environment Variables:** + +- `CLUSTER` - Your DSQL cluster identifier (used to form endpoint) +- `REGION` - AWS region (e.g., "us-east-1") +- `AWS_PROFILE` - AWS CLI profile (optional, uses default if not set) + +**Command Line Flags:** + +- `--cluster_endpoint` - Full cluster endpoint (e.g., "abc123.dsql.us-east-1.on.aws") +- `--database_user` - Database username (typically "admin") +- `--region` - AWS region +- `--allow-writes` - Enable write operations (required for `transact` tool) +- `--profile` - AWS credentials profile + +**Permissions Required:** + +- `dsql:DbConnect` - Connect to DSQL cluster +- `dsql:DbConnectAdmin` - Admin access for DDL operations + +**Database Name**: Always use `postgres` (only database available in DSQL) + +--- + +## Database Operation Tools + +### 1. readonly_query - Execute read-only SQL queries + +**Use for:** SELECT queries, data exploration, ad-hoc analysis + +**Parameters:** + +- `sql` (string, required) - SQL query to run + +**Returns:** List of dictionaries containing query results + +**Security:** + +- Automatically prevents mutating keywords (INSERT, UPDATE, DELETE, etc.) 
+- Checks for SQL injection risks +- Prevents transaction bypass attempts + +**Examples:** + +```sql +-- Simple SELECT +SELECT * FROM entities WHERE tenant_id = 'tenant-123' LIMIT 10 + +-- Aggregate query +SELECT tenant_id, COUNT(*) as count FROM objectives GROUP BY tenant_id + +-- Join query +SELECT e.entity_id, e.name, o.title +FROM entities e +INNER JOIN objectives o ON e.entity_id = o.entity_id +WHERE e.tenant_id = 'tenant-123' +``` + +**Note:** Parameterized queries ($1, $2) are NOT supported by this MCP tool. Use string interpolation carefully and validate inputs to prevent SQL injection. + +--- + +### 2. transact - Execute write operations in a transaction + +**Use for:** INSERT, UPDATE, DELETE, CREATE TABLE, ALTER TABLE + +**Parameters:** + +- `sql_list` (List[string], required) - **List of SQL statements** to execute in a transaction + +**Returns:** List of dictionaries with execution results + +**Requirements:** + +- Server must be started with `--allow-writes` flag +- Cannot be used in read-only mode + +**Behavior:** + +- Automatically wraps statements in BEGIN/COMMIT +- Rolls back on any error +- All statements execute atomically + +**Examples:** + +```python +# Single DDL statement (still needs to be in a list) +["CREATE TABLE IF NOT EXISTS entities (...)"] + +# Create table with index (two separate statements) +[ + "CREATE TABLE IF NOT EXISTS entities (...)", + "CREATE INDEX ASYNC idx_entities_tenant ON entities(tenant_id)" +] + +# Insert multiple rows in one transaction +[ + "INSERT INTO entities (entity_id, tenant_id, name) VALUES ('e1', 't1', 'Entity 1')", + "INSERT INTO entities (entity_id, tenant_id, name) VALUES ('e2', 't1', 'Entity 2')", + "INSERT INTO entities (entity_id, tenant_id, name) VALUES ('e3', 't1', 'Entity 3')" +] + +# Safe migration pattern +[ + "ALTER TABLE entities ADD COLUMN status VARCHAR(50)" +] +# Then in a separate transaction: +[ + "UPDATE entities SET status = 'active' WHERE status IS NULL AND tenant_id = 'tenant-123'" +] + 
+# Batch update +[ + "UPDATE entities SET status = 'archived', updated_at = CURRENT_TIMESTAMP WHERE tenant_id = 'tenant-123' AND created_at < '2024-01-01'" +] +``` + +**Important Notes:** + +- Each ALTER TABLE must be in its own transaction (DSQL limitation) +- Keep transactions under 3,000 rows and 10 MiB +- For large batch operations, split into multiple transact calls +- Cannot use parameterized queries - validate inputs before building SQL strings + +--- + +### 3. get_schema - Get table schema details + +**Use for:** Understanding table structure, planning migrations, exploring database + +**Parameters:** + +- `table_name` (string, required) - Name of table to inspect + +**Returns:** List of dictionaries with column information (name, type, nullable, default, etc.) + +**Example:** + +```python +# Get schema for entities table +table_name = "entities" + +# Returns column definitions like: +# [ +# {"column_name": "entity_id", "data_type": "character varying", "is_nullable": "NO", ...}, +# {"column_name": "tenant_id", "data_type": "character varying", "is_nullable": "NO", ...}, +# ... +# ] +``` + +**Note:** There is no `list_tables` tool. To discover tables, use `readonly_query` with: + +```sql +SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' +``` + +--- + +## Documentation and Knowledge Tools + +### 4. dsql_search_documentation - Search Aurora DSQL documentation + +**Use for:** Finding relevant documentation, looking up features, troubleshooting + +**Parameters:** + +- `search_phrase` (string, required) - Search query +- `limit` (int, optional) - Maximum number of results + +**Returns:** Dictionary of search results with URLs and snippets + +**Example:** + +```python +search_phrase = "foreign key constraints" +limit = 5 +``` + +--- + +### 5. 
dsql_read_documentation - Read specific DSQL documentation pages + +**Use for:** Retrieving detailed documentation content + +**Parameters:** + +- `url` (string, required) - URL of documentation page +- `start_index` (int, optional) - Starting character index +- `max_length` (int, optional) - Maximum characters to return + +**Returns:** Dictionary with documentation content + +**Example:** + +```python +url = "https://docs.aws.amazon.com/aurora-dsql/latest/userguide/..." +start_index = 0 +max_length = 5000 +``` + +--- + +### 6. dsql_recommend - Get DSQL best practice recommendations + +**Use for:** Getting contextual recommendations for DSQL usage + +**Parameters:** + +- `url` (string, required) - URL of documentation page to get recommendations for + +**Returns:** Dictionary with recommendations + +--- + +## Common Workflow Patterns + +### Pattern 1: Explore Schema + +```python +# Step 1: List all tables +readonly_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'") + +# Step 2: Get schema for specific table +get_schema("entities") + +# Step 3: Query data +readonly_query("SELECT * FROM entities LIMIT 10") +``` + +### Pattern 2: Create Table with Index + +```python +# WRONG - Combining table DDL and index creation in a single transaction +transact([ + "CREATE TABLE entities (...)", + "CREATE INDEX ASYNC idx_tenant ON entities(tenant_id)" # ❌ Will fail +]) + +# CORRECT - Separate transactions +transact(["CREATE TABLE entities (...)"]) +transact(["CREATE INDEX ASYNC idx_tenant ON entities(tenant_id)"]) +``` + +### Pattern 3: Safe Data Migration + +```python +# Step 1: Add column (one transaction) +transact(["ALTER TABLE entities ADD COLUMN status VARCHAR(50)"]) + +# Step 2: Populate in batches (separate transactions) +# Note: PostgreSQL (and DSQL) does not support LIMIT on UPDATE; batch via a keyed subselect +transact(["UPDATE entities SET status = 'active' WHERE entity_id IN (SELECT entity_id FROM entities WHERE status IS NULL LIMIT 1000)"]) +transact(["UPDATE entities SET status = 'active' WHERE entity_id IN (SELECT entity_id FROM entities WHERE status IS NULL LIMIT 1000)"]) + +# Step 3: Verify +readonly_query("SELECT COUNT(*) as total, 
COUNT(status) as with_status FROM entities") + +# Step 4: Create index (separate transaction) +transact(["CREATE INDEX ASYNC idx_status ON entities(tenant_id, status)"]) +``` + +### Pattern 4: Batch Inserts + +```python +# Build list of INSERT statements +inserts = [] +for i in range(100): # Keep under 3,000 rows per transaction + inserts.append(f"INSERT INTO entities (entity_id, tenant_id, name) VALUES ('e{i}', 't1', 'Entity {i}')") + +# Execute in one transaction +transact(inserts) +``` + +### Pattern 5: Application-Layer Foreign Key Check + +```python +# Step 1: Validate parent exists +result = readonly_query("SELECT entity_id FROM entities WHERE entity_id = 'parent-123' AND tenant_id = 'tenant-123'") + +if len(result) == 0: + raise Error("Invalid parent reference") + +# Step 2: Insert child +transact([ + "INSERT INTO objectives (objective_id, entity_id, tenant_id, title) VALUES ('obj-456', 'parent-123', 'tenant-123', 'My Objective')" +]) +``` + +--- + +## Best Practices + +### Follow General Developing Best Practices + +Refer to the listed [Best Practices](./development-guide.md#best-practices). + +### Input Validation (Critical!) + +Since parameterized queries are NOT supported, you MUST validate and sanitize inputs: + +```python +# BAD - SQL injection risk +user_input = request.get("tenant_id") +sql = f"SELECT * FROM entities WHERE tenant_id = '{user_input}'" +readonly_query(sql) # ❌ Vulnerable! 
+ +# GOOD - Validate input format +import re +user_input = request.get("tenant_id") +if not re.match(r'^[a-zA-Z0-9-]+$', user_input): + raise ValueError("Invalid tenant_id format") +sql = f"SELECT * FROM entities WHERE tenant_id = '{user_input}'" +readonly_query(sql) # ✓ Safe after validation + +# BETTER - Use allowlist for tenant IDs +ALLOWED_TENANTS = {"tenant-123", "tenant-456"} +if user_input not in ALLOWED_TENANTS: + raise ValueError("Unknown tenant") +sql = f"SELECT * FROM entities WHERE tenant_id = '{user_input}'" +readonly_query(sql) # ✓ Most secure +``` + +### Quote Escaping + +```python +# Escape single quotes in string values +name = user_input.replace("'", "''") +sql = f"INSERT INTO entities (name) VALUES ('{name}')" +``` + +--- + +## Additional Resources + +- [Aurora DSQL MCP Server Documentation](https://awslabs.github.io/mcp/servers/aurora-dsql-mcp-server) +- [Aurora DSQL MCP Server README](https://github.com/awslabs/mcp/tree/main/src/aurora-dsql-mcp-server) +- [Aurora DSQL Documentation](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/access-control.md b/plugins/aurora-dsql/skills/aurora-dsql/references/access-control.md new file mode 100644 index 0000000..dba39e2 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/access-control.md @@ -0,0 +1,163 @@ +# Access Control & Role-Based Permissions + +ALWAYS prefer scoped database roles over the `admin` role. The `admin` role should ONLY be +used for initial cluster setup, creating roles, and granting permissions. Applications and +services MUST connect using scoped-down database roles with `dsql:DbConnect`. 
+ +--- + +## Scoped Roles Over Admin + +- **ALWAYS** use scoped database roles for application connections and routine operations +- **MUST** create purpose-specific database roles for each application component +- **MUST** place user-sensitive data (PII, credentials) in a dedicated schema — NOT `public` +- **MUST** grant only the minimum permissions each role requires +- **MUST** create an IAM role with `dsql:DbConnect` for each database role +- **SHOULD** audit role mappings regularly: `SELECT * FROM sys.iam_pg_role_mappings;` + +--- + +## Setting Up Scoped Roles + +Connect as `admin` (the only time `admin` should be used): + +```sql +-- 1. Create scoped database roles +CREATE ROLE app_readonly WITH LOGIN; +CREATE ROLE app_readwrite WITH LOGIN; +CREATE ROLE user_service WITH LOGIN; + +-- 2. Map each to an IAM role (each IAM role needs dsql:DbConnect permission) +AWS IAM GRANT app_readonly TO 'arn:aws:iam::*:role/AppReadOnlyRole'; +AWS IAM GRANT app_readwrite TO 'arn:aws:iam::*:role/AppReadWriteRole'; +AWS IAM GRANT user_service TO 'arn:aws:iam::*:role/UserServiceRole'; + +-- 3. Create a dedicated schema for sensitive data +CREATE SCHEMA users_schema; + +-- 4. 
Grant scoped permissions +GRANT USAGE ON SCHEMA public TO app_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO app_readonly; + +GRANT USAGE ON SCHEMA public TO app_readwrite; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_readwrite; + +GRANT USAGE ON SCHEMA users_schema TO user_service; +GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA users_schema TO user_service; +GRANT CREATE ON SCHEMA users_schema TO user_service; +``` + +--- + +## IAM Role Requirements + +Each scoped database role requires a corresponding IAM role with `dsql:DbConnect`: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "dsql:DbConnect", + "Resource": "arn:aws:dsql:*:*:cluster/*" + } + ] +} +``` + +Reserve `dsql:DbConnectAdmin` strictly for administrative IAM identities: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "dsql:DbConnectAdmin", + "Resource": "arn:aws:dsql:us-east-1:123456789012:cluster/*" + } + ] +} +``` + +--- + +## Schema Separation for Sensitive Data + +- **MUST** place user PII, credentials, and tokens in a dedicated schema (e.g., `users_schema`) +- **MUST** restrict sensitive schema access to only the roles that need it +- **SHOULD** name schemas descriptively: `users_schema`, `billing_schema`, `audit_schema` +- **SHOULD** use `public` only for non-sensitive, shared application data + +```sql +-- Sensitive data: dedicated schema +CREATE TABLE users_schema.profiles ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL, + name VARCHAR(255), + phone VARCHAR(50) +); + +-- Non-sensitive data: public schema +CREATE TABLE public.products ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL, + category VARCHAR(100) +); +``` + +--- + +## Connecting as a Scoped Role + +Applications generate tokens with 
`generate-db-connect-auth-token` (NOT the admin variant): + +```bash +# Application connection — uses DbConnect +PGPASSWORD="$(aws dsql generate-db-connect-auth-token \ + --hostname ${CLUSTER_ENDPOINT} \ + --region ${REGION})" \ +psql -h ${CLUSTER_ENDPOINT} -U app_readwrite -d postgres +``` + +Set the search path to the correct schema after connecting: + +```sql +SET search_path TO users_schema, public; +``` + +--- + +## Role Design Patterns + +| Component | Database Role | Permissions | Schema Access | +| --------------- | -------------------- | ------------------------------ | ------------------------ | +| Web API (read) | `api_readonly` | SELECT | `public` | +| Web API (write) | `api_readwrite` | SELECT, INSERT, UPDATE, DELETE | `public` | +| User service | `user_service` | SELECT, INSERT, UPDATE | `users_schema`, `public` | +| Reporting | `reporting_readonly` | SELECT | `public`, `users_schema` | +| Admin setup | `admin` | ALL (setup only) | ALL | + +--- + +## Revoking Access + +```sql +-- Revoke database permissions +REVOKE ALL ON ALL TABLES IN SCHEMA users_schema FROM app_readonly; +REVOKE USAGE ON SCHEMA users_schema FROM app_readonly; + +-- Revoke IAM mapping +AWS IAM REVOKE app_readonly FROM 'arn:aws:iam::*:role/AppReadOnlyRole'; +``` + +--- + +## References + +- [Using Database and IAM Roles](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/using-database-and-iam-roles.html) +- [PostgreSQL GRANT](https://www.postgresql.org/docs/current/sql-grant.html) +- [PostgreSQL Privileges](https://www.postgresql.org/docs/current/ddl-priv.html) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/batched-migration.md b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/batched-migration.md new file mode 100644 index 0000000..7d38310 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/batched-migration.md @@ -0,0 +1,119 @@ +# DDL Migrations: Batched Migration Pattern + +**REQUIRED for 
tables exceeding 3,000 rows.** + +For the full Table Recreation Pattern and verify & swap steps, see [overview.md](overview.md). + +--- + +## Batch Size Rules + +- **PREFER batches of 500-1,000 rows** for optimal performance +- Smaller batches reduce lock contention and enable better concurrency + +--- + +## OFFSET-Based Batching + +```sql +readonly_query("SELECT COUNT(*) as total FROM target_table") +-- Calculate: batches_needed = CEIL(total / 1000) + +-- Batch 1 +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000 OFFSET 0" +]) + +-- Batch 2 +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000 OFFSET 1000" +]) +-- Continue until all rows migrated... +``` + +--- + +## Cursor-Based Batching (Preferred for Large Tables) + +Better performance than OFFSET for very large tables: + +```sql +-- First batch +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000" +]) + +-- Get last processed ID +readonly_query("SELECT MAX(id) as last_id FROM target_table_new") + +-- Subsequent batches +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + WHERE id > 'last_processed_id' + ORDER BY id LIMIT 1000" +]) +``` + +--- + +## Progress Tracking + +```sql +readonly_query( + "SELECT (SELECT COUNT(*) FROM target_table_new) as migrated, + (SELECT COUNT(*) FROM target_table) as total" +) +``` + +--- + +## Error Handling + +### Pre-Migration Checks + +1. **Verify table exists** + + ```sql + readonly_query( + "SELECT table_name FROM information_schema.tables + WHERE table_name = 'target_table'" + ) + ``` + +2. 
**Verify DDL permissions** + +### Data Validation Errors + +**MUST abort migration and report** when: + +- Type conversion would fail +- Value truncation would occur +- NOT NULL constraint would be violated + +```sql +-- Find problematic rows +readonly_query( + "SELECT id, problematic_column FROM target_table + WHERE problematic_column !~ '^-?[0-9]+$' LIMIT 100" +) +``` + +### Recovery from Failed Migration + +```sql +-- Check table state +readonly_query( + "SELECT table_name FROM information_schema.tables + WHERE table_name IN ('target_table', 'target_table_new')" +) +``` + +- **Both tables exist:** Original safe → `DROP TABLE IF EXISTS target_table_new` and restart +- **Only new table exists:** Verify count, then complete rename diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/column-operations.md b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/column-operations.md new file mode 100644 index 0000000..61a1edb --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/column-operations.md @@ -0,0 +1,217 @@ +# DDL Migrations: Column Operations + +Step-by-step migration patterns for column-level changes using the Table Recreation Pattern. + +**MUST read [overview.md](overview.md) first** for destructive operation warnings and the common verify & swap pattern. + +--- + +## DROP COLUMN Migration + +**Goal:** Remove a column from an existing table. 
+ +### Pre-Migration Validation + +```sql +readonly_query("SELECT COUNT(*) as total_rows FROM target_table") +get_schema("target_table") +``` + +### Migration Steps + +#### Step 1: Create new table excluding the column + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + tenant_id VARCHAR(255) NOT NULL, + kept_column1 VARCHAR(255), + kept_column2 INTEGER + -- dropped_column is NOT included + )" +]) +``` + +#### Step 2: Migrate data + +```sql +transact([ + "INSERT INTO target_table_new (id, tenant_id, kept_column1, kept_column2) + SELECT id, tenant_id, kept_column1, kept_column2 + FROM target_table" +]) +``` + +For tables > 3,000 rows, use [Batched Migration Pattern](batched-migration.md). + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +--- + +## ALTER COLUMN TYPE Migration + +**Goal:** Change a column's data type. + +### Pre-Migration Validation + +**MUST validate data compatibility BEFORE migration** to prevent data loss. 
+ +```sql +-- Example: VARCHAR to INTEGER - check for non-numeric values +readonly_query( + "SELECT COUNT(*) as invalid_count FROM target_table + WHERE column_to_change !~ '^-?[0-9]+$'" +) +-- MUST abort if invalid_count > 0 + +-- Show problematic rows +readonly_query( + "SELECT id, column_to_change FROM target_table + WHERE column_to_change !~ '^-?[0-9]+$' LIMIT 100" +) +``` + +### Data Type Compatibility Matrix + +| From Type | To Type | Validation | +| --------- | ---------- | ------------------------------------------------------- | +| VARCHAR | INTEGER | MUST validate all values are numeric | +| VARCHAR | BOOLEAN | MUST validate values are 'true'/'false'/'t'/'f'/'1'/'0' | +| INTEGER | VARCHAR | Safe conversion | +| TEXT | VARCHAR(n) | MUST validate max length ≤ n | +| TIMESTAMP | DATE | Safe (truncates time) | +| INTEGER | DECIMAL | Safe conversion | + +### Migration Steps + +#### Step 1: Create new table with changed type + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + converted_column INTEGER, -- Changed from VARCHAR + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data with type casting + +```sql +transact([ + "INSERT INTO target_table_new (id, converted_column, other_column) + SELECT id, CAST(converted_column AS INTEGER), other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +--- + +## ALTER COLUMN SET/DROP NOT NULL Migration + +**Goal:** Change a column's nullability constraint. 
+ +### Pre-Migration Validation (for SET NOT NULL) + +```sql +readonly_query( + "SELECT COUNT(*) as null_count FROM target_table + WHERE target_column IS NULL" +) +-- MUST ABORT if null_count > 0, or plan to provide default values +``` + +### Migration Steps + +#### Step 1: Create new table with changed constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + target_column VARCHAR(255) NOT NULL, -- Changed from nullable + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data (with default for NULLs if needed) + +```sql +transact([ + "INSERT INTO target_table_new (id, target_column, other_column) + SELECT id, COALESCE(target_column, 'default_value'), other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +--- + +## ALTER COLUMN SET/DROP DEFAULT Migration + +**Goal:** Add or remove a default value for a column. + +### Pre-Migration Validation + +```sql +get_schema("target_table") +-- Identify current column definition and any existing defaults +``` + +### Migration Steps (SET DEFAULT) + +#### Step 1: Create new table with default value + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + status VARCHAR(50) DEFAULT 'pending', -- Added default + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, status, other_column) + SELECT id, status, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +### Migration Steps (DROP DEFAULT) + +#### Step 1: Create new table without default + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + status VARCHAR(50), -- Removed DEFAULT + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, status, other_column) + SELECT id, status, other_column + 
FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/constraint-operations.md b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/constraint-operations.md new file mode 100644 index 0000000..8b742f2 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/constraint-operations.md @@ -0,0 +1,208 @@ +# DDL Migrations: Constraint & Structural Operations + +Step-by-step migration patterns for constraint changes, primary key modifications, and column transformations. + +**MUST read [overview.md](overview.md) first** for destructive operation warnings and the common verify & swap pattern. + +--- + +## ADD CONSTRAINT Migration + +**Goal:** Add a constraint (UNIQUE, CHECK) to an existing table. + +### Pre-Migration Validation + +**MUST validate existing data satisfies the new constraint.** + +```sql +-- For UNIQUE constraint: check for duplicates +readonly_query( + "SELECT target_column, COUNT(*) as cnt FROM target_table + GROUP BY target_column HAVING COUNT(*) > 1 LIMIT 10" +) +-- MUST ABORT if any duplicates exist + +-- For CHECK constraint: validate all rows pass +readonly_query( + "SELECT COUNT(*) as invalid_count FROM target_table + WHERE NOT (check_condition)" +) +-- MUST ABORT if invalid_count > 0 +``` + +### Migration Steps + +#### Step 1: Create new table with the constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + email VARCHAR(255) UNIQUE, -- Added UNIQUE constraint + age INTEGER CHECK (age >= 0), -- Added CHECK constraint + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, email, age, other_column) + SELECT id, email, age, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +--- + 
+## DROP CONSTRAINT Migration + +**Goal:** Remove a constraint (UNIQUE, CHECK) from a table. + +### Pre-Migration Validation + +```sql +-- Identify existing constraints +readonly_query( + "SELECT constraint_name, constraint_type + FROM information_schema.table_constraints + WHERE table_name = 'target_table' + AND constraint_type IN ('UNIQUE', 'CHECK')" +) +``` + +### Migration Steps + +#### Step 1: Create new table without the constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + email VARCHAR(255), -- Removed UNIQUE constraint + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, email, other_column) + SELECT id, email, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](overview.md#common-verify--swap-pattern)) + +--- + +## MODIFY PRIMARY KEY Migration + +**Goal:** Change which column(s) form the primary key. + +### Pre-Migration Validation + +**MUST validate new PK column has unique, non-null values.** + +```sql +-- Check for duplicates +readonly_query( + "SELECT new_pk_column, COUNT(*) as cnt FROM target_table + GROUP BY new_pk_column HAVING COUNT(*) > 1 LIMIT 10" +) +-- MUST ABORT if any duplicates exist + +-- Check for NULLs +readonly_query( + "SELECT COUNT(*) as null_count FROM target_table + WHERE new_pk_column IS NULL" +) +-- MUST ABORT if null_count > 0 +``` + +### Migration Steps + +#### Step 1: Create new table with new primary key + +```sql +transact([ + "CREATE TABLE target_table_new ( + new_pk_column UUID PRIMARY KEY, -- New PK + old_pk_column VARCHAR(255), -- Demoted to regular column + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (new_pk_column, old_pk_column, other_column) + SELECT new_pk_column, old_pk_column, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common 
Pattern](overview.md#common-verify--swap-pattern)) + +--- + +## Column Transformations (Split/Merge) + +### Split Column + +**Goal:** Split one column into multiple (e.g., `full_name` → `first_name` + `last_name`). + +```sql +-- Create new table with split columns +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + first_name VARCHAR(255), + last_name VARCHAR(255) + )" +]) + +-- Copy with transformation +transact([ + "INSERT INTO target_table_new (id, first_name, last_name) + SELECT id, + SPLIT_PART(full_name, ' ', 1), + SUBSTRING(full_name FROM POSITION(' ' IN full_name) + 1) + FROM target_table" +]) + +-- Verify, swap, re-index (see Common Pattern) +``` + +### Merge Columns + +**Goal:** Combine multiple columns into one (e.g., `first_name` + `last_name` → `display_name`). + +```sql +-- Create new table with merged column +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + display_name VARCHAR(512) + )" +]) + +-- Copy with concatenation +transact([ + "INSERT INTO target_table_new (id, display_name) + SELECT id, + CONCAT(COALESCE(first_name, ''), ' ', COALESCE(last_name, '')) + FROM target_table" +]) + +-- Verify, swap, re-index (see Common Pattern) +``` diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/overview.md b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/overview.md new file mode 100644 index 0000000..1e8730b --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/ddl-migrations/overview.md @@ -0,0 +1,133 @@ +# DSQL DDL Migration Guide - Overview + +This guide provides the **Table Recreation Pattern** for schema modifications that require rebuilding tables. + +For column-level operations, see [column-operations.md](column-operations.md). +For constraint and structural operations, see [constraint-operations.md](constraint-operations.md). +For batched migration patterns, see [batched-migration.md](batched-migration.md). 
+ +--- + +## CRITICAL: Destructive Operations Warning + +**The Table Recreation Pattern involves DESTRUCTIVE operations that can result in DATA LOSS.** + +Table recreation requires dropping the original table, which is **irreversible**. If any step fails after the original table is dropped, data may be permanently lost. + +### Mandatory User Verification Requirements + +Agents MUST obtain explicit user approval before executing migrations on live tables: + +1. **MUST present the complete migration plan** to the user before any execution +2. **MUST clearly state** that this operation will DROP the original table +3. **MUST confirm** the user has a current backup or accepts the risk of data loss +4. **MUST verify with the user** at each checkpoint before proceeding: + - Before creating the new table structure + - Before beginning data migration + - Before dropping the original table (CRITICAL CHECKPOINT) + - Before renaming the new table +5. **MUST NOT proceed** with any destructive action without explicit user confirmation +6. 
**MUST recommend** performing migrations on non-production environments first + +### Risk Acknowledgment + +Before proceeding, the user MUST confirm: + +- [ ] They understand this is a destructive operation +- [ ] They have a backup of the table data (or accept the risk) +- [ ] They approve the agent to execute each step with verification +- [ ] They understand the migration cannot be automatically rolled back after DROP TABLE + +--- + +## Table Recreation Operations + +The following ALTER TABLE operations MUST use the **Table Recreation Pattern**: + +| Operation | Key Approach | +| ------------------------------ | ---------------------------------------------- | +| DROP COLUMN | Exclude column from new table | +| ALTER COLUMN TYPE | Cast data type in SELECT | +| ALTER COLUMN SET/DROP NOT NULL | Change constraint in new table definition | +| ALTER COLUMN SET/DROP DEFAULT | Define default in new table definition | +| ADD CONSTRAINT | Include constraint in new table definition | +| DROP CONSTRAINT | Remove constraint from new table definition | +| MODIFY PRIMARY KEY | Define new PK, validate uniqueness first | +| Split/Merge Columns | Use SPLIT_PART, SUBSTRING, or CONCAT in SELECT | + +**Note:** The following operations ARE supported directly: + +- `ALTER TABLE ... RENAME COLUMN` - Rename a column +- `ALTER TABLE ... RENAME TO` - Rename a table +- `ALTER TABLE ... ADD COLUMN` - Add a new column + +--- + +## Table Recreation Pattern Overview + +MUST follow this sequence with user verification at each step: + +1. **Plan & Confirm** - MUST present migration plan and obtain user approval to proceed +2. **Validate** - Check data compatibility with new structure; MUST report findings to user +3. **Create** - Create new table with desired structure; MUST verify with user before execution +4. **Migrate** - Copy data (batched for tables > 3,000 rows); MUST report progress to user +5. **Verify** - Confirm row counts match; MUST present comparison to user +6. 
**Swap** - CRITICAL: MUST obtain explicit user confirmation before DROP TABLE +7. **Re-index** - Recreate indexes using ASYNC; MUST confirm completion with user + +### Transaction Rules + +- **MUST batch** migrations exceeding 3,000 row mutations +- **PREFER batches of 500-1,000 rows** for optimal throughput +- **MUST respect** 10 MiB data size per transaction +- **MUST respect** 5-minute transaction duration + +--- + +## Common Verify & Swap Pattern + +All migrations end with this pattern (referenced in [column-operations.md](column-operations.md) and [constraint-operations.md](constraint-operations.md)). + +**CRITICAL: MUST obtain explicit user confirmation before DROP TABLE step.** + +```sql +-- MUST verify counts match +readonly_query("SELECT COUNT(*) FROM target_table") +readonly_query("SELECT COUNT(*) FROM target_table_new") + +-- CHECKPOINT: MUST present count comparison to user and obtain confirmation +-- Agent MUST display: "Original table has X rows, new table has Y rows. +-- Proceeding will DROP the original table. This action is IRREVERSIBLE. +-- Do you want to proceed? 
(yes/no)" +-- MUST NOT proceed without explicit "yes" confirmation + +-- MUST swap tables (DESTRUCTIVE - requires user confirmation above) +transact(["DROP TABLE target_table"]) +transact(["ALTER TABLE target_table_new RENAME TO target_table"]) + +-- MUST recreate indexes +transact(["CREATE INDEX ASYNC idx_target_tenant ON target_table(tenant_id)"]) +``` + +--- + +## Best Practices Summary + +### User Verification (CRITICAL) + +- **MUST present** complete migration plan to user before any execution +- **MUST obtain** explicit user confirmation before DROP TABLE operations +- **MUST verify** with user at each checkpoint during migration +- **MUST NOT** proceed with destructive actions without explicit user approval +- **MUST recommend** testing migrations on non-production data first +- **MUST confirm** user has backup or accepts data loss risk + +### Technical Requirements + +- **MUST validate** data compatibility before type changes +- **MUST batch** tables exceeding 3,000 rows +- **MUST verify** row counts before and after migration +- **MUST recreate** indexes after table swap using ASYNC +- **MUST NOT** drop original table until new table is verified +- **PREFER** cursor-based batching for very large tables +- **PREFER** batches of 500-1,000 rows for optimal throughput diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/development-guide.md b/plugins/aurora-dsql/skills/aurora-dsql/references/development-guide.md new file mode 100644 index 0000000..6f361da --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/development-guide.md @@ -0,0 +1,428 @@ +# DSQL Development Guide + +DSQL is the **PREFERRED** relational database. This decision was made for fast serverless distributed SQL, +effortless scaling, multi-region viability, among other advantages. 
+ +--- + +## Best Practices + +- **SHOULD read guidelines first** - Check [development_guide.md](development-guide.md) before making schema changes +- **SHOULD use preferred language patterns** - Check [language.md](language.md) +- **SHOULD Execute queries directly** - PREFER MCP tools for ad-hoc queries +- **REQUIRED: Follow DDL Guidelines** - Refer to [DDL Rules](#schema-ddl-rules) +- **SHALL repeatedly generate fresh tokens** - Refer to [Connection Limits](#connection-rules) +- **ALWAYS use ASYNC indexes** - `CREATE INDEX ASYNC` is mandatory +- **MUST Serialize arrays/JSON as TEXT** - Store arrays/JSON as TEXT (comma separated, JSON.stringify) +- **ALWAYS Batch under 3,000 rows** - maintain transaction limits +- **REQUIRED: Sanitize SQL inputs with allowlists, regex, and quote escaping** - See [Input Validation](../mcp/mcp-tools.md#input-validation-critical) +- **MUST follow correct Application Layer Patterns** - when multi-tenant isolation or application referential itegrity are required; refer to [Application Layer Patterns](#application-layer-patterns) +- **REQUIRED use DELETE for truncation** - DELETE is the only supported operation for truncation +- **SHOULD test any migrations** - Verify DDL on dev clusters before production +- **Plan for Horizontal Scale** - DSQL is designed to optimize for massive scales without latency drops; refer to [Horizontal Scaling](#horizontal-scaling-best-practice) +- **SHOULD use connection pooling in production applications** - Refer to [Connection Pooling](#connection-pooling-recommended) +- **SHOULD debug with the troubleshooting guide:** - Always refer to the resources and guidelines in [troubleshooting.md](troubleshooting.md) +- **ALWAYS use scoped roles for applications** - Create database roles with `dsql:DbConnect`; refer to [Access Control](access-control.md) + +--- + +## Basic Development Guidelines + +### Connection and Authentication + +#### IAM Authentication + +**Principle of least privilege:** + +- Grant only 
`dsql:DbConnect` for standard users +- Reserve `dsql:DbConnectAdmin` for administrative operations +- Link database roles to IAM roles for proper access control +- Use IAM policies to restrict cluster access by resource tags + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "dsql:DbConnect", + "Resource": "arn:aws:dsql:us-east-1:123456789012:cluster/*", + "Condition": { + "StringEquals": { + "aws:ResourceTag/Environment": "production" + } + } + } + ] +} +``` + +#### Token Management + +**Rotation strategies:** + +- Generate fresh token per connection (simplest, most secure) +- Implement periodic refresh before 15-minute expiration +- Use connection pool hooks for automated refresh +- Handle token expiration gracefully with retry logic + +**Best practices:** + +- Keep authentication tokens in memory only; discard after use +- Regenerate token on connection errors +- Monitor token generation failures +- Set connection timeouts appropriately + +#### Secrets Management + +**ALWAYS dynamically assign credentials:** + +- Use environment variables for configuration +- Store cluster endpoints in AWS Systems Manager Parameter Store +- Use AWS Secrets Manager for any sensitive configuration +- Rotate credentials regularly even though tokens are short-lived + +```bash +# Good - Use Parameter Store +export CLUSTER_ENDPOINT=$(aws ssm get-parameter \ + --name /myapp/dsql/endpoint \ + --query 'Parameter.Value' \ + --output text) + +# Bad - Hardcoded in code +const endpoint = "abc123.dsql.us-east-1.on.aws" // ❌ Use Parameter Store instead +``` + +#### Connection Rules: + +- 15-minute token expiry +- 60-minute connection maximum +- 10,000 connections per cluster +- SSL required + +#### SSL/TLS Requirements + +Aurora DSQL uses the [PostgreSQL wire protocol](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility.html) and enforces SSL: + +``` +sslmode: verify-full +sslnegotiation: direct # PostgreSQL 
17+ drivers (better performance)
port: 5432
database: postgres # single database per cluster
```

**Key details:**

- SSL always enabled server-side
- Use `verify-full` to verify server certificate
- Use `direct` TLS negotiation for PostgreSQL 17+ compatible drivers
- System trust store must include Amazon Root CA

#### Connection Pooling (Recommended)

For production applications:

- SHOULD Implement connection pooling
- ALWAYS Configure token refresh before expiration
- MUST Set appropriate pool size (e.g., max: 10, min: 2)
- MUST Configure connection lifetime and idle timeout
- MUST Generate fresh token in `BeforeConnect` or equivalent hook

#### Security Best Practices

- ALWAYS dynamically set credentials
- MUST use IAM authentication exclusively
- ALWAYS use SSL/TLS with certificate verification
- SHOULD grant least privilege IAM permissions
- ALWAYS rotate tokens before expiration
- SHOULD use connection pooling to minimize token generation overhead

---

### Audit Logging

**CloudTrail integration:**

- Enable CloudTrail logging for DSQL API calls
- Monitor token generation patterns
- Track cluster configuration changes
- Set up alerts for suspicious activity

**Query logging:**

- Enable query logging if available
- Monitor slow queries and connection patterns
- Track failed authentication attempts
- Review logs regularly for anomalies

---

### Access Control

**ALWAYS prefer scoped database roles over the `admin` role.**

- **ALWAYS** use scoped database roles for application connections — reserve `admin` for initial setup and role management
- **MUST** create purpose-specific database roles and connect with `dsql:DbConnect`
- **MUST** place sensitive data (PII, credentials) in dedicated schemas — not `public`
- **MUST** grant only the minimum privileges each role requires
- **SHOULD** audit role mappings: `SELECT * FROM sys.iam_pg_role_mappings;`

For complete role setup instructions, schema 
separation patterns, and IAM configuration, +see [access-control.md](access-control.md). + +--- + +## Operational Rules + +### Query Execution + +**For Ad-Hoc Queries and Data Exploration:** + +- MUST ALWAYS Execute DIRECTLY using MCP server or psql one-liners +- SHOULD Return results immediately + +**Writing Scripts REQUIRES at least 1 of:** + +- Permanent migrations in database +- Reusable utilities +- EXPLICIT user request + +--- + +### Schema Design Rules + +- MUST use **simple PostgreSQL types:** VARCHAR, TEXT, INTEGER, BOOLEAN, TIMESTAMP +- MUST store arrays as TEXT (comma-separated is recommended) +- MUST store JSON objects as TEXT (JSON.stringify) +- ALWAYS include tenant_id in tables for multi-tenant isolation +- SHOULD create async indexes for tenant_id and common query patterns + +### Schema (DDL) Rules + +- REQUIRED: **at most one DDL statement** per operation +- ALWAYS separate schema (DDL) and data (DML) changes +- MUST use **`CREATE INDEX ASYNC`:** No synchronous creation + - MAXIMUM: **24 indexes per table** + - MAXIMUM: **8 columns per index** +- **Asynchronous Execution:** DDL ALWAYS runs asynchronously +- To add a column with DEFAULT or NOT NULL: + 1. MUST issue ADD COLUMN specifying only the column name and data type + 2. MUST then issue UPDATE to populate existing rows + 3. MAY then issue ALTER COLUMN to apply the constraint +- MUST issue a **separate ALTER TABLE statement for each column** modification. 
+ +### Transaction Rules + +- SHOULD modify **at most 3000 rows** per transaction +- SHOULD have maximum **10 MiB data size** per write transaction +- SHOULD expect **5-minute** transaction duration +- ALWAYS expect repeatable read isolation + +--- + +### Application-Layer Patterns + +**MANDATORY for Application Referential Integrity:** +If foreign key constraints (application referential integrity) are required, +instead implementation: + +- MUST validate parent references before INSERT +- MUST check for dependents before DELETE +- MUST implement cascade logic in application code +- MUST handle orphaned records in application layer + +**MANDATORY for Multi-Tenant Isolation:** + +- tenantId is ALWAYS first parameter in repository methods +- ALL queries include WHERE tenant_id = ? +- ALWAYS validate tenant ownership before operations +- ALWAYS reject cross-tenant data access + +### Migration Patterns + +- REQUIRED: One DDL statement per migration step +- SHOULD Use IF NOT EXISTS for idempotency +- SHOULD Add column first, then UPDATE with defaults +- REQUIRED: Each DDL executes separately + +--- + +## Database Connectivity Tools + +DSQL has many tools for connecting including 10 database drivers, 4, ORM libraries, and 3 specialized adapters +across various languages as listed in the [programming guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/aws-sdks.html). PREFER using connectors, drivers, ORM libraries, and adapters. 
+ +### Database Drivers + +Low-level libraries that directly connect to the database: + +| Programming Language | Driver | Sample Repository | +| -------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------ | +| **C++** | libpq | [C++ libpq samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/cpp/libpq) | +| **C# (.NET)** | Npgsql | [.NET Npgsql samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/dotnet/npgsql) | +| **Go** | pgx | [Go pgx samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/go/pgx) | +| **Java** | pgJDBC | [Java pgJDBC samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/pgjdbc) | +| **Java** | DSQL Connector for JDBC | JDBC samples | +| **JavaScript** | DSQL Connector for node-postgres | [Node.js samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript/node-postgres) | +| **JavaScript** | DSQL Connector for Postgres.js | [Postgres.js samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript/postgres-js) | +| **Python** | Psycopg | [Python Psycopg samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/python/psycopg) | +| **Python** | DSQL Connector for Psycopg2 | [Python Psycopg2 samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/python/psycopg2) | +| **Python** | DSQL Connector for Asyncpg | [Python Asyncpg samples](https://github.com/awslabs/aurora-dsql-python-connector/tree/main/examples/asyncpg) | +| **Ruby** | pg | [Ruby pg samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/ruby/ruby-pg) | +| **Rust** | SQLx | [Rust SQLx samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/rust/sqlx) | + +### Object-Relational Mapping (ORM) Libraries + +Standalone libraries that provide object-relational mapping functionality: + +| Programming Language | ORM Library | 
Sample Repository | +| -------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------- | +| **Java** | Hibernate | [Hibernate Pet Clinic App](https://github.com/awslabs/aurora-dsql-hibernate/tree/main/examples/pet-clinic-app) | +| **Python** | SQLAlchemy | [SQLAlchemy Pet Clinic App](https://github.com/awslabs/aurora-dsql-sqlalchemy/tree/main/examples/pet-clinic-app) | +| **TypeScript** | Sequelize | [TypeScript Sequelize samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/typescript/sequelize) | +| **TypeScript** | TypeORM | [TypeScript TypeORM samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/typescript/type-orm) | + +### Aurora DSQL Adapters and Dialects + +Specific extensions that make existing ORMs work with Aurora DSQL: + +| Programming Language | ORM/Framework | Repository | +| -------------------- | ------------- | ------------------------------------------------------------------------------------ | +| **Java** | Hibernate | [Aurora DSQL Hibernate Adapter](https://github.com/awslabs/aurora-dsql-hibernate/) | +| **Python** | Django | [Aurora DSQL Django Adapter](https://github.com/awslabs/aurora-dsql-django/) | +| **Python** | SQLAlchemy | [Aurora DSQL SQLAlchemy Adapter](https://github.com/awslabs/aurora-dsql-sqlalchemy/) | + +--- + +## Horizontal Scaling: Best Practice + +Aurora DSQL is designed for massive horizontal scale without latency degradation. 
+ +### Connection Strategy + +- **PREFER more concurrent connections with smaller batches** - Higher concurrency typically yields better throughput +- **SHOULD implement connection pooling** - Reuse connections to minimize token overhead; respect 10,000 max per cluster +- **PREFER imitial pool size 10-50 per instance** - Generate fresh tokens in pool hooks (e.g., `BeforeConnect`) for 15-minute expiration +- **SHOULD retry internal errors with new connection** - Internal errors are retryable, but SHOULD use a new connection from the pool +- **SHOULD implement backoff with jitter** - Avoid thundering herd; scale pools gradually + +### Batch Size Optimization + +- **PREFER batches of 500-1,000 rows** - Balance throughput and transaction limits (3,000 rows, 10 MiB, 5 minutes max) +- **SHOULD process batches concurrently** - Use multiple connections; consider multiple threads for bulk loading +- **Smaller batches reduce** lock contention, enable better concurrency, fail faster, distribute load evenly + +### AVOID Hot Keys + +Hot keys (frequently accessed rows) create bottlenecks. For detailed analysis, see ["How to avoid hot keys in Aurora DSQL"](https://marc-bowes.com/dsql-avoid-hot-keys.html). + +**Key strategies:** + +- **PREFER UUIDs for primary keys** - UUIDs are the recommended default identifier because they avoid coordination; use `gen_random_uuid()` for distributed writes + - **Sequences and IDENTITY columns are available** when compact, human-readable integer identifiers are needed (e.g., account numbers, reference IDs). CACHE must be specified explicitly as either 1 or >= 65536. 
See [Choosing Identifier Types](#choosing-identifier-types) + - **ALWAYS use `GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY`** for auto-incrementing columns (SERIAL is not supported) +- **SHOULD avoid aggregate update patterns** - Year-to-date totals and running counters create hot keys via read-modify-write + - **RECOMMENDED: Compute aggregates via queries** - Calculate totals with SELECT when needed; eventual consistency often acceptable +- **Accept contention only for genuine constraints** - Inventory management and account balances justify contention; sequential numbering and visit tracking are better served by coordination-free approaches + +### Choosing Identifier Types + +Aurora DSQL supports both UUID-based identifiers and integer values generated using sequences or IDENTITY columns. + +- **UUIDs** can be generated without coordination and are recommended as the default identifier type, especially for primary keys where scalability is important and strict ordering is not required +- **Sequences and IDENTITY columns** generate compact integer values convenient for human-readable identifiers, reporting, and external interfaces. When numeric identifiers are preferred, we recommend using a sequence or IDENTITY column in combination with UUID-based primary keys +- **ALWAYS use `GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY`** for auto-incrementing columns (SERIAL is not supported) + +#### Choosing a CACHE Size + +**REQUIRED:** Specify CACHE explicitly when creating sequences or identity columns. Supported values are 1 or >= 65536. 
+ +- **CACHE >= 65536** — suited for high-frequency identifier generation, many concurrent sessions, and workloads that tolerate gaps and ordering effects (e.g., IoT/telemetry ingestion, job run IDs, internal order numbers) +- **CACHE = 1** — suited for low allocation rates where identifiers should follow allocation order more closely and minimizing gaps matters more than throughput (e.g., account numbers, reference numbers) + +--- + +## Data Loading Tools + +The [DSQL Loader](https://github.com/aws-samples/aurora-dsql-loader) is a fast parallel data loader for DSQL that supports +loading from CSV, TSV, and Parquet files into DSQL with automatic schema detection and progress tracking. + +Developers SHOULD PREFER the DSQL Loader for: + +- quick, managed loading without user supervision +- populating test tables +- migrating data into DSQL from local files or S3 URIs of type csv, tsv, or parquet +- automated schema detection and progress tracking + +ALWAYS use the loader's schema inference, PREFERRED to separate schema +creation for data migration. 
+ +**Install and use the DSQL Loader with [loader.sh](../../scripts/loader.sh)** + +### Common Examples + +**Load from S3:** + +```bash +aurora-dsql-loader load \ + --endpoint your-cluster.dsql.us-east-1.on.aws \ + --source-uri s3://my-bucket/data.parquet \ + --table analytics_data +``` + +**Create table automatically from a local filepath:** + +```bash +aurora-dsql-loader load \ + --endpoint your-cluster.dsql.us-east-1.on.aws \ + --source-uri data.csv \ + --table new_table \ + --if-not-exists +``` + +**Validate a local file without loading:** + +```bash +aurora-dsql-loader load \ + --endpoint your-cluster.dsql.us-east-1.on.aws \ + --source-uri data.csv \ + --table my_table \ + --dry-run +``` + +--- + +## Quick Reference + +### Schema Operations + +```sql +CREATE INDEX ASYNC idx_name ON table(column); ← ALWAYS ASYNC +ALTER TABLE t ADD COLUMN c VARCHAR(50); ← ONE AT A TIME +ALTER TABLE t ADD COLUMN c2 INTEGER; ← SEPARATE STATEMENT +UPDATE table SET c = 'default' WHERE c IS NULL; ← AFTER ADD COLUMN +``` + +### Supported Data Types + +``` +VARCHAR, TEXT, INTEGER, DECIMAL, BOOLEAN, TIMESTAMP, UUID +``` + +### Supported Key + +``` +PRIMARY KEY, UNIQUE, NOT NULL, CHECK, DEFAULT (in CREATE TABLE) +``` + +Join on any keys; DSQL preserves DB referential integrity, when needed application referential +integrity must be separately enforced. + +### Transaction Requirements + +``` +Rows: 3,000 max +Size: 10 MiB max +Duration: 5 minutes max +Isolation: Repeatable Read (fixed) +``` diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/dsql-examples.md b/plugins/aurora-dsql/skills/aurora-dsql/references/dsql-examples.md new file mode 100644 index 0000000..6292732 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/dsql-examples.md @@ -0,0 +1,489 @@ +# Aurora DSQL Implementation Examples + +This file contains DSQL integration code examples; only load this when actively implementing database code. 
+ +For language-specific framework selection, recommendations, and examples see [language.md](./language.md). + +For developer rules, see [development-guide.md](./development-guide.md). + +For additional samples, including in alternative language and driver support, refer to the official +[aurora-dsql-samples](https://github.com/aws-samples/aurora-dsql-samples). + +--- + +## Ad-Hoc Queries with psql + +PREFER connecting with a scoped database role using `generate-db-connect-auth-token`. +Reserve `admin` for role and schema setup only. See [access-control.md](./access-control.md). + +```bash +# PREFERRED: Execute queries with a scoped role +PGPASSWORD="$(aws dsql generate-db-connect-auth-token \ + --hostname ${CLUSTER}.dsql.${REGION}.on.aws \ + --region ${REGION})" \ +psql -h ${CLUSTER}.dsql.${REGION}.on.aws -U app_readwrite -d postgres \ + -c "SELECT COUNT(*) FROM objectives WHERE tenant_id = 'tenant-123';" + +# Admin only — for role/schema setup +PGPASSWORD="$(aws dsql generate-db-connect-admin-auth-token \ + --hostname ${CLUSTER}.dsql.${REGION}.on.aws \ + --region ${REGION})" \ +PGAPPNAME="/" \ +psql -h ${CLUSTER}.dsql.${REGION}.on.aws -U admin -d postgres +``` + +--- + +## Connection Management + +### RECOMMENDED: DSQL Connector + +Source: [aurora-dsql-samples/javascript](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript) + +```javascript +import { AuroraDSQLPool } from "@aws/aurora-dsql-node-postgres-connector"; + +function createPool(clusterEndpoint, user) { + return new AuroraDSQLPool({ + host: clusterEndpoint, + user: user, + application_name: "/", + max: 10, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 10000, + }); +} + +async function example() { + const pool = createPool(process.env.CLUSTER_ENDPOINT, process.env.CLUSTER_USER); + + try { + const result = await pool.query("SELECT $1::int as value", [42]); + console.log(`Result: ${result.rows[0].value}`); + } finally { + await pool.end(); + } +} +``` + +### Token Generation 
for Custom Implementations + +For custom drivers or languages without DSQL Connector. Source: [aurora-dsql-samples/javascript/authentication](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript/authentication) + +```javascript +import { DsqlSigner } from "@aws-sdk/dsql-signer"; + +// PREFERRED: Generate token for scoped role (uses dsql:DbConnect) +async function generateToken(clusterEndpoint, region) { + const signer = new DsqlSigner({ hostname: clusterEndpoint, region }); + return await signer.getDbConnectAuthToken(); +} + +// Admin only — for role/schema setup (uses dsql:DbConnectAdmin) +async function generateAdminToken(clusterEndpoint, region) { + const signer = new DsqlSigner({ hostname: clusterEndpoint, region }); + return await signer.getDbConnectAdminAuthToken(); +} +``` + +--- + +## Schema Design: Table Creation + +SHOULD use UUIDs with `gen_random_uuid()` for distributed write performance. Source: [aurora-dsql-samples/java/liquibase](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/liquibase) + +```sql +CREATE TABLE IF NOT EXISTS owner ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name VARCHAR(30) NOT NULL, + city VARCHAR(80) NOT NULL, + telephone VARCHAR(20) +); + +CREATE TABLE IF NOT EXISTS orders ( + order_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR(255) NOT NULL, + status VARCHAR(50) NOT NULL, + tags TEXT, + metadata TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +``` + +--- + +## Schema Design: Index Creation + +MUST use `CREATE INDEX ASYNC` (max 24 indexes/table, 8 columns/index). 
Source: [aurora-dsql-samples/java/liquibase](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/liquibase) + +```sql +CREATE INDEX ASYNC idx_owner_city ON owner(city); +CREATE INDEX ASYNC idx_orders_tenant ON orders(tenant_id); +CREATE INDEX ASYNC idx_orders_status ON orders(tenant_id, status); +``` + +--- + +## Schema Design: Column Modifications + +MUST use two-step process: add column, then UPDATE for defaults (ALTER COLUMN not supported). + +```sql +ALTER TABLE orders ADD COLUMN priority INTEGER; +UPDATE orders SET priority = 0 WHERE priority IS NULL; +``` + +--- + +## Data Operations: Basic CRUD + +Source: [aurora-dsql-samples/quickstart_data](https://github.com/aws-samples/aurora-dsql-samples/tree/main/quickstart_data) + +```sql +-- Insert with transaction +BEGIN; +INSERT INTO owner (name, city) VALUES + ('John Doe', 'New York'), + ('Mary Major', 'Anytown'); +COMMIT; + +-- Query with JOIN +SELECT o.name, COUNT(p.id) as pet_count +FROM owner o +LEFT JOIN pet p ON p.owner_id = o.id +GROUP BY o.name; + +-- Update and delete +UPDATE owner SET city = 'Boston' WHERE name = 'John Doe'; +DELETE FROM owner WHERE city = 'Portland'; +``` + +--- + +## Data Operations: Batch Processing + +**Transaction Limits:** + +- Maximum 3,000 rows per transaction +- Maximum 10 MiB data size per transaction +- Maximum 5 minutes per transaction + +### Safe Batch Insert + +```javascript +async function batchInsert(pool, tenantId, items) { + const BATCH_SIZE = 500; + + for (let i = 0; i < items.length; i += BATCH_SIZE) { + const batch = items.slice(i, i + BATCH_SIZE); + const client = await pool.connect(); + + try { + await client.query('BEGIN'); + + for (const item of batch) { + await client.query( + `INSERT INTO entities (tenant_id, name, metadata) + VALUES ($1, $2, $3)`, + [tenantId, item.name, JSON.stringify(item.metadata)] + ); + } + + await client.query('COMMIT'); + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + 
} + } +} +``` + +### Concurrent Batch Processing + +**Pattern:** SHOULD use concurrent connections for better throughput + +Source: Adapted from [aurora-dsql-samples/javascript](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript) + +```javascript +// Split into batches and process concurrently +async function concurrentBatchInsert(pool, tenantId, items) { + const BATCH_SIZE = 500; + const NUM_WORKERS = 8; + + const batches = []; + for (let i = 0; i < items.length; i += BATCH_SIZE) { + batches.push(items.slice(i, i + BATCH_SIZE)); + } + + const workers = []; + for (let i = 0; i < NUM_WORKERS && i < batches.length; i++) { + workers.push(processBatches(pool, tenantId, batches, i, NUM_WORKERS)); + } + + await Promise.all(workers); +} + +async function processBatches(pool, tenantId, batches, startIdx, step) { + for (let i = startIdx; i < batches.length; i += step) { + const batch = batches[i]; + const client = await pool.connect(); + + try { + await client.query('BEGIN'); + + for (const item of batch) { + await client.query( + 'INSERT INTO entities (tenant_id, name, metadata) VALUES ($1, $2, $3)', + [tenantId, item.name, JSON.stringify(item.metadata)] + ); + } + + await client.query('COMMIT'); + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } + } +} +``` + +--- + +## Migration Execution + +**Pattern:** MUST execute each DDL statement separately (DDL statements execute outside transactions) + +Source: Adapted from [aurora-dsql-samples/java/liquibase](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/liquibase) + +```javascript +const migrations = [ + { + id: '001_initial_schema', + description: 'Create owner and pet tables', + statements: [ + `CREATE TABLE IF NOT EXISTS owner ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name VARCHAR(30) NOT NULL, + city VARCHAR(80) NOT NULL, + telephone VARCHAR(20) + )`, + `CREATE TABLE IF NOT EXISTS pet ( + id UUID PRIMARY KEY DEFAULT 
gen_random_uuid(), + name VARCHAR(30) NOT NULL, + birth_date DATE NOT NULL, + owner_id UUID + )`, + ] + }, + { + id: '002_create_indexes', + description: 'Create async indexes', + statements: [ + 'CREATE INDEX ASYNC idx_owner_city ON owner(city)', + 'CREATE INDEX ASYNC idx_pet_owner ON pet(owner_id)', + ] + }, + { + id: '003_add_columns', + description: 'Add status column', + statements: [ + 'ALTER TABLE pet ADD COLUMN IF NOT EXISTS status VARCHAR(20)', + "UPDATE pet SET status = 'active' WHERE status IS NULL", + ] + } +]; + +async function runMigrations(pool, migrations) { + for (const migration of migrations) { + for (const statement of migration.statements) { + if (statement.trim()) { + await pool.query(statement); + } + } + } +} +``` + +--- + +## Multi-Tenant Isolation + +ALWAYS include tenant_id in WHERE clauses; tenant_id is always first parameter. + +```javascript +async function getOrders(pool, tenantId, status) { + const result = await pool.query( + 'SELECT * FROM orders WHERE tenant_id = $1 AND status = $2', + [tenantId, status] + ); + return result.rows; +} + +async function deleteOrder(pool, tenantId, orderId) { + const check = await pool.query( + 'SELECT order_id FROM orders WHERE tenant_id = $1 AND order_id = $2', + [tenantId, orderId] + ); + + if (check.rows.length === 0) { + throw new Error('Order not found or access denied'); + } + + await pool.query( + 'DELETE FROM orders WHERE tenant_id = $1 AND order_id = $2', + [tenantId, orderId] + ); +} +``` + +--- + +## Application-Layer Referential Integrity + +SHOULD validate references for custom business rules (DSQL provides database-level integrity). 
+ +```javascript +async function createLineItem(pool, tenantId, lineItemData) { + const orderCheck = await pool.query( + 'SELECT order_id FROM orders WHERE tenant_id = $1 AND order_id = $2', + [tenantId, lineItemData.order_id] + ); + + if (orderCheck.rows.length === 0) { + throw new Error('Order does not exist'); + } + + await pool.query( + 'INSERT INTO line_items (tenant_id, order_id, product_id, quantity) VALUES ($1, $2, $3, $4)', + [tenantId, lineItemData.order_id, lineItemData.product_id, lineItemData.quantity] + ); +} + +async function deleteProduct(pool, tenantId, productId) { + const check = await pool.query( + 'SELECT COUNT(*) as count FROM line_items WHERE tenant_id = $1 AND product_id = $2', + [tenantId, productId] + ); + + if (parseInt(check.rows[0].count) > 0) { + throw new Error('Product has existing orders'); + } + + await pool.query( + 'DELETE FROM products WHERE tenant_id = $1 AND product_id = $2', + [tenantId, productId] + ); +} +``` + +--- + +## Sequences and Identity Columns + +Sequences and IDENTITY columns generate integer values and are useful when compact or human-readable identifiers are needed. + +### Identity Columns + +An identity column is a special column generated automatically from an implicit sequence. Use the `GENERATED ... AS IDENTITY` clause in `CREATE TABLE`. CACHE must be specified explicitly as either 1 or >= 65536. 
+ +```sql +CREATE TABLE people ( + id BIGINT GENERATED ALWAYS AS IDENTITY (CACHE 70000) PRIMARY KEY, + name VARCHAR(255), + address TEXT +); + +-- Or with BY DEFAULT, which allows explicit value overrides +CREATE TABLE orders ( + order_number BIGINT GENERATED BY DEFAULT AS IDENTITY (CACHE 70000) PRIMARY KEY, + tenant_id VARCHAR(255) NOT NULL, + status VARCHAR(50) NOT NULL +); +``` + +Inserting rows without specifying the identity column generates values automatically: + +```sql +INSERT INTO people (name, address) VALUES ('A', 'foo'); +INSERT INTO people (name, address) VALUES ('B', 'bar'); + +-- Use DEFAULT to explicitly request the generated value +INSERT INTO people (id, name, address) VALUES (DEFAULT, 'C', 'baz'); +``` + +### Standalone Sequences + +Use `CREATE SEQUENCE` when you need a sequence independent of a specific table column: + +```sql +CREATE SEQUENCE order_seq CACHE 1 START 101; + +SELECT nextval('order_seq'); +-- Returns: 101 + +INSERT INTO distributors VALUES (nextval('order_seq'), 'nothing'); +``` + +### Choosing a CACHE Size + +- **CACHE >= 65536** — high-frequency identifier generation, many concurrent sessions, tolerates gaps (e.g., IoT ingestion, job run IDs) +- **CACHE = 1** — low allocation rates, identifiers should follow allocation order more closely, minimizing gaps matters (e.g., account numbers, reference numbers) + +--- + +## Data Serialization + +**Pattern:** MUST store arrays and JSON as TEXT (runtime-only types). Per [DSQL docs](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility-supported-data-types.html), cast to JSON at query time. + +```javascript +function toTextArray(values) { + return values.join(','); +} + +function fromTextArray(textValue) { + return textValue ? 
textValue.split(',').map(v => v.trim()) : []; +} + +function toTextJSON(object) { + return JSON.stringify(object); +} + +function fromTextJSON(textValue) { + if (!textValue) return null; + try { + return JSON.parse(textValue); + } catch (err) { + console.warn('Invalid JSON in column:', err.message); + return null; + } +} + +const categoriesText = toTextArray(['backend', 'api', 'database']); +await pool.query('INSERT INTO projects (project_id, categories) VALUES ($1, $2)', [projectId, categoriesText]); + +const configText = toTextJSON({ theme: 'dark', notifications: true }); +await pool.query('INSERT INTO user_settings (user_id, preferences) VALUES ($1, $2)', [userId, configText]); +``` + +Query-time operations: + +```sql +SELECT user_id, preferences::jsonb->>'theme' as theme +FROM user_settings WHERE preferences::jsonb->>'notifications' = 'true'; + +SELECT project_id, string_to_array(categories, ',') as category_array FROM projects; +``` + +--- + +## References + +- **Development Guide:** [development-guide.md](./development-guide.md) +- **Language Guide:** [language.md](./language.md) +- **Onboarding Guide:** [onboarding.md](./onboarding.md) +- **AWS Documentation:** [DSQL User Guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/) +- **Sample Code:** [aurora-dsql-samples](https://github.com/aws-samples/aurora-dsql-samples) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/language.md b/plugins/aurora-dsql/skills/aurora-dsql/references/language.md new file mode 100644 index 0000000..2782855 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/language.md @@ -0,0 +1,171 @@ +# DSQL Language-Specific Implementation Examples and Guides + +## Tenets + +- ALWAYS prefer DSQL Connector when available +- MUST follow patterns outlined in [aurora-dsql-samples](https://github.com/aws-samples/aurora-dsql-samples/tree/main/) + for common uses such as installing clients, handling authentication, and performing CRUD operations unless user 
requirements have explicit conflicts with the implementation approach.
[aurora-dsql-samples/python/psycopg2](https://github.com/aws-samples/aurora-dsql-samples/tree/main/python/psycopg2) + - **asyncpg** + - full asynchronous style + - `import aurora_dsql_asyncpg as dsql` + - [DSQL asyncpg preferred example](https://github.com/aws-samples/aurora-dsql-samples/blob/main/python/asyncpg/src/example_preferred.py) + - See [aurora-dsql-samples/python/asyncpg](https://github.com/aws-samples/aurora-dsql-samples/tree/main/python/asyncpg) + +#### SQLAlchemy + +- Supports `psycopg` and `psycopg2` +- See [aurora-dsql-samples/python/sqlalchemy](https://github.com/aws-samples/aurora-dsql-samples/tree/main/python/sqlalchemy) +- Dialect Source: [aurora-dsql-sqlalchemy](https://github.com/awslabs/aurora-dsql-sqlalchemy/tree/main/) + +#### JupyterLab + +- Still SHOULD PREFER using the python connector. +- Popular data science option for interactive computing environment that combines code, text, and visualizations +- Options for Local or using Anazon SageMaker +- REQUIRES downloading the Amazon root certificate from the official trust store +- See [aurora-dsql-samples/python/jupyter](https://github.com/aws-samples/aurora-dsql-samples/blob/main/python/jupyter/) + +### Go + +PREFER using the [DSQL Go Connector](https://github.com/awslabs/aurora-dsql-connectors/tree/main/go/pgx) for automatic IAM auth: + +- **pgx** (recommended) + - Use `aurora-dsql-connectors/go/pgx/dsql` for automatic IAM auth with token caching + - [DSQL pgx preferred example](https://github.com/aws-samples/aurora-dsql-samples/blob/main/go/pgx/src/example_preferred.go) + - Connector: [aurora-dsql-connectors/go/pgx](https://github.com/awslabs/aurora-dsql-connectors/tree/main/go/pgx) + - See [aurora-dsql-samples/go/pgx](https://github.com/aws-samples/aurora-dsql-samples/tree/main/go/pgx) + +### JavaScript/TypeScript + +PREFER using one of the DSQL Node.js Connectors: 
+[node-postgres](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/SECTION_program-with-dsql-connector-for-node-postgres.html) +or [postgres-js](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/SECTION_program-with-dsql-connector-for-postgresjs.html). + +**node-postgres (pg)** (recommended) + +- Use `@aws/aurora-dsql-node-postgres-connector` for automatic IAM auth +- [DSQL node-postgres preferred example](https://github.com/aws-samples/aurora-dsql-samples/blob/main/javascript/node-postgres/src/example_preferred.js) +- See [aurora-dsql-samples/javascript/node-postgres](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript/node-postgres) + +**postgres.js** (recommended) + +- Lightweight alternative with `@aws/aurora-dsql-node-postgres-connector` +- Good for serverless environments +- [DSQL postgres-js preferred example](https://github.com/aws-samples/aurora-dsql-samples/blob/main/javascript/postgres-js/src/example_preferred.js) +- See [aurora-dsql-samples/javascript/postgres-js](https://github.com/aws-samples/aurora-dsql-samples/tree/main/javascript/postgres-js) + +#### Prisma + +- Custom `directUrl` with token refresh middleware +- See [aurora-dsql-samples/typescript/prisma](https://github.com/aws-samples/aurora-dsql-samples/tree/main/typescript/prisma) + +#### Sequelize + +- Configure `dialectOptions` for SSL +- Token refresh in `beforeConnect` hook +- See [aurora-dsql-samples/typescript/sequelize](https://github.com/aws-samples/aurora-dsql-samples/tree/main/typescript/sequelize) + +#### TypeORM + +- Custom DataSource with token refresh +- Create migrations table manually via psql +- See [aurora-dsql-samples/typescript/type-orm](https://github.com/aws-samples/aurora-dsql-samples/tree/main/typescript/type-orm) + +### Java + +PREFER using JDBC with the [DSQL JDBC Connector](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/SECTION_program-with-jdbc-connector.html) + +**JDBC** (PostgreSQL JDBC Driver) + +- Use DSQL JDBC 
Connector for automatic IAM auth + - URL format: `jdbc:aws-dsql:postgresql:///postgres` + - See [aurora-dsql-samples/java/pgjdbc](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/pgjdbc) +- Properties: `wrapperPlugins=iam`, `ssl=true`, `sslmode=verify-full` + +**HikariCP** (Connection Pooling) + +- Wrap JDBC connection, configure max lifetime < 1 hour +- See [aurora-dsql-samples/java/pgjdbc_hikaricp](https://github.com/aws-samples/aurora-dsql-samples/tree/main/java/pgjdbc_hikaricp) + +### Rust + +**SQLx** (async) + +- Use `aws-sdk-dsql` for token generation +- Connection format: `postgres://admin:{token}@{endpoint}:5432/postgres?sslmode=verify-full&application_name=/` +- Use `after_connect` hook: `.after_connect(|conn, _| conn.execute("SET search_path = public"))` +- Implement periodic token refresh with `tokio::spawn` +- See [aurora-dsql-samples/rust/sqlx](https://github.com/aws-samples/aurora-dsql-samples/tree/main/rust/sqlx) + +**Tokio-Postgres** (lower-level async) + +- Direct control over connection lifecycle +- Use an `Arc`-wrapped lock (e.g. `Arc<Mutex<String>>`) for shared token state +- Handle connection errors with retry logic + +### Elixir + +#### Postgrex + +- MUST use Erlang/OTP 26+ +- Driver: [Postgrex](https://hexdocs.pm/postgrex/) ~> 0.19 + - Use Postgrex.query! 
for all queries + - See [aurora-dsql-samples/elixir/postgrex](https://github.com/aws-samples/aurora-dsql-samples/tree/main/elixir/postgrex) +- Connection: Implement `Repo.init/2` callback for dynamic token injection + - MUST set `ssl: true` with `ssl_opts: [verify: :verify_peer, cacerts: :public_key.cacerts_get()]` + - MAY prefer AWS CLI via `System.cmd` to call `generate-db-connect-auth-token` diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/ddl-operations.md b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/ddl-operations.md new file mode 100644 index 0000000..e2b4d4b --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/ddl-operations.md @@ -0,0 +1,826 @@ +# MySQL to DSQL Migration: DDL Operations + +Migration patterns for specific MySQL DDL operations to DSQL-compatible equivalents. + +**MUST read [type-mapping.md](type-mapping.md) first** for data type mappings and the CRITICAL Destructive Operations Warning. +**MUST read [ddl-migrations/overview.md](../ddl-migrations/overview.md)** for the general Table Recreation Pattern and user verification requirements. + +--- + +## Table Recreation Pattern Overview + +MUST follow this sequence with user verification at each step: + +1. **Plan & Confirm** - MUST present migration plan and obtain user approval to proceed +2. **Validate** - Check data compatibility with new structure; MUST report findings to user +3. **Create** - Create new table with desired structure; MUST verify with user before execution +4. **Migrate** - Copy data (batched for tables > 3,000 rows); MUST report progress to user +5. **Verify** - Confirm row counts match; MUST present comparison to user +6. **Swap** - CRITICAL: MUST obtain explicit user confirmation before DROP TABLE +7. 
**Re-index** - Recreate indexes using ASYNC; MUST confirm completion with user + +### Transaction Rules + +- **MUST batch** migrations exceeding 3,000 row mutations +- **PREFER batches of 500-1,000 rows** for optimal throughput +- **MUST respect** 10 MiB data size per transaction +- **MUST respect** 5-minute transaction duration + +--- + +## Common Verify & Swap Pattern + +All migrations end with this pattern (referenced in examples below). + +**CRITICAL: MUST obtain explicit user confirmation before DROP TABLE step.** + +```sql +-- MUST verify counts match +readonly_query("SELECT COUNT(*) FROM target_table") +readonly_query("SELECT COUNT(*) FROM target_table_new") + +-- CHECKPOINT: MUST present count comparison to user and obtain confirmation +-- Agent MUST display: "Original table has X rows, new table has Y rows. +-- Proceeding will DROP the original table. This action is IRREVERSIBLE. +-- Do you want to proceed? (yes/no)" +-- MUST NOT proceed without explicit "yes" confirmation + +-- MUST swap tables (DESTRUCTIVE - requires user confirmation above) +transact(["DROP TABLE target_table"]) +transact(["ALTER TABLE target_table_new RENAME TO target_table"]) + +-- MUST recreate indexes +transact(["CREATE INDEX ASYNC idx_target_tenant ON target_table(tenant_id)"]) +``` + +--- + +## ALTER TABLE ... ALTER COLUMN (Change Column Type) + +**MySQL syntax:** + +```sql +ALTER TABLE table_name ALTER COLUMN column_name datatype; +-- or MySQL-specific: +ALTER TABLE table_name MODIFY COLUMN column_name new_datatype; +ALTER TABLE table_name CHANGE COLUMN old_name new_name new_datatype; +``` + +**DSQL:** MUST use **Table Recreation Pattern**. + +### Pre-Migration Validation + +**MUST validate data compatibility BEFORE migration** to prevent data loss. 
+ +```sql +-- Get current table state +readonly_query("SELECT COUNT(*) as total_rows FROM target_table") +get_schema("target_table") + +-- Example: VARCHAR to INTEGER - check for non-numeric values +readonly_query( + "SELECT COUNT(*) as invalid_count FROM target_table + WHERE column_to_change !~ '^-?[0-9]+$'" +) +-- MUST abort if invalid_count > 0 + +-- Show problematic rows +readonly_query( + "SELECT id, column_to_change FROM target_table + WHERE column_to_change !~ '^-?[0-9]+$' LIMIT 100" +) +``` + +### Migration Steps + +#### Step 1: Create new table with changed type + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + converted_column INTEGER, -- Changed from VARCHAR + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data with type casting + +```sql +transact([ + "INSERT INTO target_table_new (id, converted_column, other_column) + SELECT id, CAST(converted_column AS INTEGER), other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## ALTER TABLE ... DROP COLUMN + +**MySQL syntax:** + +```sql +ALTER TABLE table_name DROP COLUMN column_name; +``` + +**DSQL:** MUST use **Table Recreation Pattern**. + +### Pre-Migration Validation + +```sql +readonly_query("SELECT COUNT(*) as total_rows FROM target_table") +get_schema("target_table") +``` + +### Migration Steps + +#### Step 1: Create new table excluding the column + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + tenant_id VARCHAR(255) NOT NULL, + kept_column1 VARCHAR(255), + kept_column2 INTEGER + -- dropped_column is NOT included + )" +]) +``` + +#### Step 2: Migrate data + +```sql +transact([ + "INSERT INTO target_table_new (id, tenant_id, kept_column1, kept_column2) + SELECT id, tenant_id, kept_column1, kept_column2 + FROM target_table" +]) +``` + +For tables > 3,000 rows, use [Batched Migration Pattern](#batched-migration-pattern). 
+ +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## AUTO_INCREMENT Migration + +**MySQL syntax:** + +```sql +CREATE TABLE users ( + id INT AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) +); +``` + +DSQL provides three options for replacing MySQL's AUTO_INCREMENT. Choose based on your workload requirements. See [Choosing Identifier Types](development-guide.md#choosing-identifier-types) in the development guide for detailed guidance. + +**ALWAYS use `GENERATED AS IDENTITY`** for auto-incrementing integer columns. + +### Option 1: UUID Primary Key (Recommended for Scalability) + +UUIDs are the recommended default because they avoid coordination and scale well for distributed writes. + +```sql +transact([ + "CREATE TABLE users ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name VARCHAR(255) + )" +]) +``` + +### Option 2: IDENTITY Column (Recommended for Integer Auto-Increment) + +Use `GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY` when compact, human-readable integer IDs are needed. CACHE **MUST** be specified explicitly as either `1` or `>= 65536`. + +```sql +-- GENERATED ALWAYS: DSQL always generates the value; explicit inserts rejected unless OVERRIDING SYSTEM VALUE +transact([ + "CREATE TABLE users ( + id BIGINT GENERATED ALWAYS AS IDENTITY (CACHE 65536) PRIMARY KEY, + name VARCHAR(255) + )" +]) + +-- GENERATED BY DEFAULT: DSQL generates a value unless an explicit value is provided (closer to MySQL AUTO_INCREMENT behavior) +transact([ + "CREATE TABLE users ( + id BIGINT GENERATED BY DEFAULT AS IDENTITY (CACHE 65536) PRIMARY KEY, + name VARCHAR(255) + )" +]) +``` + +#### Choosing a CACHE Size + +**REQUIRED:** Specify CACHE explicitly. Supported values are `1` or `>= 65536`. 
+ +- **CACHE >= 65536** — High-frequency inserts, many concurrent sessions, tolerates gaps and ordering effects (e.g., IoT/telemetry, job IDs, order numbers) +- **CACHE = 1** — Low allocation rates, identifiers should follow allocation order closely, minimizing gaps matters more than throughput (e.g., account numbers, reference numbers) + +### Option 3: Explicit SEQUENCE + +Use a standalone sequence when multiple tables share a counter or when you need `nextval`/`setval` control. + +```sql +-- Create the sequence (CACHE MUST be 1 or >= 65536) +transact(["CREATE SEQUENCE users_id_seq CACHE 65536 START 1"]) + +-- Create table using the sequence +transact([ + "CREATE TABLE users ( + id BIGINT PRIMARY KEY DEFAULT nextval('users_id_seq'), + name VARCHAR(255) + )" +]) +``` + +### Migrating Existing AUTO_INCREMENT Data + +#### To UUID Primary Key + +```sql +transact([ + "CREATE TABLE users_new ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + legacy_id INTEGER, -- Preserve original AUTO_INCREMENT ID for reference + name VARCHAR(255) + )" +]) + +transact([ + "INSERT INTO users_new (id, legacy_id, name) + SELECT gen_random_uuid(), id, name + FROM users" +]) +``` + +If other tables reference the old integer ID, update those references to use the new UUID or the `legacy_id` column. 
+ +#### To IDENTITY Column (Preserving Integer IDs) + +```sql +-- Use GENERATED BY DEFAULT to allow explicit ID values during migration +transact([ + "CREATE TABLE users_new ( + id BIGINT GENERATED BY DEFAULT AS IDENTITY (CACHE 65536) PRIMARY KEY, + name VARCHAR(255) + )" +]) + +-- Migrate with original integer IDs preserved +transact([ + "INSERT INTO users_new (id, name) + SELECT id, name + FROM users" +]) + +-- Set the identity sequence to continue after the max existing ID +-- Get the max ID first: +readonly_query("SELECT MAX(id) as max_id FROM users_new") +-- Then reset the sequence (replace 'users_new_id_seq' with actual sequence name from get_schema): +transact(["SELECT setval('users_new_id_seq', (SELECT MAX(id) FROM users_new))"]) +``` + +**Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## ENUM Type Migration + +**MySQL syntax:** + +```sql +CREATE TABLE orders ( + id INT AUTO_INCREMENT PRIMARY KEY, + status ENUM('pending', 'processing', 'shipped', 'delivered') NOT NULL +); +``` + +**DSQL equivalent using VARCHAR with CHECK:** + +```sql +transact([ + "CREATE TABLE orders ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + status VARCHAR(255) NOT NULL CHECK (status IN ('pending', 'processing', 'shipped', 'delivered')) + )" +]) +``` + +### Migrating Existing ENUM Data + +```sql +-- ENUM values are already stored as strings; direct copy is safe +transact([ + "INSERT INTO orders_new (id, status) + SELECT gen_random_uuid(), status + FROM orders" +]) +``` + +--- + +## SET Type Migration + +**MySQL syntax:** + +```sql +CREATE TABLE user_preferences ( + id INT AUTO_INCREMENT PRIMARY KEY, + permissions SET('read', 'write', 'delete', 'admin') +); +``` + +**DSQL equivalent using TEXT (comma-separated):** + +```sql +transact([ + "CREATE TABLE user_preferences ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + permissions TEXT -- Stored as comma-separated: 'read,write,admin' + )" +]) +``` + +**Note:** Application layer MUST validate 
and parse SET values. MySQL stores SET values as comma-separated strings internally, so direct migration preserves the format. + +--- + +## ON UPDATE CURRENT_TIMESTAMP Migration + +**MySQL syntax:** + +```sql +CREATE TABLE records ( + id INT AUTO_INCREMENT PRIMARY KEY, + data TEXT, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP +); +``` + +**DSQL equivalent:** + +```sql +transact([ + "CREATE TABLE records ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + data TEXT, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + )" +]) +``` + +**MUST explicitly set** `updated_at = CURRENT_TIMESTAMP` in every UPDATE statement to replicate `ON UPDATE CURRENT_TIMESTAMP` behavior: + +```sql +transact([ + "UPDATE records SET data = 'new_value', updated_at = CURRENT_TIMESTAMP + WHERE id = 'record-uuid'" +]) +``` + +--- + +## FOREIGN KEY Migration + +**MySQL syntax:** + +```sql +CREATE TABLE orders ( + id INT AUTO_INCREMENT PRIMARY KEY, + customer_id INT, + FOREIGN KEY (customer_id) REFERENCES customers(id) +); +``` + +**MUST implement referential integrity at the application layer:** + +```sql +-- Create table with reference column (enforce integrity in application layer) +transact([ + "CREATE TABLE orders ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + customer_id UUID NOT NULL + )" +]) + +-- Create index for the reference column +transact(["CREATE INDEX ASYNC idx_orders_customer ON orders(customer_id)"]) +``` + +**Application layer MUST enforce referential integrity:** + +```sql +-- Before INSERT: validate parent exists +readonly_query( + "SELECT id FROM customers WHERE id = 'customer-uuid'" +) +-- MUST abort INSERT if parent not found + +-- Before DELETE of parent: check for dependents +readonly_query( + "SELECT COUNT(*) as dependent_count FROM orders + WHERE customer_id = 'customer-uuid'" +) +-- MUST abort DELETE if dependent_count > 0 +``` + +--- + +## ALTER COLUMN SET/DROP NOT NULL Migration + +**MySQL syntax:** + +```sql +ALTER TABLE 
table_name MODIFY COLUMN column_name datatype NOT NULL; +ALTER TABLE table_name MODIFY COLUMN column_name datatype NULL; +``` + +**DSQL:** MUST use **Table Recreation Pattern**. + +### Pre-Migration Validation (for SET NOT NULL) + +```sql +readonly_query( + "SELECT COUNT(*) as null_count FROM target_table + WHERE target_column IS NULL" +) +-- MUST ABORT if null_count > 0, or plan to provide default values +``` + +### Migration Steps + +#### Step 1: Create new table with changed constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + target_column VARCHAR(255) NOT NULL, -- Changed from nullable + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data (with default for NULLs if needed) + +```sql +transact([ + "INSERT INTO target_table_new (id, target_column, other_column) + SELECT id, COALESCE(target_column, 'default_value'), other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## ALTER COLUMN SET/DROP DEFAULT Migration + +**MySQL syntax:** + +```sql +ALTER TABLE table_name ALTER COLUMN column_name SET DEFAULT value; +ALTER TABLE table_name ALTER COLUMN column_name DROP DEFAULT; +``` + +**DSQL:** MUST use **Table Recreation Pattern**. 
+ +### Migration Steps (SET DEFAULT) + +#### Step 1: Create new table with default value + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + status VARCHAR(50) DEFAULT 'pending', -- Added default + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, status, other_column) + SELECT id, status, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +### Migration Steps (DROP DEFAULT) + +#### Step 1: Create new table without default + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + status VARCHAR(50), -- Removed DEFAULT + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, status, other_column) + SELECT id, status, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## ADD/DROP CONSTRAINT Migration + +**MySQL syntax:** + +```sql +ALTER TABLE table_name ADD CONSTRAINT constraint_name UNIQUE (column_name); +ALTER TABLE table_name ADD CONSTRAINT constraint_name CHECK (condition); +ALTER TABLE table_name DROP CONSTRAINT constraint_name; +-- or MySQL-specific: +ALTER TABLE table_name DROP INDEX index_name; +ALTER TABLE table_name DROP CHECK constraint_name; +``` + +**DSQL:** MUST use **Table Recreation Pattern**. 
+ +### Pre-Migration Validation (for ADD CONSTRAINT) + +**MUST validate existing data satisfies the new constraint.** + +```sql +-- For UNIQUE constraint: check for duplicates +readonly_query( + "SELECT target_column, COUNT(*) as cnt FROM target_table + GROUP BY target_column HAVING COUNT(*) > 1 LIMIT 10" +) +-- MUST ABORT if any duplicates exist + +-- For CHECK constraint: validate all rows pass +readonly_query( + "SELECT COUNT(*) as invalid_count FROM target_table + WHERE NOT (check_condition)" +) +-- MUST ABORT if invalid_count > 0 +``` + +### Migration Steps (ADD CONSTRAINT) + +#### Step 1: Create new table with the constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + email VARCHAR(255) UNIQUE, -- Added UNIQUE constraint + age INTEGER CHECK (age >= 0), -- Added CHECK constraint + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, email, age, other_column) + SELECT id, email, age, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +### Migration Steps (DROP CONSTRAINT) + +#### Step 1: Identify existing constraints + +```sql +readonly_query( + "SELECT constraint_name, constraint_type + FROM information_schema.table_constraints + WHERE table_name = 'target_table' + AND constraint_type IN ('UNIQUE', 'CHECK')" +) +``` + +#### Step 2: Create new table without the constraint + +```sql +transact([ + "CREATE TABLE target_table_new ( + id UUID PRIMARY KEY, + email VARCHAR(255), -- Removed UNIQUE constraint + other_column TEXT + )" +]) +``` + +#### Step 3: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (id, email, other_column) + SELECT id, email, other_column + FROM target_table" +]) +``` + +**Step 4: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## MODIFY PRIMARY KEY Migration + +**MySQL syntax:** + +```sql +ALTER TABLE table_name DROP 
PRIMARY KEY, ADD PRIMARY KEY (new_column); +``` + +**DSQL:** MUST use **Table Recreation Pattern**. + +### Pre-Migration Validation + +**MUST validate new PK column has unique, non-null values.** + +```sql +-- Check for duplicates +readonly_query( + "SELECT new_pk_column, COUNT(*) as cnt FROM target_table + GROUP BY new_pk_column HAVING COUNT(*) > 1 LIMIT 10" +) +-- MUST ABORT if any duplicates exist + +-- Check for NULLs +readonly_query( + "SELECT COUNT(*) as null_count FROM target_table + WHERE new_pk_column IS NULL" +) +-- MUST ABORT if null_count > 0 +``` + +### Migration Steps + +#### Step 1: Create new table with new primary key + +```sql +transact([ + "CREATE TABLE target_table_new ( + new_pk_column UUID PRIMARY KEY, -- New PK + old_pk_column VARCHAR(255), -- Demoted to regular column + other_column TEXT + )" +]) +``` + +#### Step 2: Copy data + +```sql +transact([ + "INSERT INTO target_table_new (new_pk_column, old_pk_column, other_column) + SELECT new_pk_column, old_pk_column, other_column + FROM target_table" +]) +``` + +**Step 3: Verify and swap** (see [Common Pattern](#common-verify--swap-pattern)) + +--- + +## Batched Migration Pattern + +**REQUIRED for tables exceeding 3,000 rows.** + +### Batch Size Rules + +- **PREFER batches of 500-1,000 rows** for optimal performance +- Smaller batches reduce lock contention and enable better concurrency + +### OFFSET-Based Batching + +```sql +readonly_query("SELECT COUNT(*) as total FROM target_table") +-- Calculate: batches_needed = CEIL(total / 1000) + +-- Batch 1 +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000 OFFSET 0" +]) + +-- Batch 2 +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000 OFFSET 1000" +]) +-- Continue until all rows migrated... 
+``` + +### Cursor-Based Batching (Preferred for Large Tables) + +Better performance than OFFSET for very large tables: + +```sql +-- First batch +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + ORDER BY id LIMIT 1000" +]) + +-- Get last processed ID +readonly_query("SELECT MAX(id) as last_id FROM target_table_new") + +-- Subsequent batches +transact([ + "INSERT INTO target_table_new (id, col1, col2) + SELECT id, col1, col2 FROM target_table + WHERE id > 'last_processed_id' + ORDER BY id LIMIT 1000" +]) +``` + +### Progress Tracking + +```sql +readonly_query( + "SELECT (SELECT COUNT(*) FROM target_table_new) as migrated, + (SELECT COUNT(*) FROM target_table) as total" +) +``` + +--- + +## Error Handling + +### Pre-Migration Checks + +1. **Verify table exists** + + ```sql + readonly_query( + "SELECT table_name FROM information_schema.tables + WHERE table_name = 'target_table'" + ) + ``` + +2. **Verify DDL permissions** + +### Data Validation Errors + +**MUST abort migration and report** when: + +- Type conversion would fail (e.g., non-numeric VARCHAR to INTEGER) +- Value truncation would occur (e.g., TEXT to VARCHAR(n) exceeding length) +- NOT NULL constraint would be violated +- UNSIGNED check would fail on negative values + +```sql +-- Find problematic rows for type conversion +readonly_query( + "SELECT id, problematic_column FROM target_table + WHERE problematic_column !~ '^-?[0-9]+$' LIMIT 100" +) + +-- Find values exceeding target VARCHAR length +readonly_query( + "SELECT id, LENGTH(text_column) as len FROM target_table + WHERE LENGTH(text_column) > 255 LIMIT 100" +) +``` + +### Recovery from Failed Migration + +```sql +-- Check table state +readonly_query( + "SELECT table_name FROM information_schema.tables + WHERE table_name IN ('target_table', 'target_table_new')" +) +``` + +- **Both tables exist:** Original safe -> `DROP TABLE IF EXISTS target_table_new` and restart +- **Only new table exists:** Verify 
count, then complete rename diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/full-example.md b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/full-example.md new file mode 100644 index 0000000..e9c4860 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/full-example.md @@ -0,0 +1,118 @@ +# MySQL to DSQL Migration: Full Example + +End-to-end example migrating a complete MySQL CREATE TABLE to DSQL. + +**MUST read [type-mapping.md](type-mapping.md) first** for data type mappings and the CRITICAL Destructive Operations Warning. +**MUST read [ddl-operations.md](ddl-operations.md)** for DDL operation patterns. + +--- + +## Original MySQL Schema + +```sql +CREATE TABLE products ( + id INT AUTO_INCREMENT PRIMARY KEY, + tenant_id INT NOT NULL, + name VARCHAR(255) NOT NULL, + description MEDIUMTEXT, + price DECIMAL(10,2) NOT NULL, + category ENUM('electronics', 'clothing', 'food', 'other') DEFAULT 'other', + tags SET('sale', 'new', 'featured'), + metadata JSON, + stock INT UNSIGNED DEFAULT 0, + is_active TINYINT(1) DEFAULT 1, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + FOREIGN KEY (tenant_id) REFERENCES tenants(id), + INDEX idx_tenant (tenant_id), + INDEX idx_category (category), + FULLTEXT INDEX idx_name_desc (name, description) +) ENGINE=InnoDB; +``` + +--- + +## Migrated DSQL Schema + +```sql +-- Step 1: Create table (one DDL per transaction) +transact([ + "CREATE TABLE products ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL, + description TEXT, + price DECIMAL(10,2) NOT NULL, + category VARCHAR(255) DEFAULT 'other' CHECK (category IN ('electronics', 'clothing', 'food', 'other')), + tags TEXT, + metadata TEXT, + stock INTEGER DEFAULT 0 CHECK (stock >= 0), + is_active BOOLEAN DEFAULT true, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + )" +]) + +-- Step 2: Create indexes (each in separate transaction, MUST use ASYNC) +transact(["CREATE INDEX ASYNC idx_products_tenant ON products(tenant_id)"]) +transact(["CREATE INDEX ASYNC idx_products_category ON products(tenant_id, category)"]) +-- MUST implement text search at application layer for FULLTEXT index equivalent +``` + +--- + +## Migration Decisions Summary + +| MySQL Feature | DSQL Decision | +| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `AUTO_INCREMENT` | UUID with `gen_random_uuid()`, or IDENTITY column with CACHE, or SEQUENCE (see [AUTO_INCREMENT Migration](ddl-operations.md#auto_increment-migration)) | +| `INT` tenant_id | `VARCHAR(255)` for multi-tenant pattern | +| `MEDIUMTEXT` | `TEXT` | +| `ENUM(...)` | `VARCHAR(255)` with `CHECK` constraint | +| `SET(...)` | `TEXT` (comma-separated) | +| `JSON` | `TEXT` (JSON.stringify) | +| `UNSIGNED` | `CHECK (col >= 0)` | +| `TINYINT(1)` | `BOOLEAN` | +| `DATETIME` | `TIMESTAMP` | +| `ON UPDATE CURRENT_TIMESTAMP` | Application-layer `SET updated_at = CURRENT_TIMESTAMP` | +| `FOREIGN KEY` | Application-layer referential integrity | +| `INDEX` | `CREATE INDEX ASYNC` | +| `FULLTEXT INDEX` | Application-layer text search | +| `ENGINE=InnoDB` | MUST omit | + +--- + +## Best Practices Summary + +### User Verification (CRITICAL) + +- **MUST present** complete migration plan to user before any execution +- **MUST obtain** explicit user confirmation before DROP TABLE operations +- **MUST verify** with user at each checkpoint during migration +- **MUST obtain** explicit user approval before proceeding with destructive actions +- **MUST recommend** testing migrations on non-production data first +- **MUST confirm** user has backup or accepts data loss risk + +### MySQL-Specific Migration Rules + +- **MUST map** all MySQL 
data types to DSQL equivalents before creating tables +- **MUST convert** AUTO_INCREMENT to UUID with gen_random_uuid(), IDENTITY column with `GENERATED AS IDENTITY (CACHE ...)`, or explicit SEQUENCE -- ALWAYS use `GENERATED AS IDENTITY` for auto-incrementing columns (see [AUTO_INCREMENT Migration](ddl-operations.md#auto_increment-migration)) +- **MUST replace** ENUM with VARCHAR and CHECK constraint +- **MUST replace** SET with TEXT (comma-separated) +- **MUST replace** JSON columns with TEXT +- **MUST replace** FOREIGN KEY constraints with application-layer referential integrity +- **MUST replace** ON UPDATE CURRENT_TIMESTAMP with application-layer updates +- **MUST convert** all index creation to use CREATE INDEX ASYNC +- **MUST omit** ENGINE, CHARSET, COLLATE, and other MySQL-specific table options +- **MUST replace** UNSIGNED with CHECK (col >= 0) constraint +- **MUST convert** TINYINT(1) to BOOLEAN + +### Technical Requirements + +- **MUST validate** data compatibility before type changes +- **MUST batch** tables exceeding 3,000 rows +- **MUST verify** row counts before and after migration +- **MUST recreate** indexes after table swap using ASYNC +- **MUST verify** new table before dropping original table +- **PREFER** cursor-based batching for very large tables +- **PREFER** batches of 500-1,000 rows for optimal throughput diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/type-mapping.md b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/type-mapping.md new file mode 100644 index 0000000..23c3c0f --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/mysql-migrations/type-mapping.md @@ -0,0 +1,184 @@ +# MySQL to DSQL Migration: Type Mapping & Feature Alternatives + +This guide provides migration patterns for converting MySQL DDL operations to Aurora DSQL-compatible equivalents, including the **Table Recreation Pattern** for schema modifications that require rebuilding tables. 
+ +For DDL operation details, see [ddl-operations.md](ddl-operations.md). For a full migration example, see [full-example.md](full-example.md). + +--- + +## CRITICAL: Destructive Operations Warning + +**The Table Recreation Pattern involves DESTRUCTIVE operations that can result in DATA LOSS.** + +Table recreation requires dropping the original table, which is **irreversible**. If any step fails after the original table is dropped, data may be permanently lost. + +### Mandatory User Verification Requirements + +Agents MUST obtain explicit user approval before executing migrations on live tables: + +1. **MUST present the complete migration plan** to the user before any execution +2. **MUST clearly state** that this operation will DROP the original table +3. **MUST confirm** the user has a current backup or accepts the risk of data loss +4. **MUST verify with the user** at each checkpoint before proceeding: + - Before creating the new table structure + - Before beginning data migration + - Before dropping the original table (CRITICAL CHECKPOINT) + - Before renaming the new table +5. **MUST NOT proceed** with any destructive action without explicit user confirmation +6. **MUST recommend** performing migrations on non-production environments first + +### Risk Acknowledgment + +Before proceeding, the user MUST confirm: + +- [ ] They understand this is a destructive operation +- [ ] They have a backup of the table data (or accept the risk) +- [ ] They approve the agent to execute each step with verification +- [ ] They understand the migration cannot be automatically rolled back after DROP TABLE + +--- + +## MySQL Data Type Mapping to DSQL + +Map MySQL data types to their DSQL equivalents. 
+ +### Numeric Types + +| MySQL Type | DSQL Equivalent | Notes | +| --------------------------- | ----------------------------------------------- | ------------------------------------------------------ | +| TINYINT | SMALLINT | DSQL has no TINYINT; SMALLINT is smallest integer type | +| SMALLINT | SMALLINT | Direct equivalent | +| MEDIUMINT | INTEGER | DSQL has no MEDIUMINT; use INTEGER | +| INT / INTEGER | INTEGER | Direct equivalent | +| BIGINT | BIGINT | Direct equivalent | +| TINYINT(1) | BOOLEAN | MySQL convention for booleans maps to native BOOLEAN | +| FLOAT | REAL | Direct equivalent | +| DOUBLE | DOUBLE PRECISION | Direct equivalent | +| DECIMAL(p,s) / NUMERIC(p,s) | DECIMAL(p,s) / NUMERIC(p,s) | Direct equivalent | +| BIT(1) | BOOLEAN | Single bit maps to BOOLEAN | +| BIT(n) | BYTEA | Multi-bit maps to BYTEA | +| UNSIGNED integers | Use next-larger signed type or CHECK constraint | DSQL has no UNSIGNED; use CHECK (col >= 0) | + +### String Types + +| MySQL Type | DSQL Equivalent | Notes | +| ----------------- | ---------------------------------- | ---------------------------------------------------------------------------------------- | +| CHAR(n) | CHAR(n) | Direct equivalent | +| VARCHAR(n) | VARCHAR(n) | Direct equivalent | +| TINYTEXT | TEXT | DSQL uses TEXT for all unbounded strings | +| TEXT | TEXT | Direct equivalent | +| MEDIUMTEXT | TEXT | DSQL uses TEXT for all unbounded strings | +| LONGTEXT | TEXT | DSQL uses TEXT for all unbounded strings | +| ENUM('a','b','c') | VARCHAR(255) with CHECK constraint | See [ENUM Migration](ddl-operations.md#enum-type-migration) | +| SET('a','b','c') | TEXT | Store as comma-separated TEXT; see [SET Migration](ddl-operations.md#set-type-migration) | + +### Date/Time Types + +| MySQL Type | DSQL Equivalent | Notes | +| ---------- | --------------- | ---------------------------------------------------------------- | +| DATE | DATE | Direct equivalent | +| DATETIME | TIMESTAMP | DATETIME maps to TIMESTAMP | +| 
TIMESTAMP | TIMESTAMP | Direct equivalent; MUST manage auto-updates in application layer | +| TIME | TIME | Direct equivalent | +| YEAR | INTEGER | Store as 4-digit integer | + +### Binary Types + +| MySQL Type | DSQL Equivalent | Notes | +| ------------ | --------------- | ----------------------------------- | +| BINARY(n) | BYTEA | DSQL uses BYTEA for binary data | +| VARBINARY(n) | BYTEA | DSQL uses BYTEA for binary data | +| TINYBLOB | BYTEA | DSQL uses BYTEA for all binary data | +| BLOB | BYTEA | DSQL uses BYTEA for all binary data | +| MEDIUMBLOB | BYTEA | DSQL uses BYTEA for all binary data | +| LONGBLOB | BYTEA | DSQL uses BYTEA for all binary data | + +### Other Types + +| MySQL Type | DSQL Equivalent | Notes | +| -------------- | --------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| JSON | TEXT | MUST store as TEXT | +| AUTO_INCREMENT | UUID with gen_random_uuid(), IDENTITY column, or SEQUENCE | See [AUTO_INCREMENT Migration](ddl-operations.md#auto_increment-migration) for all three options | + +--- + +## MySQL Features Requiring DSQL Alternatives + +MUST use the following DSQL alternatives for these MySQL features: + +| MySQL Feature | DSQL Alternative | +| ---------------------------------- | --------------------------------------------------- | +| FOREIGN KEY constraints | Application-layer referential integrity | +| FULLTEXT indexes | Application-layer text search | +| SPATIAL indexes | Application-layer spatial queries | +| ENGINE=InnoDB/MyISAM | MUST omit (DSQL manages storage automatically) | +| ON UPDATE CURRENT_TIMESTAMP | Application-layer timestamp management | +| GENERATED columns (virtual/stored) | Application-layer computation | +| PARTITION BY | MUST omit (DSQL manages distribution automatically) | +| TRIGGERS | Application-layer logic | +| STORED PROCEDURES / FUNCTIONS | Application-layer logic | + +--- + +## MySQL DDL Operation 
Mapping + +### Directly Supported Operations + +These MySQL operations have direct DSQL equivalents: + +| MySQL DDL | DSQL Equivalent | +| ------------------------------------------ | --------------------------------------------------- | +| `CREATE TABLE ...` | `CREATE TABLE ...` (with type adjustments) | +| `DROP TABLE table_name` | `DROP TABLE table_name` | +| `ALTER TABLE ... ADD COLUMN col type` | `ALTER TABLE ... ADD COLUMN col type` | +| `ALTER TABLE ... RENAME COLUMN old TO new` | `ALTER TABLE ... RENAME COLUMN old TO new` | +| `ALTER TABLE ... RENAME TO new_name` | `ALTER TABLE ... RENAME TO new_name` | +| `CREATE INDEX idx ON t(col)` | `CREATE INDEX ASYNC idx ON t(col)` (MUST use ASYNC) | +| `DROP INDEX idx ON t` | `DROP INDEX idx` (MUST omit the ON clause) | + +### Operations Requiring Table Recreation Pattern + +These MySQL operations MUST use the **Table Recreation Pattern** in DSQL: + +| MySQL DDL | DSQL Approach | +| -------------------------------------------------------------- | ------------------------------------------------------------- | +| `ALTER TABLE ... MODIFY COLUMN col new_type` | Table recreation with type cast | +| `ALTER TABLE ... CHANGE COLUMN old new new_type` | Table recreation (type change) or RENAME COLUMN (rename only) | +| `ALTER TABLE ... ALTER COLUMN col datatype` | Table recreation with type cast | +| `ALTER TABLE ... DROP COLUMN col` | Table recreation excluding the column | +| `ALTER TABLE ... ALTER COLUMN col SET DEFAULT val` | Table recreation with DEFAULT in new definition | +| `ALTER TABLE ... ALTER COLUMN col DROP DEFAULT` | Table recreation without DEFAULT | +| `ALTER TABLE ... ADD CONSTRAINT ... UNIQUE` | Table recreation with constraint | +| `ALTER TABLE ... ADD CONSTRAINT ... CHECK` | Table recreation with constraint | +| `ALTER TABLE ... DROP CONSTRAINT ...` | Table recreation without constraint | +| `ALTER TABLE ... 
DROP PRIMARY KEY, ADD PRIMARY KEY (new_cols)` | Table recreation with new PK | + +### Operations Requiring Application-Layer Implementation + +MUST implement these MySQL operations at the application layer: + +| MySQL DDL | DSQL Approach | +| -------------------------------------- | --------------------------------------------------------- | +| `ALTER TABLE ... ADD FOREIGN KEY` | MUST implement referential integrity in application layer | +| `ALTER TABLE ... ADD FULLTEXT INDEX` | MUST implement text search in application layer | +| `ALTER TABLE ... ADD SPATIAL INDEX` | MUST implement spatial queries in application layer | +| `ALTER TABLE ... ENGINE=...` | MUST omit | +| `ALTER TABLE ... AUTO_INCREMENT=...` | Use SEQUENCE with setval() or IDENTITY column | +| `CREATE TRIGGER` | MUST implement in application-layer logic | +| `CREATE PROCEDURE` / `CREATE FUNCTION` | MUST implement in application-layer logic | + +--- + +## MySQL-to-DSQL Type Conversion Validation Matrix + +| MySQL From Type | DSQL To Type | Validation | +| ----------------------------- | ------------------ | ------------------------------------------------------- | +| VARCHAR -> INT/INTEGER | VARCHAR -> INTEGER | MUST validate all values are numeric | +| VARCHAR -> TINYINT(1)/BOOLEAN | VARCHAR -> BOOLEAN | MUST validate values are 'true'/'false'/'t'/'f'/'1'/'0' | +| INT/INTEGER -> VARCHAR | INTEGER -> VARCHAR | Safe conversion | +| TEXT -> VARCHAR(n) | TEXT -> VARCHAR(n) | MUST validate max length <= n | +| DATETIME -> DATE | TIMESTAMP -> DATE | Safe (truncates time) | +| INT -> DECIMAL | INTEGER -> DECIMAL | Safe conversion | +| ENUM -> VARCHAR | VARCHAR -> VARCHAR | Safe (already stored as VARCHAR in DSQL) | +| MEDIUMINT -> BIGINT | INTEGER -> BIGINT | Safe conversion | +| FLOAT -> DECIMAL | REAL -> DECIMAL | May lose precision; MUST validate acceptable | diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/onboarding.md b/plugins/aurora-dsql/skills/aurora-dsql/references/onboarding.md new 
file mode 100644 index 0000000..e5a6ef0 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/onboarding.md @@ -0,0 +1,376 @@ +--- +inclusion: manual +--- + +# Aurora DSQL Get Started Guide + +## Overview + +This guide provides steps to help users get started with Aurora DSQL in their project. It sets up their DSQL cluster with IAM authentication and connects their database to their code by understanding the context within the codebase. + +## Use Case + +These guidelines apply when users say "Get started with DSQL" or similar phrases. The user's codebase may be mature (with existing database connections) or have little to no code - the guidelines should apply to both cases. + +## Agent Communication Style + +**Keep all responses succinct:** + +- ALWAYS tell the user what you did. + - Responses MUST be concise and concrete. + - ALWAYS include descriptions of the necessary steps. + - ALWAYS remove unnecessary verbiage. + - Example: + - "Created an inventory table with 4 columns" + - "Updated the product column to be NOT NULL" +- Ask direct questions when needed: + - ALWAYS ask clarifying questions to avoid inaccurate assumptions + - User ambiguity SHOULD result in questions. + - MUST clarify incompatible user decisions + - Example: + - "What column names would you like in this table?" + - "What is the column name of the primary key?" + - "JSON must be serialized. Would you like to stringify the JSON to serialize it as TEXT?" + +**Examples:** + +- **Good**: "Generated auth token. Ready to connect with psql?" +- **Bad**: "I'm going to generate an authentication token using the AWS CLI which will allow you to connect to your database. This token will be valid for..."
+ +--- + +## Get Started with DSQL (Interactive Guide) + +**TRIGGER PHRASE:** When the user says "Get started with DSQL", "Get started with Aurora DSQL", or similar phrases, provide an interactive onboarding experience by following these steps: + +**Before starting:** Let the user know they can pause and resume anytime by saying "Continue with DSQL setup" if they need to come back later. + +**RESUME TRIGGER:** If the user says "Continue with DSQL setup" or similar, check what's already configured (AWS credentials, clusters, MCP server, connection tested) and resume from where they left off. Ask them which step they'd like to continue from or analyze their setup to determine automatically. + +### Step 1: Verify Prerequisites + +**Check AWS credentials:** + +```bash +aws sts get-caller-identity +``` + +**If not configured:** + +- Guide them through `aws configure` +- MUST verify IAM permissions include `dsql:CreateCluster`, `dsql:GetCluster`, `dsql:DbConnectAdmin` +- Recommend [`AmazonAuroraDSQLConsoleFullAccess`](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonAuroraDSQLConsoleFullAccess.html) managed policy + +**Check PostgreSQL client:** + +```bash +psql --version +``` + +**If missing OR version <14:** +DSQL requires SNI support from psql >=14.
+ +- macOS: `brew install postgresql@17` +- Linux (Debian/Ubuntu): `sudo apt-get install postgresql-client` +- Linux (RHEL/CentOS/Amazon Linux): + + ```bash + sudo yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-9-x86_64/pgdg-redhat-repo-latest.noarch.rpm + sudo yum install -y postgresql17 + ``` + +### Step 2: Check for Existing Clusters + +**Set region (uses AWS_REGION or REGION if set, defaults to us-east-1):** + +```bash +REGION=${AWS_REGION:-${REGION:-us-east-1}} +echo $REGION +``` + +**List clusters in the region:** + +```bash +aws dsql list-clusters --region $REGION +``` + +**If they have NO clusters:** + +- Ask: "Would you like to create a new DSQL cluster in $REGION or a different region?" + - If yes, proceed to create single-region cluster + - If they want different region, ask which one and update REGION variable + +**If they have ANY clusters:** + +- List ALL cluster identifiers with creation dates and status +- Ask: "Would you like to use one of these clusters or create a new one?" + - If using existing, proceed to Step 3. + - If creating new: + - "Which region would you like to create a new cluster in?" + - Immediately update REGION variable +- Confirm all selections before proceeding.
+ +**Create cluster command (if needed):** + +```bash +aws dsql create-cluster --region $REGION --tags '{"Name":"my-dsql-cluster","created_by":""}' +``` + +**Wait for ACTIVE status** (takes ~60 seconds): + +```bash +aws dsql get-cluster --identifier CLUSTER_ID --region $REGION +``` + +### Step 3: Get Cluster Connection Details + +**Construct cluster endpoint:** + +```bash +CLUSTER_ID="" +CLUSTER_ENDPOINT="${CLUSTER_ID}.dsql.${REGION}.on.aws" +echo $CLUSTER_ENDPOINT +``` + +**Store endpoint for their project environment:** + +- Check for `.env` file or environment config +- Add or update: `DSQL_ENDPOINT=` +- Add region: `AWS_REGION=$REGION` +- ALWAYS try reading `.env` first before modifying +- If file is unreadable, use: `echo "DSQL_ENDPOINT=$CLUSTER_ENDPOINT" >> .env` + +### Step 4: Set Up MCP Server (Optional) + +Would the user like to be guided through setting up the MCP server? + +If so, follow the steps detailed in [mcp-setup.md](../mcp/mcp-setup.md) + +**MCP server provides:** + +- Direct query execution from agent +- Schema exploration tools +- Simplified database operations + +### Step 5: Test Connection + +**Generate authentication token and connect:** + +```bash +export PGPASSWORD=$(aws dsql generate-db-connect-admin-auth-token \ + --region $REGION \ + --hostname $CLUSTER_ENDPOINT \ + --expires-in 3600) + +export PGSSLMODE=require +export PGAPPNAME="/" + +psql --quiet -h $CLUSTER_ENDPOINT -U admin -d postgres +``` + +**Verify with test query:** + +```sql +SELECT current_database(), version(); +``` + +**If connection fails:** + +- Check token expiration (regenerate if needed) +- Verify SSL mode is set +- Confirm cluster is ACTIVE +- Check IAM permissions + +### Step 6: Understand the Project + +**First, check if this is an empty/new project:** + +- Look for existing source code, routes, or application logic +- Check if it's just minimal boilerplate + +**If empty or near-empty project:** + +- Ask briefly (1-2 questions): What are they building? 
Any specific tech preferences? +- Remember context for subsequent steps + +**If established project:** + +- Skip questions - infer from codebase +- Check for existing database code or ORMs +- Update relevant code to use DSQL + +**ALWAYS reference [`./development-guide.md`](./development-guide.md) before making schema changes** + +### Step 7: Install Database Driver + +**Based on their language, install appropriate driver (some examples):** + +**JavaScript/TypeScript:** + +```bash +npm install @aws-sdk/credential-providers @aws-sdk/dsql-signer pg tsx +npm install @aws/aurora-dsql-node-postgres-connector +``` + +**Python:** + +```bash +pip install psycopg2-binary +pip install aurora-dsql-python-connector +``` + +**Go:** + +```bash +go get github.com/jackc/pgx/v5 +``` + +**Rust:** + +```bash +cargo add sqlx --features postgres,runtime-tokio-native-tls +cargo add aws-sdk-dsql tokio --features full +``` + +**For implementation patterns, reference [`./dsql-examples.md`](./dsql-examples.md) and [`./language.md`](./language.md)** + +### Step 8: Schema Setup + +**Check for existing schema:** + +- Search for `.sql` files, migration folders, ORM schemas (Prisma, Drizzle, TypeORM) + +**If existing schema found:** + +- Show what you found +- Ask: "Found existing schema definitions. Want to migrate these to DSQL?" +- If yes, MUST verify DSQL compatibility: + - No SERIAL types (use `GENERATED AS IDENTITY` with sequences, or UUID) + - No foreign keys (implement in application) + - No array/JSON column types (serialize as TEXT) + - Reference [`./development-guide.md`](./development-guide.md) for full constraints + +**If no schema found:** + +- Ask if they want to: + 1. Create simple example table + 2. Design custom schema together + 3. 
Skip for now + +**If creating example table:** + +Use MCP server or psql to execute: + +```sql +CREATE TABLE users ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + email VARCHAR(255) UNIQUE NOT NULL, + name VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX ASYNC idx_users_email ON users(email); +``` + +**For custom schema:** + +- Ask about their app's needs +- Design tables following DSQL constraints +- Reference [`./dsql-examples.md`](./dsql-examples.md) for patterns +- ALWAYS use `CREATE INDEX ASYNC` for all indexes + +### Step 9: Set Up Scoped Database Roles + +**Recommend creating scoped roles before application development begins.** + +- Ask: "Would you like to set up scoped database roles for your application? This is recommended over using `admin` directly." +- If yes, follow [access-control.md](./access-control.md) for detailed guidance +- At minimum, guide creating one application role: + +```sql +-- As admin +CREATE ROLE app_user WITH LOGIN; +AWS IAM GRANT app_user TO 'arn:aws:iam:::role/'; +GRANT USAGE ON SCHEMA public TO app_user; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_user; +``` + +- If the application handles sensitive user data, recommend a separate schema: + +```sql +CREATE SCHEMA users_schema; +GRANT USAGE ON SCHEMA users_schema TO app_user; +GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA users_schema TO app_user; +GRANT CREATE ON SCHEMA users_schema TO app_user; +``` + +- After setup, application connections should use `generate-db-connect-auth-token` (not the admin variant) + +### Step 10: What's Next + +Let them know you're ready to help with more: + +"You're all set! 
Here are some things I can help with - feel free to ask about any of these (or anything else): + +- Schema design and migrations following DSQL best practices +- Writing queries with proper tenant isolation +- Connection pooling and token refresh strategies +- Multi-region cluster setup for high availability +- Performance optimization with indexes and query patterns +- Setting up additional scoped roles for different services" + +### Important Notes: + +- ALWAYS be succinct - guide step-by-step without verbose explanations +- ALWAYS check [`./development-guide.md`](./development-guide.md) before schema operations +- ALWAYS use MCP tools for queries when available (with user permission) +- ALWAYS track MCP status throughout the session +- ALWAYS validate DSQL compatibility for existing schemas +- ALWAYS provide working, tested commands +- MUST handle token expiration gracefully (15-minute default, 1-hour recommended) + +**MCP Server Workflow:** + +- If MCP enabled: Use MCP tools for database operations, continuously update user on cluster state +- If MCP not enabled: Provide CLI commands and manual SQL queries +- Agent must adapt workflow based on MCP availability + +--- + +## DSQL Best Practices + +### Critical Constraints + +**ALWAYS follow these rules:** + +1. **Indexes:** Use `CREATE INDEX ASYNC` - synchronous index creation not supported +2. **Serialization:** Store arrays/JSON as TEXT (comma-separated or JSON.stringify) +3. **Referential Integrity:** Implement foreign key validation in application code +4. **DDL Operations:** Execute one DDL per transaction, no mixing with DML +5. **Transaction Limits:** Maximum 3,000 row modifications, 10 MiB data size per transaction +6. **Token Refresh:** Regenerate auth tokens before 15-minute expiration +7. **SSL Required:** Always set `PGSSLMODE=require` or `sslmode=require` + +### DSQL-Specific Features + +**Leverage Aurora DSQL capabilities:** + +1. **Serverless:** True scale-to-zero with consumption-based pricing +2. 
**Distributed:** Active-active writes across multiple regions +3. **Strong Consistency:** Immediate read-your-writes across all regions +4. **IAM Authentication:** No password management, automatic token rotation +5. **PostgreSQL Compatible:** Supports 10 [Database Drivers](./development-guide.md#database-drivers), 4 [ORMs](./development-guide.md#object-relational-mapping-orm-libraries), and 3 [Adapters/Dialects](./development-guide.md#adapters-and-dialects), as listed in the development guide. + +**For detailed patterns, see [`./development-guide.md`](./development-guide.md)** + +## Additional Resources + +- [Aurora DSQL Documentation](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/) +- [Aurora DSQL Starter Kit](https://github.com/awslabs/aurora-dsql-starter-kit/tree/main) +- [Code Samples Repository](https://github.com/aws-samples/aurora-dsql-samples) +- [IAM Authentication Guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/using-database-and-iam-roles.html) +- [Getting Started Guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/getting-started.html) +- [PostgreSQL Compatibility](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility.html) +- [Incompatible PostgreSQL Features](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility-unsupported-features.html) +- [CloudFormation Resource](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-dsql-cluster.html) diff --git a/plugins/aurora-dsql/skills/aurora-dsql/references/troubleshooting.md b/plugins/aurora-dsql/skills/aurora-dsql/references/troubleshooting.md new file mode 100644 index 0000000..bb0c0a2 --- /dev/null +++ b/plugins/aurora-dsql/skills/aurora-dsql/references/troubleshooting.md @@ -0,0 +1,129 @@ +# Troubleshooting in DSQL + +This file contains common additional errors encountered while working with DSQL and +guidelines for how to solve them.
+ +Before referring to any listed errors, refer to the complete [DSQL troubleshooting guide](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/troubleshooting.html#troubleshooting-connections) + +## Connection and Authorization + +### Token Expiration + +### Error: "Token has expired" + +**Cause:** Authentication token older than 15 minutes +**Solutions:** + +- Auto-regenerate tokens per connection or query OR +- Use connection pool hooks to refresh before expiration OR +- Implement retry logic with token regeneration + +**Additional Recommendations:** + +- Refresh connections within 15 minutes +- Auto-reconnect after observing auth errors + +### Connection Timeouts + +**Problem**: Database connections time out after 1 hour. +**Solution**: + +- Configure connection pool lifetime < 1 hour +- Implement connection health checks +- Handle disconnection gracefully with retries + +### Schema Privileges + +**Problem**: Non-admin users get permission denied errors. + +**Solution**: + +- Admin users must explicitly grant schema access to non-admin users +- Non-admin users must create and use custom schemas (not `public`) +- Link database roles to IAM roles for authentication + +### SSL Certificate Verification + +**Problem**: SSL verification fails with certificate errors. + +**Solution**: + +- Ensure system has Amazon Root CA certificates +- Use native TLS libraries (not OpenSSL 1.0.x) +- Set `server_name_indication` to cluster endpoint in SSL config + +## Incompatibility + +When migrating from PostgreSQL, remember DSQL doesn't support: + +- **Foreign key constraints** - Enforce referential integrity in application code +- **SERIAL types** - Use `GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY` with sequences instead +- **Extensions** - No PL/pgSQL, PostGIS, pgvector, etc. 
+- **Triggers** - Implement logic in application layer +- **Temporary tables** - Use regular tables or application-level caching +- **TRUNCATE** - Use `DELETE FROM table` instead +- **Multiple databases** - Single `postgres` database per cluster +- **Custom types** - Limited type system support +- **Partitioning** - Manage data distribution in application + +See [full list of unsupported features](https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-postgresql-compatibility-unsupported-features.html). + +### Error: "Foreign key constraint not supported" + +**Cause:** Attempting to create FOREIGN KEY constraint +**Solution:** + +1. Remove FOREIGN KEY from DDL +2. Implement validation in application code +3. Check parent exists before INSERT +4. Check dependents before DELETE + +### Error: "Datatype array not supported" + +**Cause:** Using TEXT[] or other array types +**Solution:** + +1. Change column to TEXT +2. Store as comma-separated: `"tag1,tag2,tag3"` +3. Or use JSON.stringify: `'["tag1","tag2","tag3"]'` +4. Deserialize in application layer + +### Error: "Please use CREATE INDEX ASYNC" + +**Cause:** Creating index without ASYNC keyword +**Solution:** + +```sql +-- Wrong +CREATE INDEX idx_name ON table(column); + +-- Correct +CREATE INDEX ASYNC idx_name ON table(column); +``` + +### Error: "Transaction exceeds 3000 rows" + +**Cause:** Modifying too many rows in single transaction +**Solution:** + +1. Batch operations into chunks of 500-1000 rows +2. Process each batch separately +3. Add WHERE clause to limit scope + +### Error: "OC001 - Concurrent DDL operation" + +**Cause:** Multiple DDL operations on same resource +**Solution:** + +1. Wait for current DDL to complete +2. Retry with exponential backoff +3. Execute DDL operations sequentially + +## Protocol Compatibility + +**Problem**: Some PostgreSQL clients send unsupported protocol messages.
+ +**Solution**: + +- Use officially tested drivers from [aws-samples/aurora-dsql-samples](https://github.com/aws-samples/aurora-dsql-samples) +- Test client compatibility before production deployment