-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrubbed_database.sh
More file actions
59 lines (43 loc) · 2.99 KB
/
scrubbed_database.sh
File metadata and controls
59 lines (43 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
# by: Simon Goring
#
# Build a scrubbed copy of the Neotoma database and publish it to S3:
#   1. pg_dump the primary database into a temporary plain-SQL file.
#   2. Restore that file into the local PostgreSQL server.
#   3. Overwrite steward credentials and contact details with random strings.
#   4. pg_dump the scrubbed copy, tar/gzip it and upload it to S3.
#
# Documentation for pg_dump is available at
# https://www.postgresql.org/docs/current/app-pgdump.html
#
# Variables read but not set in this script (presumably provided by
# /home/app/connect_database.sh or the calling environment -- confirm):
#   DB_HOST, DB_PORT, REMOTE_USER, REMOTE_PASSWORD, POSTGRES_PASSWORD
set -e

# Mirror everything the script prints (stdout and stderr) into the log file
# while still showing it on the original stdout.
exec > >(tee -a /var/log/db-sanitize.log)
exec 2>&1

echo "💡 Starting database sanitization at $(date)"
DATESTAMP=$(date +"%Y-%m-%d")

echo "🔌 Connecting to the primary Neotoma database..."
source /home/app/connect_database.sh

# Fail early with a clear message instead of handing pg_dump empty arguments.
: "${DB_HOST:?DB_HOST must be set}"
: "${DB_PORT:?DB_PORT must be set}"
: "${REMOTE_USER:?REMOTE_USER must be set}"

ARCHIVE_DIR=/home/archives
ASSET_DIR=/home/assets
RAW_DUMP="${ARCHIVE_DIR}/tempdump.dump"
CLEAN_SQL="${ARCHIVE_DIR}/neotoma_clean_${DATESTAMP}.sql"
CLEAN_TARBALL="${ASSET_DIR}/neotoma_clean_${DATESTAMP}.tar.gz"
S3_BUCKET=s3://neotoma-remote-store
ARCHIVE_MIME_TYPE=application/x-compressed-tar

# The raw dump still contains the unsanitized credentials and contact
# details. Remove it on every exit path so a step that aborts under `set -e`
# cannot leave it behind on disk.
cleanup() {
  rm -f -- "${RAW_DUMP}"
}
trap cleanup EXIT

# Run psql against the local server as the postgres superuser.
# Arguments: passed straight through to psql.
local_psql() {
  psql -U postgres -h localhost "$@"
}

echo "⛁ Dumping the primary database from ${DB_HOST}:${DB_PORT}:"
export PGPASSWORD="${REMOTE_PASSWORD}"
# -O (same as --no-owner) and -x drop ownership and privilege statements;
# the three ap.* tables, the cron schema and subscriptions are left out.
pg_dump -v -O -C -c -x -U "${REMOTE_USER}" -h "${DB_HOST}" -p "${DB_PORT}" \
  --no-subscriptions -T ap.globalmammals -T ap.icesheets -T ap.hydrolakes -N cron -Fp -d neotoma > "${RAW_DUMP}"

echo "Checking to ensure the dump is stable:"
# The dump is plain SQL (-Fp). pg_restore only reads archive formats and
# rejects text dumps, so inspect the file directly instead.
if [[ ! -s "${RAW_DUMP}" ]]; then
  echo "Dump file ${RAW_DUMP} is missing or empty." >&2
  exit 1
fi
head -20 "${RAW_DUMP}"

echo "🛠 Restoring the database locally"
export PGPASSWORD="${POSTGRES_PASSWORD}"
local_psql -d postgres -c "DROP DATABASE IF EXISTS neotoma;"
local_psql -d postgres -c "CREATE DATABASE neotoma;"
# NOTE(review): these run against the `postgres` database, but extensions are
# installed per database, so they do not end up in `neotoma`. Presumably the
# dump itself creates them there -- confirm before changing the target.
local_psql -d postgres -c "CREATE EXTENSION IF NOT EXISTS postgis;"
local_psql -d postgres -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;"
local_psql -d postgres -c "CREATE EXTENSION IF NOT EXISTS intarray;"
local_psql -d postgres -c "CREATE EXTENSION IF NOT EXISTS unaccent;"

echo "Restoring database in container. Checking for errors."
# NOTE(review): psql keeps going after SQL errors unless ON_ERROR_STOP is
# set, so errors are only visible in the log. The dump was made with -c -C,
# so it opens with DROP/CREATE DATABASE statements that are expected to fail
# while connected to neotoma; turning ON_ERROR_STOP on would need that
# changed first.
local_psql -d neotoma < "${RAW_DUMP}"

echo "🧹 Cleaning up all sensitive data from the database."
local_psql -d neotoma -c "UPDATE ti.stewards SET username=SUBSTRING(md5(random()::text) from 1 for 10), pwd=SUBSTRING(md5(random()::text) from 1 for 10);"
local_psql -d neotoma -c "UPDATE ndb.contacts SET address=SUBSTRING(md5(random()::text) from 1 for 10), phone=SUBSTRING(md5(random()::text) from 1 for 10), fax=SUBSTRING(md5(random()::text) from 1 for 10), email=SUBSTRING(md5(random()::text) from 1 for 10);"

echo "✍🏼 Creating the final cleaned dump."
# Uses the PGPASSWORD exported above rather than a hard-coded password, so
# this step authenticates exactly like the psql calls that precede it.
pg_dump -C -v -O -x -Fp -p 5432 -h localhost -d neotoma -U postgres > "${CLEAN_SQL}"

# Must happen before tar: the archive below packs the whole archive
# directory, and the raw dump must never be part of it.
rm -- "${RAW_DUMP}"

echo "📦 Compressing the dumped database."
tar -zcvf "${CLEAN_TARBALL}" -C "${ARCHIVE_DIR}" .

echo "💾 Uploading the archive to S3."
# The value is a media type, so it belongs in Content-Type, not
# Content-Encoding. The S3-to-S3 copy needs --metadata-directive REPLACE for
# the supplied value to be applied to the copied object.
aws s3 cp "${CLEAN_TARBALL}" "${S3_BUCKET}/" --content-type "${ARCHIVE_MIME_TYPE}"
aws s3 cp "${S3_BUCKET}/neotoma_clean_${DATESTAMP}.tar.gz" "${S3_BUCKET}/neotoma_clean_latest.tar.gz" \
  --content-type "${ARCHIVE_MIME_TYPE}" --metadata-directive REPLACE

echo "🗑️ Removing temporary files..."
rm -- "${CLEAN_SQL}"
rm -- "${CLEAN_TARBALL}"

echo "✔ Database sanitization completed successfully at $(date)"