Skip to content

Commit ede13a4

Browse files
Merge pull request #399 from rustprooflabs/dev
Draft: Merge dev into main
2 parents 0c009a5 + 733ebd7 commit ede13a4

File tree

15 files changed

+360
-322
lines changed

15 files changed

+360
-322
lines changed

Makefile

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ RAM=2
3030
# to make unit test results visible at the end.
3131
.PHONY: all
3232
all: docker-exec-region docker-exec-input-file \
33-
docker-exec-replication-w-input-file \
3433
docker-exec-default unit-tests
3534

3635
.PHONY: docker-clean
@@ -115,44 +114,6 @@ docker-exec-input-file: build-run-docker
115114

116115

117116

118-
.PHONE: docker-exec-replication-w-input-file
119-
docker-exec-replication-w-input-file: build-run-docker
120-
# NOTE: This step tests --replication file for an initial load.
121-
# It does **NOT** test the actual replication process for updating data
122-
# using replication mode. Testing actual replication over time in this format
123-
# will not be trivial. The historic file used (2021-01-13) cannot be used
124-
# to seed a replication process, there is a time limit in upstream software
125-
# that requires more recency to the source data. This also cannot simply
126-
# download a file from Geofabrik, as the "latest" file will not have a diff
127-
# to apply so also will not test the actual replication.
128-
#
129-
# Open a PR, Issue, discussion on https://github.com/rustprooflabs/pgosm-flex
130-
# if you have an idea on how to implement this testing functionality.
131-
132-
# copy with arbitrary file name to test --input-file
133-
docker cp tests/data/district-of-columbia-2021-01-13.osm.pbf \
134-
pgosm:/app/output/$(INPUT_FILE_NAME)
135-
136-
# allow files created in later step to be created
137-
docker exec -it pgosm \
138-
chown $(CURRENT_UID):$(CURRENT_GID) /app/output/
139-
# Needed for unit-tests
140-
docker exec -it pgosm \
141-
chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/
142-
143-
# process it, this time without providing the region but directly the filename
144-
docker exec -it \
145-
-e POSTGRES_PASSWORD=mysecretpassword \
146-
-e POSTGRES_USER=postgres \
147-
-u $(CURRENT_UID):$(CURRENT_GID) \
148-
pgosm python3 docker/pgosm_flex.py \
149-
--layerset=minimal \
150-
--ram=$(RAM) \
151-
--replication \
152-
--input-file=/app/output/$(INPUT_FILE_NAME) \
153-
--skip-qgis-style --skip-nested # Make this test run faster
154-
155-
156117
.PHONE: docker-exec-region
157118
docker-exec-region: build-run-docker
158119
# copy for simulating region

db/deploy/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# PgOSM Flex SQL deploy scripts
2+
3+
The scripts in this folder are executed during PgOSM Flex initialization via
4+
the `prepare_osm_schema()` function in `docker/db.py`.
5+
Any file added to or removed from this folder must also be reflected in
6+
that function.

db/deploy/replication_functions.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
/*
2+
Creates functions used for maintaining data when --replication is used.
13
4+
These functions are also used when using `--update append` mode of
5+
PgOSM Flex.
6+
*/
27
BEGIN;
38

49

docker/db.py

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,10 @@ def log_pg_details():
210210

211211

212212
def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
213-
"""Runs through series of steps to prepare database for PgOSM.
213+
"""Runs through steps to prepare the target database for PgOSM Flex.
214+
215+
Includes additional preparation for using --replication and --update append
215+
modes.
214217
215218
Parameters
216219
--------------------------
@@ -245,6 +248,9 @@ def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
245248
schema_name=schema_name)
246249
run_insert_pgosm_road(db_path=db_path, schema_name=schema_name)
247250

251+
if import_mode.replication_update or import_mode.update == 'append':
252+
osm2pgsql_replication_start()
253+
248254

249255
def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info,
250256
osm2pgsql_version, import_mode, schema_name, input_file):
@@ -477,7 +483,7 @@ def get_db_conn(conn_string):
477483
return conn
478484

479485

480-
def pgosm_after_import(flex_path):
486+
def pgosm_after_import(flex_path: str) -> bool:
481487
"""Runs post-processing SQL via Lua script.
482488
483489
Layerset logic is established via environment variable, must happen
@@ -508,17 +514,38 @@ def pgosm_after_import(flex_path):
508514

509515

510516
def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
511-
"""Runs stored procedure to calculate nested admin polygons via psql.
517+
"""Runs two stored procedures to calculate nested admin polygons via psql.
512518
513519
Parameters
514520
----------------------
515521
flex_path : str
516522
schema_name : str
517523
"""
518-
sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();'
524+
# Populate the table
525+
sql_raw_1 = f'CALL {schema_name}.populate_place_polygon_nested();'
519526

520527
conn_string = os.environ['PGOSM_CONN']
521-
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
528+
cmds = ['psql', '-d', conn_string, '-c', sql_raw_1]
529+
LOGGER.info('Populating place_polygon_nested table (osm.populate_place_polygon_nested() )')
530+
output = subprocess.run(cmds,
531+
text=True,
532+
cwd=flex_path,
533+
check=False,
534+
stdout=subprocess.PIPE,
535+
stderr=subprocess.STDOUT)
536+
LOGGER.info(f'Nested polygon output: \n {output.stdout}')
537+
538+
if output.returncode != 0:
539+
err_msg = f'Failed to populate nested polygon data. Return code: {output.returncode}'
540+
LOGGER.error(err_msg)
541+
sys.exit(f'{err_msg} - Check the log output for details.')
542+
543+
544+
# Build the data
545+
sql_raw_2 = f' CALL {schema_name}.build_nested_admin_polygons();'
546+
547+
conn_string = os.environ['PGOSM_CONN']
548+
cmds = ['psql', '-d', conn_string, '-c', sql_raw_2]
522549
LOGGER.info('Building nested polygons... (this can take a while)')
523550
output = subprocess.run(cmds,
524551
text=True,
@@ -537,18 +564,23 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
537564

538565
def osm2pgsql_replication_start():
539566
"""Runs pre-replication step to clean out FKs that would prevent updates.
567+
568+
This function is necessary for using `--replication` (osm2pgsql-replication)
569+
and `--update append` mode.
540570
"""
541571
LOGGER.info('Prep database to allow data updates.')
542-
# This use of append applies to both osm2pgsql --append and osm2pgsq-replication, not renaming from "append"
543572
sql_raw = 'CALL osm.append_data_start();'
544573

545574
with get_db_conn(conn_string=connection_string()) as conn:
546575
cur = conn.cursor()
547576
cur.execute(sql_raw)
548577

549578

550-
def osm2pgsql_replication_finish(skip_nested):
551-
"""Runs post-replication step to put FKs back and refresh materialied views.
579+
def osm2pgsql_replication_finish(skip_nested: bool):
580+
"""Runs post-replication step to refresh materialized views and rebuild
581+
nested data when appropriate.
582+
583+
Only needed for `--replication`, not used for `--update append` mode.
552584
553585
Parameters
554586
---------------------

docker/geofabrik.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,10 @@ def set_date_from_metadata(pbf_file: str):
107107
os.environ['PBF_TIMESTAMP'] = meta_timestamp
108108

109109

110-
def pbf_download_needed(pbf_file_with_date: str, md5_file_with_date: str,
111-
pgosm_date: str) -> bool:
110+
def pbf_download_needed(pbf_file_with_date: str,
111+
md5_file_with_date: str,
112+
pgosm_date: str
113+
) -> bool:
112114
"""Decides if the PBF/MD5 files need to be downloaded.
113115
114116
Parameters
@@ -123,6 +125,8 @@ def pbf_download_needed(pbf_file_with_date: str, md5_file_with_date: str,
123125
"""
124126
logger = logging.getLogger('pgosm-flex')
125127
# If the PBF file exists, check for the MD5 file too.
128+
logger.debug(f'Checking for PBF File: {pbf_file_with_date}')
129+
126130
if os.path.exists(pbf_file_with_date):
127131
logger.info(f'PBF File exists {pbf_file_with_date}')
128132

docker/helpers.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,9 @@ def verify_checksum(md5_file: str, path: str):
109109
logger.debug('md5sum validated')
110110

111111

112-
def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
113-
layerset_path, replication, schema_name):
112+
def set_env_vars(region: str, subregion: str, srid: str, language: str,
113+
pgosm_date: str, layerset: str,
114+
layerset_path: str, schema_name: str, skip_nested: bool):
114115
"""Sets environment variables needed by PgOSM Flex. Also creates DB
115116
record in `osm.pgosm_flex` table.
116117
@@ -122,11 +123,11 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
122123
language : str
123124
pgosm_date : str
124125
layerset : str
126+
Name of layerset matching the INI filename.
125127
layerset_path : str
126128
str when set, or None
127-
replication : bool
128-
Indicates when osm2pgsql-replication is used
129129
schema_name : str
130+
skip_nested : bool
130131
"""
131132
logger = logging.getLogger('pgosm-flex')
132133
logger.debug('Ensuring env vars are not set from prior run')
@@ -159,6 +160,7 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
159160
pgosm_region = get_region_combined(region, subregion)
160161
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')
161162

163+
os.environ['SKIP_NESTED'] = str(skip_nested)
162164

163165

164166
def get_region_combined(region: str, subregion: str) -> str:
@@ -225,7 +227,7 @@ def get_git_info(tag_only: bool=False) -> str:
225227

226228

227229
def unset_env_vars():
228-
"""Unsets environment variables used by PgOSM Flex.
230+
"""Unset environment variables used by PgOSM Flex.
229231
230232
Does not pop POSTGRES_DB on purpose to allow non-Docker operation.
231233
"""
@@ -239,6 +241,7 @@ def unset_env_vars():
239241
os.environ.pop('PGOSM_CONN', None)
240242
os.environ.pop('PGOSM_CONN_PG', None)
241243
os.environ.pop('SCHEMA_NAME', None)
244+
os.environ.pop('SKIP_NESTED', None)
242245

243246

244247
class ImportMode():
@@ -310,17 +313,17 @@ def okay_to_run(self, prior_import: dict) -> bool:
310313
"""
311314
self.logger.debug(f'Checking if it is okay to run...')
312315
if self.force:
313-
self.logger.warn(f'Using --force, kiss existing data goodbye')
316+
self.logger.warning('Using --force, kiss existing data goodbye.')
314317
return True
315318

316319
# If no prior imports, do not require force
317320
if len(prior_import) == 0:
318-
self.logger.debug(f'No prior import found, okay to proceed.')
321+
self.logger.debug('No prior import found, okay to proceed.')
319322
return True
320323

321324
prior_replication = prior_import['replication']
322325

323-
# Check git version against latest.
326+
# Check PgOSM version using Git tags
324327
# If current version is lower than prior version from latest import, stop.
325328
prior_import_version = prior_import['pgosm_flex_version_no_hash']
326329
git_tag = get_git_info(tag_only=True)
@@ -345,6 +348,9 @@ def okay_to_run(self, prior_import: dict) -> bool:
345348
self.logger.debug('Okay to proceed with replication')
346349
return True
347350

351+
if self.update == 'append':
352+
return True
353+
348354
msg = 'Prior data exists in the osm schema and --force was not used.'
349355
self.logger.error(msg)
350356
return False

docker/pgosm_flex.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ def run_pgosm_flex(ram, region, subregion, debug, force,
9595
region = input_file
9696

9797
helpers.set_env_vars(region, subregion, srid, language, pgosm_date,
98-
layerset, layerset_path, replication, schema_name)
98+
layerset, layerset_path, schema_name,
99+
skip_nested)
99100
db.wait_for_postgres()
100101
if force and db.pg_conn_parts()['pg_host'] == 'localhost':
101102
msg = 'Using --force with the built-in database is unnecessary.'
@@ -267,7 +268,6 @@ def run_replication_update(skip_nested, flex_path):
267268
"""
268269
logger = logging.getLogger('pgosm-flex')
269270
conn_string = db.connection_string()
270-
db.osm2pgsql_replication_start()
271271

272272
update_cmd = """
273273
osm2pgsql-replication update -d $PGOSM_CONN \
@@ -531,10 +531,13 @@ def run_post_processing(flex_path, skip_nested, import_mode, schema_name):
531531
logger = logging.getLogger('pgosm-flex')
532532

533533
if not import_mode.run_post_sql:
534-
logger.info('Running with --update append: Skipping post-processing SQL')
534+
msg = 'Running with --update append: Skipping post-processing SQL.'
535+
msg += ' Running osm2pgsql_replication_finish() instead.'
536+
logger.info(msg)
537+
db.osm2pgsql_replication_finish(skip_nested=skip_nested)
535538
return True
536539

537-
post_processing_sql = db.pgosm_after_import(flex_path)
540+
post_processing_sql = db.pgosm_after_import(flex_path=flex_path)
538541

539542
if skip_nested:
540543
logger.info('Skipping calculating nested polygons')

docker/tests/test_geofabrik.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
REGION_US = 'north-america/us'
77
SUBREGION_DC = 'district-of-columbia'
88
LAYERSET = 'default'
9-
PGOSM_DATE = '2021-12-02'
9+
PGOSM_DATE = '2021-01-13'
10+
11+
PBF_FILE_WITH_DATE = f'/app/tests/data/district-of-columbia-{PGOSM_DATE}.osm.pbf'
12+
MD5_FILE_WITH_DATE = f'/app/tests/data/district-of-columbia-{PGOSM_DATE}.osm.pbf.md5'
1013

1114

1215
class GeofabrikTests(unittest.TestCase):
@@ -19,8 +22,8 @@ def setUp(self):
1922
pgosm_date=PGOSM_DATE,
2023
layerset=LAYERSET,
2124
layerset_path=None,
22-
replication=False,
23-
schema_name='osm')
25+
schema_name='osm',
26+
skip_nested=True)
2427

2528

2629
def tearDown(self):
@@ -34,15 +37,15 @@ def test_get_region_filename_returns_subregion_when_exists(self):
3437
def test_get_region_filename_returns_region_when_subregion_None(self):
3538
# Override Subregion to None
3639
helpers.unset_env_vars()
37-
helpers.set_env_vars(region='north-america/us',
40+
helpers.set_env_vars(region=REGION_US,
3841
subregion=None,
3942
srid=3857,
4043
language=None,
4144
pgosm_date=PGOSM_DATE,
4245
layerset=LAYERSET,
4346
layerset_path=None,
44-
replication=False,
45-
schema_name='osm')
47+
schema_name='osm',
48+
skip_nested=True)
4649

4750
result = geofabrik.get_region_filename()
4851
expected = f'{REGION_US}-latest.osm.pbf'
@@ -64,7 +67,6 @@ def test_get_pbf_url_returns_proper_with_region_and_subregion(self):
6467

6568
def test_pbf_download_needed_returns_boolean(self):
6669
pgosm_date = geofabrik.helpers.get_today()
67-
region_filename = geofabrik.get_region_filename()
6870
expected = bool
6971
result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter',
7072
md5_file_with_date='not-a-file',
@@ -73,11 +75,9 @@ def test_pbf_download_needed_returns_boolean(self):
7375

7476
def test_pbf_download_needed_returns_true_when_file_not_exists(self):
7577
pgosm_date = geofabrik.helpers.get_today()
76-
region_filename = geofabrik.get_region_filename()
7778
expected = True
7879
result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter',
7980
md5_file_with_date='not-a-file',
8081
pgosm_date=pgosm_date)
8182
self.assertEqual(expected, result)
8283

83-

0 commit comments

Comments
 (0)