From 0bbfc00661d59ce55603d70ea304b73adf29abe8 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Mon, 5 Jan 2026 10:18:15 -0800 Subject: [PATCH 1/8] add external_identifier column --- adsrefpipe/models.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/adsrefpipe/models.py b/adsrefpipe/models.py index 5e2c725..046e281 100755 --- a/adsrefpipe/models.py +++ b/adsrefpipe/models.py @@ -2,7 +2,7 @@ from sqlalchemy import Integer, String, Column, ForeignKey, DateTime, func, Numeric, ForeignKeyConstraint -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.dialects.postgresql import JSONB, ARRAY from sqlalchemy.ext.declarative import declarative_base @@ -213,6 +213,7 @@ class ResolvedReference(Base): bibcode = Column(String) score = Column(Numeric) reference_raw = Column(String) + external_identifier = Column(ARRAY(String)) def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str): """ @@ -224,6 +225,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: :param bibcode: resolved bibcode :param score: confidence score of the resolved reference :param reference_raw: raw reference string + :param external_identifier: list of external identifiers associated with the reference, e.g. 
["doi:...", "arxiv:...", "ascl:..."] """ self.history_id = history_id self.item_num = item_num @@ -231,6 +233,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: self.bibcode = bibcode self.score = score self.reference_raw = reference_raw + self.external_identifier = external_identifier def toJSON(self) -> dict: """ @@ -244,7 +247,8 @@ def toJSON(self) -> dict: 'bibcode': self.bibcode, 'score': self.score, 'item_num': self.item_num, - **({'reference_raw': self.reference_raw} if self.reference_raw else {}) + **({'reference_raw': self.reference_raw} if self.reference_raw else {}), + 'external_identifier': self.external_identifier } From a66c1a673c124db889a5e752166784bbbb456806 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Mon, 5 Jan 2026 11:21:17 -0800 Subject: [PATCH 2/8] alembic upgrade add external identifier --- .../08ca70bd6f5f_add_external_identifier.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 alembic/versions/08ca70bd6f5f_add_external_identifier.py diff --git a/alembic/versions/08ca70bd6f5f_add_external_identifier.py b/alembic/versions/08ca70bd6f5f_add_external_identifier.py new file mode 100644 index 0000000..62e9caa --- /dev/null +++ b/alembic/versions/08ca70bd6f5f_add_external_identifier.py @@ -0,0 +1,24 @@ +"""add_external_identifier + +Revision ID: 08ca70bd6f5f +Revises: e3d6e15c3b8c +Create Date: 2026-01-05 11:16:27.454389 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '08ca70bd6f5f' +down_revision = 'e3d6e15c3b8c' +branch_labels = None +depends_on = None + + +def upgrade(): + pass + + +def downgrade(): + pass From 4ca1862f96abff2ade6e8575bb212f7420865513 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Mon, 5 Jan 2026 14:35:31 -0800 Subject: [PATCH 3/8] alembic update external_identifier --- alembic/versions/08ca70bd6f5f_add_external_identifier.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/alembic/versions/08ca70bd6f5f_add_external_identifier.py b/alembic/versions/08ca70bd6f5f_add_external_identifier.py index 62e9caa..3a360a0 100644 --- a/alembic/versions/08ca70bd6f5f_add_external_identifier.py +++ b/alembic/versions/08ca70bd6f5f_add_external_identifier.py @@ -7,6 +7,7 @@ """ from alembic import op import sqlalchemy as sa +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. @@ -17,8 +18,12 @@ def upgrade(): - pass + op.add_column('resolved_reference', + sa.Column("external_identifier", + postgresql.ARRAY(sa.String())) + ) def downgrade(): - pass + op.drop_column('resolved_reference', 'external_identifier') + From 82e66efd4a3593eebf7c6f4a1125a988f726733f Mon Sep 17 00:00:00 2001 From: thomasallen Date: Thu, 8 Jan 2026 11:48:19 -0800 Subject: [PATCH 4/8] add external identifier to unittest mock data --- adsrefpipe/tests/unittests/test_app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index c21cff3..80fce1d 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -811,7 +811,8 @@ def test_populate_tables(self): "bibcode": "2011LRR....14....2U", "refstring": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. 
", - "id": "H1I1" + "id": "H1I1", + "ext_id": "ExtID1" }, { "score": "1.0", @@ -819,6 +820,7 @@ def test_populate_tables(self): "refstring": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "id": "H1I2", + "ext_id": "ExtID2" } ] arXiv_stubdata_dir = os.path.join(self.test_dir, 'unittests/stubdata/txt/arXiv/0/') From 0835c731a9e56ebbb17fbad143e31d5a52f34e58 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Thu, 15 Jan 2026 10:30:03 -0800 Subject: [PATCH 5/8] update for unit tests --- adsrefpipe/app.py | 12 ++++ adsrefpipe/models.py | 4 +- adsrefpipe/tests/unittests/test_app.py | 84 ++++++++++++++++-------- adsrefpipe/tests/unittests/test_tasks.py | 2 + 4 files changed, 72 insertions(+), 30 deletions(-) diff --git a/adsrefpipe/app.py b/adsrefpipe/app.py index 1a587cb..d3f12a8 100755 --- a/adsrefpipe/app.py +++ b/adsrefpipe/app.py @@ -22,6 +22,15 @@ from texttable import Texttable +def _ensure_list(x): + if x is None: + return None + # treat strings as scalars, not iterables + if isinstance(x, (str, bytes)): + return [x] + # already list-like + return list(x) + class ADSReferencePipelineCelery(ADSCelery): """ celery-based pipeline for processing and resolving references @@ -306,6 +315,7 @@ def query_resolved_reference_tbl(self, history_id_list: List = None) -> List: return results + def diagnostic_query(self, bibcode_list: List = None, source_filename_list: List = None) -> List: """ perform a diagnostic query to retrieve combined reference records @@ -315,6 +325,8 @@ def diagnostic_query(self, bibcode_list: List = None, source_filename_list: List :return: List of combined records from multiple tables """ results = [] + bibcode_list = _ensure_list(bibcode_list) + source_filename_list = _ensure_list(source_filename_list) reference_source = 
self.query_reference_source_tbl(bibcode_list, source_filename_list) processed_history = self.query_processed_history_tbl(bibcode_list, source_filename_list) diff --git a/adsrefpipe/models.py b/adsrefpipe/models.py index 046e281..b2db105 100755 --- a/adsrefpipe/models.py +++ b/adsrefpipe/models.py @@ -215,7 +215,7 @@ class ResolvedReference(Base): reference_raw = Column(String) external_identifier = Column(ARRAY(String)) - def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str): + def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None): """ initializes a resolved reference object @@ -233,7 +233,7 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: self.bibcode = bibcode self.score = score self.reference_raw = reference_raw - self.external_identifier = external_identifier + self.external_identifier = external_identifier or [] def toJSON(self) -> dict: """ diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index 80fce1d..f118425 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -30,6 +30,8 @@ from adsrefpipe.refparsers.handler import verify from adsrefpipe.tests.unittests.stubdata.dbdata import actions_records, parsers_records +import testing.postgresql + class TestDatabase(unittest.TestCase): @@ -39,18 +41,26 @@ class TestDatabase(unittest.TestCase): maxDiff = None - postgresql_url_dict = { - 'port': 5432, - 'host': '127.0.0.1', - 'user': 'postgres', - 'database': 'postgres' - } - postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ - .format(user=postgresql_url_dict['user'], - host=postgresql_url_dict['host'], - port=postgresql_url_dict['port'], - database=postgresql_url_dict['database'] - ) + # postgresql_url_dict = { + # 'port': 5432, + # 'host': '127.0.0.1', + # 
'user': 'postgres', + # 'database': 'postgres' + # } + # postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ + # .format(user=postgresql_url_dict['user'], + # host=postgresql_url_dict['host'], + # port=postgresql_url_dict['port'], + # database=postgresql_url_dict['database'] + # ) + + _postgresql = testing.postgresql.Postgresql() + postgresql_url = _postgresql.url() + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + cls._postgresql.stop() def setUp(self): self.test_dir = os.path.join(project_home, 'adsrefpipe/tests') @@ -117,8 +127,13 @@ def add_stub_data(self): ] with self.app.session_scope() as session: - session.bulk_save_objects(actions_records) - session.bulk_save_objects(parsers_records) + session.query(Action).delete() + session.query(Parser).delete() + session.commit() + if session.query(Action).count() == 0: + session.bulk_save_objects(actions_records) + if session.query(Parser).count() == 0: + session.bulk_save_objects(parsers_records) session.commit() for i, (a_reference,a_history) in enumerate(zip(reference_source,processed_history)): @@ -745,18 +760,26 @@ class TestDatabaseNoStubdata(unittest.TestCase): maxDiff = None - postgresql_url_dict = { - 'port': 5432, - 'host': '127.0.0.1', - 'user': 'postgres', - 'database': 'postgres' - } - postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ - .format(user=postgresql_url_dict['user'], - host=postgresql_url_dict['host'], - port=postgresql_url_dict['port'], - database=postgresql_url_dict['database'] - ) + # postgresql_url_dict = { + # 'port': 5432, + # 'host': '127.0.0.1', + # 'user': 'postgres', + # 'database': 'postgres' + # } + # postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ + # .format(user=postgresql_url_dict['user'], + # host=postgresql_url_dict['host'], + # port=postgresql_url_dict['port'], + # database=postgresql_url_dict['database'] + # ) + + _postgresql = testing.postgresql.Postgresql() + postgresql_url = 
_postgresql.url() + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + cls._postgresql.stop() def setUp(self): self.test_dir = os.path.join(project_home, 'adsrefpipe/tests') @@ -825,8 +848,13 @@ def test_populate_tables(self): ] arXiv_stubdata_dir = os.path.join(self.test_dir, 'unittests/stubdata/txt/arXiv/0/') with self.app.session_scope() as session: - session.bulk_save_objects(actions_records) - session.bulk_save_objects(parsers_records) + session.query(Action).delete() + session.query(Parser).delete() + session.commit() + if session.query(Action).count() == 0: + session.bulk_save_objects(actions_records) + if session.query(Parser).count() == 0: + session.bulk_save_objects(parsers_records) session.commit() references = self.app.populate_tables_pre_resolved_initial_status( diff --git a/adsrefpipe/tests/unittests/test_tasks.py b/adsrefpipe/tests/unittests/test_tasks.py index fb4ee58..153f041 100755 --- a/adsrefpipe/tests/unittests/test_tasks.py +++ b/adsrefpipe/tests/unittests/test_tasks.py @@ -74,6 +74,8 @@ def add_stub_data(self): ] with self.app.session_scope() as session: + session.query(Action).delete() + session.query(Parser).delete() session.bulk_save_objects(actions_records) session.bulk_save_objects(parsers_records) session.commit() From 7d7a9a12a66d8089b3b1bcfc4cdd0442455a3f15 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Tue, 27 Jan 2026 10:55:05 -0800 Subject: [PATCH 6/8] check external_identifier column --- adsrefpipe/tests/unittests/test_app.py | 368 +++++++++++++++---------- 1 file changed, 227 insertions(+), 141 deletions(-) diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index f118425..8c21957 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -33,6 +33,11 @@ import testing.postgresql +def _get_external_identifier(rr_obj): + """Return external_identifier from a ResolvedReference ORM object.""" + return getattr(rr_obj, 
"external_identifier", None) + + class TestDatabase(unittest.TestCase): """ @@ -41,19 +46,6 @@ class TestDatabase(unittest.TestCase): maxDiff = None - # postgresql_url_dict = { - # 'port': 5432, - # 'host': '127.0.0.1', - # 'user': 'postgres', - # 'database': 'postgres' - # } - # postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ - # .format(user=postgresql_url_dict['user'], - # host=postgresql_url_dict['host'], - # port=postgresql_url_dict['port'], - # database=postgresql_url_dict['database'] - # ) - _postgresql = testing.postgresql.Postgresql() postgresql_url = _postgresql.url() @@ -85,9 +77,12 @@ def add_stub_data(self): self.arXiv_stubdata_dir = os.path.join(self.test_dir, 'unittests/stubdata/txt/arXiv/0/') reference_source = [ - ('0001arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00001.raw'),os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'),'arXiv'), - ('0002arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00002.raw'),os.path.join(self.arXiv_stubdata_dir,'00002.raw.result'),'arXiv'), - ('0003arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00003.raw'),os.path.join(self.arXiv_stubdata_dir,'00003.raw.result'),'arXiv') + ('0001arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), 'arXiv'), + ('0002arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00002.raw'), + os.path.join(self.arXiv_stubdata_dir, '00002.raw.result'), 'arXiv'), + ('0003arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00003.raw'), + os.path.join(self.arXiv_stubdata_dir, '00003.raw.result'), 'arXiv') ] processed_history = [ @@ -96,33 +91,41 @@ def add_stub_data(self): ('2020-04-03 18:08:32', '2020-05-11 11:14:28', '128', '109') ] + # Add external identifiers for each resolved reference to verify persistence in DB + # Each tuple: (reference_str, bibcode, score, external_identifier_list) resolved_reference = [ [ - ('J.-P. 
Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ','2011LRR....14....2U',1.0), - ('C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.','2017RPPh...80l6902M',1.0) + ('J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ', + '2011LRR....14....2U', 1.0, ['arxiv:1009.5514']), + ('C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.', + '2017RPPh...80l6902M', 1.0, ['arxiv:1709.02923']) ], [ - ('Alsubai, K. A., Parley, N. R., Bramich, D. M., et al. 2011, MNRAS, 417, 709.','2011MNRAS.417..709A',1.0), - ('Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 ','2019A&A...625A.136A',1.0) + ('Alsubai, K. A., Parley, N. R., Bramich, D. M., et al. 2011, MNRAS, 417, 709.', + '2011MNRAS.417..709A', 1.0, ['doi:10.0000/mnras.417.709']), + ('Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 ', + '2019A&A...625A.136A', 1.0, ['doi:10.0000/aa.625.A136']) ], [ - ('Abellan, F. J., Indebetouw, R., Marcaide, J. M., et al. 2017, ApJL, 842, L24','2017ApJ...842L..24A',1.0), - ('Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 ','2016A&A...586A..71A',1.0) + ('Abellan, F. J., Indebetouw, R., Marcaide, J. M., et al. 2017, ApJL, 842, L24', + '2017ApJ...842L..24A', 1.0, ['ascl:1701.001']), + ('Ackermann, M., Albert, A., Atwood, W. B., et al. 
2016, A&A, 586, A71 ', + '2016A&A...586A..71A', 1.0, ['doi:10.0000/aa.586.A71']) ], ] compare_classic = [ [ - ('2010arXiv1009.5514U',1,'DIFF'), - ('2017arXiv170902923M',1,'DIFF') + ('2010arXiv1009.5514U', 1, 'DIFF'), + ('2017arXiv170902923M', 1, 'DIFF') ], [ - ('2011MNRAS.417..709A',1,'MATCH'), - ('2019A&A...625A.136A',1,'MATCH') + ('2011MNRAS.417..709A', 1, 'MATCH'), + ('2019A&A...625A.136A', 1, 'MATCH') ], [ - ('2017ApJ...842L..24A',1,'MATCH'), - ('2016A&A...586A..71A',1,'MATCH') + ('2017ApJ...842L..24A', 1, 'MATCH'), + ('2016A&A...586A..71A', 1, 'MATCH') ] ] @@ -136,44 +139,56 @@ def add_stub_data(self): session.bulk_save_objects(parsers_records) session.commit() - for i, (a_reference,a_history) in enumerate(zip(reference_source,processed_history)): - reference_record = ReferenceSource(bibcode=a_reference[0], - source_filename=a_reference[1], - resolved_filename=a_reference[2], - parser_name=a_reference[3]) + for i, (a_reference, a_history) in enumerate(zip(reference_source, processed_history)): + reference_record = ReferenceSource( + bibcode=a_reference[0], + source_filename=a_reference[1], + resolved_filename=a_reference[2], + parser_name=a_reference[3] + ) bibcode, source_filename = self.app.insert_reference_source_record(session, reference_record) self.assertTrue(bibcode == a_reference[0]) self.assertTrue(source_filename == a_reference[1]) - history_record = ProcessedHistory(bibcode=bibcode, - source_filename=source_filename, - source_modified=a_history[0], - status=Action().get_status_new(), - date=a_history[1], - total_ref=a_history[2]) + history_record = ProcessedHistory( + bibcode=bibcode, + source_filename=source_filename, + source_modified=a_history[0], + status=Action().get_status_new(), + date=a_history[1], + total_ref=a_history[2] + ) history_id = self.app.insert_history_record(session, history_record) self.assertTrue(history_id != -1) resolved_records = [] compare_records = [] - for j, (service,classic) in 
enumerate(zip(resolved_reference[i],compare_classic[i])): - resolved_record = ResolvedReference(history_id=history_id, - item_num=j+1, - reference_str=service[0], - bibcode=service[1], - score=service[2], - reference_raw=service[0]) + for j, (service, classic) in enumerate(zip(resolved_reference[i], compare_classic[i])): + refstr, bib, sc, ext_ids = service + resolved_record = ResolvedReference( + history_id=history_id, + item_num=j + 1, + reference_str=refstr, + bibcode=bib, + score=sc, + reference_raw=refstr, + external_identifier=ext_ids + ) resolved_records.append(resolved_record) - compare_record = CompareClassic(history_id=history_id, - item_num=j+1, - bibcode=classic[0], - score=classic[1], - state=classic[2]) + + compare_record = CompareClassic( + history_id=history_id, + item_num=j + 1, + bibcode=classic[0], + score=classic[1], + state=classic[2] + ) compare_records.append(compare_record) + success = self.app.insert_resolved_reference_records(session, resolved_records) - self.assertTrue(success == True) + self.assertTrue(success is True) success = self.app.insert_compare_records(session, compare_records) - self.assertTrue(success == True) + self.assertTrue(success is True) session.commit() def test_query_reference_tbl(self): @@ -181,8 +196,8 @@ def test_query_reference_tbl(self): result_expected = [ { 'bibcode': '0001arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00001.raw'), - 'resolved_filename': os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:13:36', @@ -190,8 +205,8 @@ def test_query_reference_tbl(self): 'last_run_num_resolved_references': 2 }, { 'bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw'), - 'resolved_filename': 
os.path.join(self.arXiv_stubdata_dir,'00002.raw.result'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:13:53', @@ -199,8 +214,8 @@ def test_query_reference_tbl(self): 'last_run_num_resolved_references': 2 }, { 'bibcode': '0003arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw'), - 'resolved_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw.result'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:14:28', @@ -215,9 +230,11 @@ def test_query_reference_tbl(self): self.assertTrue(result_expected == result_got) # test querying filenames - filenames = [os.path.join(self.arXiv_stubdata_dir,'00001.raw'), - os.path.join(self.arXiv_stubdata_dir,'00002.raw'), - os.path.join(self.arXiv_stubdata_dir,'00003.raw')] + filenames = [ + os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + os.path.join(self.arXiv_stubdata_dir, '00002.raw'), + os.path.join(self.arXiv_stubdata_dir, '00003.raw') + ] result_got = self.app.diagnostic_query(source_filename_list=filenames) self.assertTrue(result_expected == result_got) @@ -236,11 +253,13 @@ def test_query_reference_tbl_when_non_exits(self): self.assertTrue(self.app.diagnostic_query(bibcode_list=['0004arXiv.........Z']) == []) # test when filename does not exist - self.assertTrue(self.app.diagnostic_query(source_filename_list=os.path.join(self.arXiv_stubdata_dir,'00004.raw')) == []) + self.assertTrue(self.app.diagnostic_query(source_filename_list=os.path.join(self.arXiv_stubdata_dir, '00004.raw')) == []) # test when both bibcode and filename are passed and nothing is returned - 
self.assertTrue(self.app.diagnostic_query(bibcode_list=['0004arXiv.........Z'], - source_filename_list=os.path.join(self.arXiv_stubdata_dir,'00004.raw')) == []) + self.assertTrue(self.app.diagnostic_query( + bibcode_list=['0004arXiv.........Z'], + source_filename_list=os.path.join(self.arXiv_stubdata_dir, '00004.raw') + ) == []) def test_insert_reference_record(self): """ test inserting reference_source record """ @@ -249,13 +268,15 @@ def test_insert_reference_record(self): # see that it is returned without it being inserted with self.app.session_scope() as session: count = self.app.get_count_reference_source_records(session) - reference_record = ReferenceSource(bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw'), - resolved_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'), - parser_name=self.app.get_parser(os.path.join(self.arXiv_stubdata_dir,'00001.raw')).get('name')) + reference_record = ReferenceSource( + bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + resolved_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), + parser_name=self.app.get_parser(os.path.join(self.arXiv_stubdata_dir, '00001.raw')).get('name') + ) bibcode, source_filename = self.app.insert_reference_source_record(session, reference_record) self.assertTrue(bibcode == '0001arXiv.........Z') - self.assertTrue(source_filename == os.path.join(self.arXiv_stubdata_dir,'00001.raw')) + self.assertTrue(source_filename == os.path.join(self.arXiv_stubdata_dir, '00001.raw')) self.assertTrue(self.app.get_count_reference_source_records(session) == count) def test_parser_name(self): @@ -274,7 +295,7 @@ def test_parser_name(self): 'AGU': ['/JGR/0101/issD14.agu.xml', AGUtoREFs], 'arXiv': ['/arXiv/2011/00324.raw', ARXIVtoREFs], } - for name,info in parser.items(): + for name, info in parser.items(): self.assertEqual(name, self.app.get_parser(info[0]).get('name')) 
self.assertEqual(info[1], verify(name)) # now verify couple of errors @@ -298,7 +319,7 @@ def test_reference_service_endpoint(self): 'arXiv': '/text', 'AEdRvHTML': '/text', } - for name,endpoint in parser.items(): + for name, endpoint in parser.items(): self.assertEqual(endpoint, self.app.get_reference_service_endpoint(name)) # now verify an error self.assertEqual(self.app.get_reference_service_endpoint('errorname'), '') @@ -316,8 +337,10 @@ def test_stats_compare(self): "| review of the physics, searches and implications, | | | | | | | | | |\n" \ "| 1709.02923. | | | | | | | | | |\n" \ "+--------------------------------------------------------------+---------------------+---------------------+-----------------+-----------------+-------+-------+-------+-------+-------+" - result_got, num_references, num_resolved = self.app.get_service_classic_compare_stats_grid(source_bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw')) + result_got, num_references, num_resolved = self.app.get_service_classic_compare_stats_grid( + source_bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw') + ) self.assertEqual(result_got, result_expected) self.assertEqual(num_references, 2) self.assertEqual(num_resolved, 2) @@ -326,7 +349,7 @@ def test_reprocess_references(self): """ test reprocessing references """ result_expected_year = [ {'source_bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), 'source_modified': datetime(2020, 4, 3, 18, 8, 42), 'parser_name': 'arXiv', 'references': [{'item_num': 2, @@ -335,24 +358,30 @@ def test_reprocess_references(self): ] result_expected_bibstem = [ {'source_bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), 
'source_modified': datetime(2020, 4, 3, 18, 8, 42), 'parser_name': 'arXiv', 'references': [{'item_num': 2, 'refstr': 'Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 ', 'refraw': 'Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 '}] - }, + }, {'source_bibcode': '0003arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw'), 'source_modified': datetime(2020, 4, 3, 18, 8, 32), 'parser_name': 'arXiv', 'references': [{'item_num': 2, 'refstr': 'Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 ', 'refraw': 'Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 '}] - } + } ] - self.assertEqual(self.app.get_reprocess_records(ReprocessQueryType.year, match_bibcode='2019', score_cutoff=None, date_cutoff=None), result_expected_year) - self.assertEqual(self.app.get_reprocess_records(ReprocessQueryType.bibstem, match_bibcode='A&A..', score_cutoff=None, date_cutoff=None), result_expected_bibstem) + self.assertEqual( + self.app.get_reprocess_records(ReprocessQueryType.year, match_bibcode='2019', score_cutoff=None, date_cutoff=None), + result_expected_year + ) + self.assertEqual( + self.app.get_reprocess_records(ReprocessQueryType.bibstem, match_bibcode='A&A..', score_cutoff=None, date_cutoff=None), + result_expected_bibstem + ) references_and_ids_year = [ {'id': 'H4I2', 'reference': 'Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 
2019, A&A, 625, A136 '} @@ -361,7 +390,8 @@ def test_reprocess_references(self): source_bibcode=result_expected_year[0]['source_bibcode'], source_filename=result_expected_year[0]['source_filename'], source_modified=result_expected_year[0]['source_modified'], - retry_records=result_expected_year[0]['references']) + retry_records=result_expected_year[0]['references'] + ) self.assertTrue(reprocess_references) self.assertTrue(reprocess_references, references_and_ids_year) current_num_records = [ @@ -427,10 +457,12 @@ def test_populate_tables_pre_resolved_initial_status_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - results = self.app.populate_tables_pre_resolved_initial_status('0001arXiv.........Z', - os.path.join(self.arXiv_stubdata_dir,'00001.raw'), - 'arXiv', - references=[]) + results = self.app.populate_tables_pre_resolved_initial_status( + '0001arXiv.........Z', + os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + 'arXiv', + references=[] + ) self.assertEqual(results, []) mock_session.rollback.assert_called_once() mock_error.assert_called() @@ -442,10 +474,12 @@ def test_populate_tables_pre_resolved_retry_status_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - results = self.app.populate_tables_pre_resolved_retry_status('0001arXiv.........Z', - os.path.join(self.arXiv_stubdata_dir,'00001.raw'), - source_modified='', - retry_records=[]) + results = self.app.populate_tables_pre_resolved_retry_status( + '0001arXiv.........Z', + os.path.join(self.arXiv_stubdata_dir, '00001.raw'), + source_modified='', + retry_records=[] + ) self.assertEqual(results, []) mock_session.rollback.assert_called_once() mock_error.assert_called() @@ -457,25 +491,39 @@ def test_populate_tables_post_resolved_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked 
SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - result = self.app.populate_tables_post_resolved(resolved_reference=[], - source_bibcode='0001arXiv.........Z', - classic_resolved_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw.results')) + result = self.app.populate_tables_post_resolved( + resolved_reference=[], + source_bibcode='0001arXiv.........Z', + classic_resolved_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw.results') + ) self.assertEqual(result, False) mock_session.rollback.assert_called_once() mock_error.assert_called() def test_populate_tables_post_resolved_with_classic(self): - """ test populate_tables_post_resolved when resolved_classic is available """ + """ test populate_tables_post_resolved when resolved_classic is available AND external_identifier is set """ resolved_reference = [ - {'id': 'H1I1', 'refstring': 'Reference 1', 'bibcode': '2023A&A...657A...1X', 'score': 1.0}, - {'id': 'H1I2', 'refstring': 'Reference 2', 'bibcode': '2023A&A...657A...2X', 'score': 0.8} + { + 'id': 'H1I1', + 'refstring': 'Reference 1', + 'bibcode': '2023A&A...657A...1X', + 'score': 1.0, + 'external_identifier': ['doi:10.1234/abc', 'arxiv:2301.00001'], + }, + { + 'id': 'H1I2', + 'refstring': 'Reference 2', + 'bibcode': '2023A&A...657A...2X', + 'score': 0.8, + 'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'], + } ] source_bibcode = "2023A&A...657A...1X" classic_resolved_filename = "classic_results.txt" classic_resolved_reference = [ - (1, "2023A&A...657A...1X", "1", "MATCH"), - (2, "2023A&A...657A...2X", "1", "MATCH") + (1, "2023A&A...657A...657A...1X", "1", "MATCH"), + (2, "2023A&A...657A...657A...2X", "1", "MATCH") ] with patch.object(self.app, "session_scope"), \ @@ -491,6 +539,12 @@ def test_populate_tables_post_resolved_with_classic(self): mock_insert.assert_called_once() mock_logger.assert_called_with("Updated 2 resolved reference records successfully.") + # Check whether external_identifier is populated 
with correct data + _, resolved_records = mock_update.call_args[0] + self.assertEqual(len(resolved_records), 2) + self.assertEqual(_get_external_identifier(resolved_records[0]), ['doi:10.1234/abc', 'arxiv:2301.00001']) + self.assertEqual(_get_external_identifier(resolved_records[1]), ['ascl:2301.001', 'doi:10.9999/xyz']) + @patch("adsrefpipe.app.ProcessedHistory") @patch("adsrefpipe.app.ResolvedReference") @patch("adsrefpipe.app.CompareClassic") @@ -514,16 +568,28 @@ def test_get_service_classic_compare_tags(self, mock_compare, mock_resolved, moc result1 = self.app.get_service_classic_compare_tags(mock_session, source_bibcode="2023A&A...657A...1X", source_filename="") self.assertEqual(result1, "mock_final_subquery") - expected_filter_bibcode = and_(mock_processed.id == mock_resolved.history_id, literal('"2023A&A...657A...1X').op('~')(mock_processed.bibcode)) - found_bibcode_filter = any(call.args and expected_filter_bibcode.compare(call.args[0]) for call in mock_session.query().filter.call_args_list) + expected_filter_bibcode = and_( + mock_processed.id == mock_resolved.history_id, + literal('"2023A&A...657A...1X').op('~')(mock_processed.bibcode) + ) + found_bibcode_filter = any( + call.args and expected_filter_bibcode.compare(call.args[0]) + for call in mock_session.query().filter.call_args_list + ) self.assertTrue(found_bibcode_filter) # test case 2: Only source_filename are provided result2 = self.app.get_service_classic_compare_tags(mock_session, source_bibcode="", source_filename="some_source_file.txt") self.assertEqual(result2, "mock_final_subquery") - expected_filter_filename = and_(mock_processed.id == mock_resolved.history_id, literal('2023A&A...657A...1X').op('~')(mock_processed.source_filename)) - found_filename_filter = any(call.args and expected_filter_filename.compare(call.args[0]) for call in mock_session.query().filter.call_args_list) + expected_filter_filename = and_( + mock_processed.id == mock_resolved.history_id, + 
literal('2023A&A...657A...1X').op('~')(mock_processed.source_filename) + ) + found_filename_filter = any( + call.args and expected_filter_filename.compare(call.args[0]) + for call in mock_session.query().filter.call_args_list + ) self.assertTrue(found_filename_filter) def test_get_service_classic_compare_stats_grid_error(self): @@ -545,10 +611,15 @@ def test_get_service_classic_compare_stats_grid_error(self): # mock `session.query(...).all()` to return an empty list mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] - result = self.app.get_service_classic_compare_stats_grid(source_bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw')) + result = self.app.get_service_classic_compare_stats_grid( + source_bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw') + ) - self.assertEqual(result, ('Unable to fetch data for reference source file `%s` from database!'%os.path.join(self.arXiv_stubdata_dir,'00001.raw'), -1, -1)) + self.assertEqual( + result, + ('Unable to fetch data for reference source file `%s` from database!' 
% os.path.join(self.arXiv_stubdata_dir, '00001.raw'), -1, -1) + ) @patch("adsrefpipe.app.datetime") def test_filter_reprocess_query(self, mock_datetime): @@ -598,12 +669,9 @@ def test_filter_reprocess_query(self, mock_datetime): # Test case: date_cutoff is applied mock_query.reset_mock() self.app.filter_reprocess_query(mock_query, ReprocessQueryType.score, 0.8, "", 10) - expected_since = datetime(2025, 1, 1) - timedelta(days=10) mock_query.filter.assert_called() called_args, _ = mock_query.filter.call_args compiled_query = called_args[0].compile(dialect=postgresql.dialect()) - print(str(called_args[0])) - print(compiled_query.params) self.assertTrue(str(called_args[0]), 'resolved_reference.score <= :score_1') self.assertTrue(compiled_query.params.get('score_1'), 0.8) @@ -620,9 +688,11 @@ def test_get_reprocess_records(self): # mock query results with same history_id to trigger the else block mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - MockRow(history_id=1, item_num=1, refstr="Reference 1", refraw="Raw 1", source_bibcode="2023A&A...657A...1X", + MockRow(history_id=1, item_num=1, refstr="Reference 1", refraw="Raw 1", + source_bibcode="2023A&A...657A...1X", source_filename="some_source_file.txt", source_modified="D1", parser_name="arXiv"), - MockRow(history_id=1, item_num=2, refstr="Reference 2", refraw="Raw 2", source_bibcode="2023A&A...657A...1X", + MockRow(history_id=1, item_num=2, refstr="Reference 2", refraw="Raw 2", + source_bibcode="2023A&A...657A...1X", source_filename="some_source_file.txt", source_modified="D1", parser_name="arXiv"), ] @@ -643,8 +713,10 @@ def test_get_resolved_references_all(self): # mock query results with highest scores mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv"), - 
MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv"), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, + resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv"), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, + resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv"), ] results = self.app.get_resolved_references_all("2023A&A...657A...1X") @@ -667,12 +739,16 @@ def test_get_resolved_references(self): mock_session = mock_session_scope.return_value.__enter__.return_value # Define a mock SQLAlchemy row with namedtuple - MockRow = namedtuple("MockRow", ["source_bibcode", "date", "id", "resolved_bibcode", "score", "parser_name", "parser_priority"]) + MockRow = namedtuple("MockRow", + ["source_bibcode", "date", "id", "resolved_bibcode", "score", "parser_name", + "parser_priority"]) # Mock query results with highest-ranked records mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv", parser_priority=1), - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv", parser_priority=1), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, + resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv", parser_priority=1), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, + resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv", parser_priority=1), ] results = self.app.get_resolved_references("2023A&A...657A...1X") @@ -741,7 +817,8 @@ def test_compare_classic_toJSON(self): item_num=2, bibcode="0001arXiv.........Z", 
score=1, - state="MATCH") + state="MATCH" + ) expected_json = { "history_id": 1, "item_num": 2, @@ -760,19 +837,6 @@ class TestDatabaseNoStubdata(unittest.TestCase): maxDiff = None - # postgresql_url_dict = { - # 'port': 5432, - # 'host': '127.0.0.1', - # 'user': 'postgres', - # 'database': 'postgres' - # } - # postgresql_url = 'postgresql://{user}:{user}@{host}:{port}/{database}' \ - # .format(user=postgresql_url_dict['user'], - # host=postgresql_url_dict['host'], - # port=postgresql_url_dict['port'], - # database=postgresql_url_dict['database'] - # ) - _postgresql = testing.postgresql.Postgresql() postgresql_url = _postgresql.url() @@ -811,7 +875,8 @@ def test_populate_tables(self): references = [ { "refstr": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", - "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. "}, + "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. " + }, { "refstr": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923." @@ -826,8 +891,11 @@ def test_populate_tables(self): { "refstr": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", - "id": "H1I2"} + "id": "H1I2" + } ] + + # IMPORTANT: use the real column name expected by app/models: external_identifier (list) resolved_references = [ { "score": "1.0", @@ -835,7 +903,7 @@ def test_populate_tables(self): "refstring": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. 
", "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", "id": "H1I1", - "ext_id": "ExtID1" + "external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"] }, { "score": "1.0", @@ -843,9 +911,10 @@ def test_populate_tables(self): "refstring": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "id": "H1I2", - "ext_id": "ExtID2" + "external_identifier": ["arxiv:1709.02923", "ascl:2301.001"] } ] + arXiv_stubdata_dir = os.path.join(self.test_dir, 'unittests/stubdata/txt/arXiv/0/') with self.app.session_scope() as session: session.query(Action).delete() @@ -857,20 +926,36 @@ def test_populate_tables(self): session.bulk_save_objects(parsers_records) session.commit() - references = self.app.populate_tables_pre_resolved_initial_status( + references_out = self.app.populate_tables_pre_resolved_initial_status( source_bibcode='0001arXiv.........Z', - source_filename=os.path.join(arXiv_stubdata_dir,'00001.raw'), - parsername=self.app.get_parser(os.path.join(arXiv_stubdata_dir,'00001.raw')).get('name'), - references=references) + source_filename=os.path.join(arXiv_stubdata_dir, '00001.raw'), + parsername=self.app.get_parser(os.path.join(arXiv_stubdata_dir, '00001.raw')).get('name'), + references=references + ) - self.assertTrue(references) - self.assertTrue(references == references_and_ids) + self.assertTrue(references_out) + self.assertTrue(references_out == references_and_ids) status = self.app.populate_tables_post_resolved( resolved_reference=resolved_references, source_bibcode='0001arXiv.........Z', - classic_resolved_filename=os.path.join(arXiv_stubdata_dir, '00001.raw.result')) - self.assertTrue(status == True) + classic_resolved_filename=os.path.join(arXiv_stubdata_dir, '00001.raw.result') + ) + 
self.assertTrue(status is True) + + # NEW: Verify external_identifier was persisted for the two updated rows. + # We know history_id should be 1 for the first inserted ProcessedHistory in an empty DB. + rows = ( + session.query(ResolvedReference) + .filter(ResolvedReference.history_id == 1) + .order_by(ResolvedReference.item_num.asc()) + .all() + ) + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0].item_num, 1) + self.assertEqual(rows[1].item_num, 2) + self.assertEqual(rows[0].external_identifier, ["arxiv:1009.5514", "doi:10.1234/abc"]) + self.assertEqual(rows[1].external_identifier, ["arxiv:1709.02923", "ascl:2301.001"]) def test_get_parser_error(self): """ test get_parser when it errors for unrecognized source filename """ @@ -881,3 +966,4 @@ def test_get_parser_error(self): if __name__ == '__main__': unittest.main() + From 33d1382c49e86fd54f9b6115ec4381de4ecacc79 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Tue, 27 Jan 2026 11:24:31 -0800 Subject: [PATCH 7/8] external identifier update --- adsrefpipe/tests/unittests/test_app.py | 284 ++++++++++--------------- 1 file changed, 107 insertions(+), 177 deletions(-) diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index 8c21957..b99a446 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -33,11 +33,6 @@ import testing.postgresql -def _get_external_identifier(rr_obj): - """Return external_identifier from a ResolvedReference ORM object.""" - return getattr(rr_obj, "external_identifier", None) - - class TestDatabase(unittest.TestCase): """ @@ -77,12 +72,9 @@ def add_stub_data(self): self.arXiv_stubdata_dir = os.path.join(self.test_dir, 'unittests/stubdata/txt/arXiv/0/') reference_source = [ - ('0001arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), 'arXiv'), - ('0002arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00002.raw'), - 
os.path.join(self.arXiv_stubdata_dir, '00002.raw.result'), 'arXiv'), - ('0003arXiv.........Z', os.path.join(self.arXiv_stubdata_dir, '00003.raw'), - os.path.join(self.arXiv_stubdata_dir, '00003.raw.result'), 'arXiv') + ('0001arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00001.raw'),os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'),'arXiv'), + ('0002arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00002.raw'),os.path.join(self.arXiv_stubdata_dir,'00002.raw.result'),'arXiv'), + ('0003arXiv.........Z',os.path.join(self.arXiv_stubdata_dir,'00003.raw'),os.path.join(self.arXiv_stubdata_dir,'00003.raw.result'),'arXiv') ] processed_history = [ @@ -91,8 +83,6 @@ def add_stub_data(self): ('2020-04-03 18:08:32', '2020-05-11 11:14:28', '128', '109') ] - # Add external identifiers for each resolved reference to verify persistence in DB - # Each tuple: (reference_str, bibcode, score, external_identifier_list) resolved_reference = [ [ ('J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. 
', @@ -116,16 +106,16 @@ def add_stub_data(self): compare_classic = [ [ - ('2010arXiv1009.5514U', 1, 'DIFF'), - ('2017arXiv170902923M', 1, 'DIFF') + ('2010arXiv1009.5514U',1,'DIFF'), + ('2017arXiv170902923M',1,'DIFF') ], [ - ('2011MNRAS.417..709A', 1, 'MATCH'), - ('2019A&A...625A.136A', 1, 'MATCH') + ('2011MNRAS.417..709A',1,'MATCH'), + ('2019A&A...625A.136A',1,'MATCH') ], [ - ('2017ApJ...842L..24A', 1, 'MATCH'), - ('2016A&A...586A..71A', 1, 'MATCH') + ('2017ApJ...842L..24A',1,'MATCH'), + ('2016A&A...586A..71A',1,'MATCH') ] ] @@ -139,56 +129,44 @@ def add_stub_data(self): session.bulk_save_objects(parsers_records) session.commit() - for i, (a_reference, a_history) in enumerate(zip(reference_source, processed_history)): - reference_record = ReferenceSource( - bibcode=a_reference[0], - source_filename=a_reference[1], - resolved_filename=a_reference[2], - parser_name=a_reference[3] - ) + for i, (a_reference,a_history) in enumerate(zip(reference_source,processed_history)): + reference_record = ReferenceSource(bibcode=a_reference[0], + source_filename=a_reference[1], + resolved_filename=a_reference[2], + parser_name=a_reference[3]) bibcode, source_filename = self.app.insert_reference_source_record(session, reference_record) self.assertTrue(bibcode == a_reference[0]) self.assertTrue(source_filename == a_reference[1]) - history_record = ProcessedHistory( - bibcode=bibcode, - source_filename=source_filename, - source_modified=a_history[0], - status=Action().get_status_new(), - date=a_history[1], - total_ref=a_history[2] - ) + history_record = ProcessedHistory(bibcode=bibcode, + source_filename=source_filename, + source_modified=a_history[0], + status=Action().get_status_new(), + date=a_history[1], + total_ref=a_history[2]) history_id = self.app.insert_history_record(session, history_record) self.assertTrue(history_id != -1) resolved_records = [] compare_records = [] - for j, (service, classic) in enumerate(zip(resolved_reference[i], compare_classic[i])): - refstr, bib, sc, 
ext_ids = service - resolved_record = ResolvedReference( - history_id=history_id, - item_num=j + 1, - reference_str=refstr, - bibcode=bib, - score=sc, - reference_raw=refstr, - external_identifier=ext_ids - ) + for j, (service,classic) in enumerate(zip(resolved_reference[i],compare_classic[i])): + resolved_record = ResolvedReference(history_id=history_id, + item_num=j+1, + reference_str=service[0], + bibcode=service[1], + score=service[2], + reference_raw=service[0]) resolved_records.append(resolved_record) - - compare_record = CompareClassic( - history_id=history_id, - item_num=j + 1, - bibcode=classic[0], - score=classic[1], - state=classic[2] - ) + compare_record = CompareClassic(history_id=history_id, + item_num=j+1, + bibcode=classic[0], + score=classic[1], + state=classic[2]) compare_records.append(compare_record) - success = self.app.insert_resolved_reference_records(session, resolved_records) - self.assertTrue(success is True) + self.assertTrue(success == True) success = self.app.insert_compare_records(session, compare_records) - self.assertTrue(success is True) + self.assertTrue(success == True) session.commit() def test_query_reference_tbl(self): @@ -196,8 +174,8 @@ def test_query_reference_tbl(self): result_expected = [ { 'bibcode': '0001arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00001.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:13:36', @@ -205,8 +183,8 @@ def test_query_reference_tbl(self): 'last_run_num_resolved_references': 2 }, { 'bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), - 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw.result'), + 'source_filename': 
os.path.join(self.arXiv_stubdata_dir,'00002.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:13:53', @@ -214,8 +192,8 @@ def test_query_reference_tbl(self): 'last_run_num_resolved_references': 2 }, { 'bibcode': '0003arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw'), - 'resolved_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw.result'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw'), + 'resolved_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw.result'), 'parser_name': 'arXiv', 'num_runs': 1, 'last_run_date': '2020-05-11 11:14:28', @@ -230,11 +208,9 @@ def test_query_reference_tbl(self): self.assertTrue(result_expected == result_got) # test querying filenames - filenames = [ - os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - os.path.join(self.arXiv_stubdata_dir, '00002.raw'), - os.path.join(self.arXiv_stubdata_dir, '00003.raw') - ] + filenames = [os.path.join(self.arXiv_stubdata_dir,'00001.raw'), + os.path.join(self.arXiv_stubdata_dir,'00002.raw'), + os.path.join(self.arXiv_stubdata_dir,'00003.raw')] result_got = self.app.diagnostic_query(source_filename_list=filenames) self.assertTrue(result_expected == result_got) @@ -253,13 +229,11 @@ def test_query_reference_tbl_when_non_exits(self): self.assertTrue(self.app.diagnostic_query(bibcode_list=['0004arXiv.........Z']) == []) # test when filename does not exist - self.assertTrue(self.app.diagnostic_query(source_filename_list=os.path.join(self.arXiv_stubdata_dir, '00004.raw')) == []) + self.assertTrue(self.app.diagnostic_query(source_filename_list=os.path.join(self.arXiv_stubdata_dir,'00004.raw')) == []) # test when both bibcode and filename are passed and nothing is returned - self.assertTrue(self.app.diagnostic_query( - bibcode_list=['0004arXiv.........Z'], - source_filename_list=os.path.join(self.arXiv_stubdata_dir, 
'00004.raw') - ) == []) + self.assertTrue(self.app.diagnostic_query(bibcode_list=['0004arXiv.........Z'], + source_filename_list=os.path.join(self.arXiv_stubdata_dir,'00004.raw')) == []) def test_insert_reference_record(self): """ test inserting reference_source record """ @@ -268,15 +242,13 @@ def test_insert_reference_record(self): # see that it is returned without it being inserted with self.app.session_scope() as session: count = self.app.get_count_reference_source_records(session) - reference_record = ReferenceSource( - bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - resolved_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw.result'), - parser_name=self.app.get_parser(os.path.join(self.arXiv_stubdata_dir, '00001.raw')).get('name') - ) + reference_record = ReferenceSource(bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw'), + resolved_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw.result'), + parser_name=self.app.get_parser(os.path.join(self.arXiv_stubdata_dir,'00001.raw')).get('name')) bibcode, source_filename = self.app.insert_reference_source_record(session, reference_record) self.assertTrue(bibcode == '0001arXiv.........Z') - self.assertTrue(source_filename == os.path.join(self.arXiv_stubdata_dir, '00001.raw')) + self.assertTrue(source_filename == os.path.join(self.arXiv_stubdata_dir,'00001.raw')) self.assertTrue(self.app.get_count_reference_source_records(session) == count) def test_parser_name(self): @@ -295,7 +267,7 @@ def test_parser_name(self): 'AGU': ['/JGR/0101/issD14.agu.xml', AGUtoREFs], 'arXiv': ['/arXiv/2011/00324.raw', ARXIVtoREFs], } - for name, info in parser.items(): + for name,info in parser.items(): self.assertEqual(name, self.app.get_parser(info[0]).get('name')) self.assertEqual(info[1], verify(name)) # now verify couple of errors @@ -319,7 +291,7 @@ def test_reference_service_endpoint(self): 'arXiv': '/text', 
'AEdRvHTML': '/text', } - for name, endpoint in parser.items(): + for name,endpoint in parser.items(): self.assertEqual(endpoint, self.app.get_reference_service_endpoint(name)) # now verify an error self.assertEqual(self.app.get_reference_service_endpoint('errorname'), '') @@ -337,10 +309,8 @@ def test_stats_compare(self): "| review of the physics, searches and implications, | | | | | | | | | |\n" \ "| 1709.02923. | | | | | | | | | |\n" \ "+--------------------------------------------------------------+---------------------+---------------------+-----------------+-----------------+-------+-------+-------+-------+-------+" - result_got, num_references, num_resolved = self.app.get_service_classic_compare_stats_grid( - source_bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw') - ) + result_got, num_references, num_resolved = self.app.get_service_classic_compare_stats_grid(source_bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw')) self.assertEqual(result_got, result_expected) self.assertEqual(num_references, 2) self.assertEqual(num_resolved, 2) @@ -349,7 +319,7 @@ def test_reprocess_references(self): """ test reprocessing references """ result_expected_year = [ {'source_bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw'), 'source_modified': datetime(2020, 4, 3, 18, 8, 42), 'parser_name': 'arXiv', 'references': [{'item_num': 2, @@ -358,30 +328,24 @@ def test_reprocess_references(self): ] result_expected_bibstem = [ {'source_bibcode': '0002arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00002.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00002.raw'), 'source_modified': datetime(2020, 4, 3, 18, 8, 42), 'parser_name': 'arXiv', 'references': [{'item_num': 2, 'refstr': 'Arcangeli, J., Desert, J.-M., 
Parmentier, V., et al. 2019, A&A, 625, A136 ', 'refraw': 'Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 2019, A&A, 625, A136 '}] - }, + }, {'source_bibcode': '0003arXiv.........Z', - 'source_filename': os.path.join(self.arXiv_stubdata_dir, '00003.raw'), + 'source_filename': os.path.join(self.arXiv_stubdata_dir,'00003.raw'), 'source_modified': datetime(2020, 4, 3, 18, 8, 32), 'parser_name': 'arXiv', 'references': [{'item_num': 2, 'refstr': 'Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 ', 'refraw': 'Ackermann, M., Albert, A., Atwood, W. B., et al. 2016, A&A, 586, A71 '}] - } + } ] - self.assertEqual( - self.app.get_reprocess_records(ReprocessQueryType.year, match_bibcode='2019', score_cutoff=None, date_cutoff=None), - result_expected_year - ) - self.assertEqual( - self.app.get_reprocess_records(ReprocessQueryType.bibstem, match_bibcode='A&A..', score_cutoff=None, date_cutoff=None), - result_expected_bibstem - ) + self.assertEqual(self.app.get_reprocess_records(ReprocessQueryType.year, match_bibcode='2019', score_cutoff=None, date_cutoff=None), result_expected_year) + self.assertEqual(self.app.get_reprocess_records(ReprocessQueryType.bibstem, match_bibcode='A&A..', score_cutoff=None, date_cutoff=None), result_expected_bibstem) references_and_ids_year = [ {'id': 'H4I2', 'reference': 'Arcangeli, J., Desert, J.-M., Parmentier, V., et al. 
2019, A&A, 625, A136 '} @@ -390,8 +354,7 @@ def test_reprocess_references(self): source_bibcode=result_expected_year[0]['source_bibcode'], source_filename=result_expected_year[0]['source_filename'], source_modified=result_expected_year[0]['source_modified'], - retry_records=result_expected_year[0]['references'] - ) + retry_records=result_expected_year[0]['references']) self.assertTrue(reprocess_references) self.assertTrue(reprocess_references, references_and_ids_year) current_num_records = [ @@ -457,12 +420,10 @@ def test_populate_tables_pre_resolved_initial_status_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - results = self.app.populate_tables_pre_resolved_initial_status( - '0001arXiv.........Z', - os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - 'arXiv', - references=[] - ) + results = self.app.populate_tables_pre_resolved_initial_status('0001arXiv.........Z', + os.path.join(self.arXiv_stubdata_dir,'00001.raw'), + 'arXiv', + references=[]) self.assertEqual(results, []) mock_session.rollback.assert_called_once() mock_error.assert_called() @@ -474,12 +435,10 @@ def test_populate_tables_pre_resolved_retry_status_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - results = self.app.populate_tables_pre_resolved_retry_status( - '0001arXiv.........Z', - os.path.join(self.arXiv_stubdata_dir, '00001.raw'), - source_modified='', - retry_records=[] - ) + results = self.app.populate_tables_pre_resolved_retry_status('0001arXiv.........Z', + os.path.join(self.arXiv_stubdata_dir,'00001.raw'), + source_modified='', + retry_records=[]) self.assertEqual(results, []) mock_session.rollback.assert_called_once() mock_error.assert_called() @@ -491,17 +450,15 @@ def test_populate_tables_post_resolved_exception(self): mock_session.commit.side_effect = SQLAlchemyError("Mocked 
SQLAlchemyError") with patch.object(self.app.logger, 'error') as mock_error: - result = self.app.populate_tables_post_resolved( - resolved_reference=[], - source_bibcode='0001arXiv.........Z', - classic_resolved_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw.results') - ) + result = self.app.populate_tables_post_resolved(resolved_reference=[], + source_bibcode='0001arXiv.........Z', + classic_resolved_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw.results')) self.assertEqual(result, False) mock_session.rollback.assert_called_once() mock_error.assert_called() def test_populate_tables_post_resolved_with_classic(self): - """ test populate_tables_post_resolved when resolved_classic is available AND external_identifier is set """ + """ test populate_tables_post_resolved when resolved_classic is available """ resolved_reference = [ { @@ -518,12 +475,12 @@ def test_populate_tables_post_resolved_with_classic(self): 'score': 0.8, 'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'], } - ] + source_bibcode = "2023A&A...657A...1X" classic_resolved_filename = "classic_results.txt" classic_resolved_reference = [ - (1, "2023A&A...657A...657A...1X", "1", "MATCH"), - (2, "2023A&A...657A...657A...2X", "1", "MATCH") + (1, "2023A&A...657A...1X", "1", "MATCH"), + (2, "2023A&A...657A...2X", "1", "MATCH") ] with patch.object(self.app, "session_scope"), \ @@ -568,28 +525,16 @@ def test_get_service_classic_compare_tags(self, mock_compare, mock_resolved, moc result1 = self.app.get_service_classic_compare_tags(mock_session, source_bibcode="2023A&A...657A...1X", source_filename="") self.assertEqual(result1, "mock_final_subquery") - expected_filter_bibcode = and_( - mock_processed.id == mock_resolved.history_id, - literal('"2023A&A...657A...1X').op('~')(mock_processed.bibcode) - ) - found_bibcode_filter = any( - call.args and expected_filter_bibcode.compare(call.args[0]) - for call in mock_session.query().filter.call_args_list - ) + expected_filter_bibcode = 
and_(mock_processed.id == mock_resolved.history_id, literal('"2023A&A...657A...1X').op('~')(mock_processed.bibcode)) + found_bibcode_filter = any(call.args and expected_filter_bibcode.compare(call.args[0]) for call in mock_session.query().filter.call_args_list) self.assertTrue(found_bibcode_filter) # test case 2: Only source_filename are provided result2 = self.app.get_service_classic_compare_tags(mock_session, source_bibcode="", source_filename="some_source_file.txt") self.assertEqual(result2, "mock_final_subquery") - expected_filter_filename = and_( - mock_processed.id == mock_resolved.history_id, - literal('2023A&A...657A...1X').op('~')(mock_processed.source_filename) - ) - found_filename_filter = any( - call.args and expected_filter_filename.compare(call.args[0]) - for call in mock_session.query().filter.call_args_list - ) + expected_filter_filename = and_(mock_processed.id == mock_resolved.history_id, literal('2023A&A...657A...1X').op('~')(mock_processed.source_filename)) + found_filename_filter = any(call.args and expected_filter_filename.compare(call.args[0]) for call in mock_session.query().filter.call_args_list) self.assertTrue(found_filename_filter) def test_get_service_classic_compare_stats_grid_error(self): @@ -611,15 +556,10 @@ def test_get_service_classic_compare_stats_grid_error(self): # mock `session.query(...).all()` to return an empty list mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] - result = self.app.get_service_classic_compare_stats_grid( - source_bibcode='0001arXiv.........Z', - source_filename=os.path.join(self.arXiv_stubdata_dir, '00001.raw') - ) + result = self.app.get_service_classic_compare_stats_grid(source_bibcode='0001arXiv.........Z', + source_filename=os.path.join(self.arXiv_stubdata_dir,'00001.raw')) - self.assertEqual( - result, - ('Unable to fetch data for reference source file `%s` from database!' 
% os.path.join(self.arXiv_stubdata_dir, '00001.raw'), -1, -1) - ) + self.assertEqual(result, ('Unable to fetch data for reference source file `%s` from database!'%os.path.join(self.arXiv_stubdata_dir,'00001.raw'), -1, -1)) @patch("adsrefpipe.app.datetime") def test_filter_reprocess_query(self, mock_datetime): @@ -669,9 +609,12 @@ def test_filter_reprocess_query(self, mock_datetime): # Test case: date_cutoff is applied mock_query.reset_mock() self.app.filter_reprocess_query(mock_query, ReprocessQueryType.score, 0.8, "", 10) + expected_since = datetime(2025, 1, 1) - timedelta(days=10) mock_query.filter.assert_called() called_args, _ = mock_query.filter.call_args compiled_query = called_args[0].compile(dialect=postgresql.dialect()) + print(str(called_args[0])) + print(compiled_query.params) self.assertTrue(str(called_args[0]), 'resolved_reference.score <= :score_1') self.assertTrue(compiled_query.params.get('score_1'), 0.8) @@ -688,11 +631,9 @@ def test_get_reprocess_records(self): # mock query results with same history_id to trigger the else block mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - MockRow(history_id=1, item_num=1, refstr="Reference 1", refraw="Raw 1", - source_bibcode="2023A&A...657A...1X", + MockRow(history_id=1, item_num=1, refstr="Reference 1", refraw="Raw 1", source_bibcode="2023A&A...657A...1X", source_filename="some_source_file.txt", source_modified="D1", parser_name="arXiv"), - MockRow(history_id=1, item_num=2, refstr="Reference 2", refraw="Raw 2", - source_bibcode="2023A&A...657A...1X", + MockRow(history_id=1, item_num=2, refstr="Reference 2", refraw="Raw 2", source_bibcode="2023A&A...657A...1X", source_filename="some_source_file.txt", source_modified="D1", parser_name="arXiv"), ] @@ -713,10 +654,8 @@ def test_get_resolved_references_all(self): # mock query results with highest scores mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - 
MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, - resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv"), - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, - resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv"), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv"), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv"), ] results = self.app.get_resolved_references_all("2023A&A...657A...1X") @@ -739,16 +678,12 @@ def test_get_resolved_references(self): mock_session = mock_session_scope.return_value.__enter__.return_value # Define a mock SQLAlchemy row with namedtuple - MockRow = namedtuple("MockRow", - ["source_bibcode", "date", "id", "resolved_bibcode", "score", "parser_name", - "parser_priority"]) + MockRow = namedtuple("MockRow", ["source_bibcode", "date", "id", "resolved_bibcode", "score", "parser_name", "parser_priority"]) # Mock query results with highest-ranked records mock_session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [ - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, - resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv", parser_priority=1), - MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, - resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv", parser_priority=1), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 1), id=1, resolved_bibcode="0001arXiv.........Z", score=0.95, parser_name="arXiv", parser_priority=1), + MockRow(source_bibcode="2023A&A...657A...1X", date=datetime(2025, 1, 2), id=2, resolved_bibcode="0002arXiv.........Z", score=0.85, parser_name="arXiv", parser_priority=1), ] results = 
self.app.get_resolved_references("2023A&A...657A...1X") @@ -817,8 +752,7 @@ def test_compare_classic_toJSON(self): item_num=2, bibcode="0001arXiv.........Z", score=1, - state="MATCH" - ) + state="MATCH") expected_json = { "history_id": 1, "item_num": 2, @@ -875,8 +809,7 @@ def test_populate_tables(self): references = [ { "refstr": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. ", - "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. " - }, + "refraw": "J.-P. Uzan, Varying constants, gravitation and cosmology, Living Rev. Rel. 14 (2011) 2, [1009.5514]. "}, { "refstr": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923." @@ -891,8 +824,7 @@ def test_populate_tables(self): { "refstr": "C. J. A. P. Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.", "refraw": "C. J. A. P. 
Martins, The status of varying constants: A review of the physics, searches and implications, 1709.02923.",
-                "id": "H1I2"
-            }
+                "id": "H1I2"}
         ]
 
         # IMPORTANT: use the real column name expected by app/models: external_identifier (list)
@@ -926,24 +858,22 @@ def test_populate_tables(self):
             session.bulk_save_objects(parsers_records)
             session.commit()
 
-            references_out = self.app.populate_tables_pre_resolved_initial_status(
+            references = self.app.populate_tables_pre_resolved_initial_status(
                 source_bibcode='0001arXiv.........Z',
-                source_filename=os.path.join(arXiv_stubdata_dir, '00001.raw'),
-                parsername=self.app.get_parser(os.path.join(arXiv_stubdata_dir, '00001.raw')).get('name'),
-                references=references
-            )
+                source_filename=os.path.join(arXiv_stubdata_dir,'00001.raw'),
+                parsername=self.app.get_parser(os.path.join(arXiv_stubdata_dir,'00001.raw')).get('name'),
+                references=references)
 
-            self.assertTrue(references_out)
-            self.assertTrue(references_out == references_and_ids)
+            self.assertTrue(references)
+            self.assertEqual(references, references_and_ids)
 
             status = self.app.populate_tables_post_resolved(
                 resolved_reference=resolved_references,
                 source_bibcode='0001arXiv.........Z',
-                classic_resolved_filename=os.path.join(arXiv_stubdata_dir, '00001.raw.result')
-            )
-            self.assertTrue(status is True)
+                classic_resolved_filename=os.path.join(arXiv_stubdata_dir, '00001.raw.result'))
+            self.assertTrue(status is True)
 
-            # NEW: Verify external_identifier was persisted for the two updated rows.
+            # Verify external_identifier was persisted on ResolvedReference rows
             # We know history_id should be 1 for the first inserted ProcessedHistory in an empty DB.
rows = ( session.query(ResolvedReference) From 6895fac016f0250a68dd381f6aaeaed4062ea994 Mon Sep 17 00:00:00 2001 From: thomasallen Date: Tue, 27 Jan 2026 12:03:19 -0800 Subject: [PATCH 8/8] unit test fix --- adsrefpipe/app.py | 28 +++++++++++++++++++++++++- adsrefpipe/tests/unittests/test_app.py | 10 +++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/adsrefpipe/app.py b/adsrefpipe/app.py index d3f12a8..f5c7516 100755 --- a/adsrefpipe/app.py +++ b/adsrefpipe/app.py @@ -416,6 +416,31 @@ def update_resolved_reference_records(self, session: object, resolved_list: List self.logger.debug("Added `ResolvedReference` records successfully.") return True + def update_resolved_reference_records(self, session: object, resolved_list: List[ResolvedReference]) -> bool: + """ + update resolved reference records in the database + """ + mappings = [] + for r in resolved_list: + mappings.append({ + # must include PK columns for bulk_update_mappings + "history_id": r.history_id, + "item_num": r.item_num, + "reference_str": r.reference_str, + + # fields to update + "bibcode": r.bibcode, + "score": r.score, + "reference_raw": r.reference_raw, + "external_identifier": _ensure_list(getattr(r, "external_identifier", None)) or [], + }) + + session.bulk_update_mappings(ResolvedReference, mappings) + session.flush() + self.logger.debug("Added `ResolvedReference` records successfully.") + return True + + def insert_compare_records(self, session: object, compared_list: List[CompareClassic]) -> bool: """ insert records into the compare classic table @@ -549,7 +574,8 @@ def populate_tables_post_resolved(self, resolved_reference: List, source_bibcode reference_str=ref.get('refstring', None), bibcode=ref.get('bibcode', None), score=ref.get('score', None), - reference_raw=ref.get('refstring', None)) + reference_raw=ref.get('refstring', None), + external_identifier=_ensure_list(ref.get('external_identifier', None)) or []) resolved_records.append(resolved_record) if 
resolved_classic: compare_record = CompareClassic(history_id=history_id, diff --git a/adsrefpipe/tests/unittests/test_app.py b/adsrefpipe/tests/unittests/test_app.py index b99a446..f00e7fb 100644 --- a/adsrefpipe/tests/unittests/test_app.py +++ b/adsrefpipe/tests/unittests/test_app.py @@ -32,6 +32,15 @@ import testing.postgresql +def _get_external_identifier(rec): + """ + Works whether rec is a dict (bulk mappings) or an ORM object. + """ + if rec is None: + return [] + if isinstance(rec, dict): + return rec.get("external_identifier") or [] + return getattr(rec, "external_identifier", None) or [] class TestDatabase(unittest.TestCase): @@ -475,6 +484,7 @@ def test_populate_tables_post_resolved_with_classic(self): 'score': 0.8, 'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'], } + ] source_bibcode = "2023A&A...657A...1X" classic_resolved_filename = "classic_results.txt"