Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ Given that foundation, load the dependencies:
pip install -r requirements.txt
```

Fourth, set up the local `rc.cfg` configuration file and run the unit
tests (see below) to confirm that this project has been installed and
configured properly.
Fourth, set up the local `rc.cfg` configuration file. To do this, first download the file <https://github.com/Coleridge-Initiative/RCApi/blob/master/rc_template.cfg> into the root folder of the project under the name `rc.cfg` and populate it with your credentials. Then, run the unit tests (see below) to confirm that this project has been installed and configured properly.


## Submodules
Expand Down Expand Up @@ -116,7 +114,7 @@ Test coverage reports can be viewed at
<https://codecov.io/gh/Coleridge-Initiative/RCGraph>


### Step 2: Gather the DOIs, etc.
### Step 2: Gather the DOIs and additional metadata

Use *title search* across the scholarly infrastructure APIs to
identify a DOI and other metadata for each publication.
Expand All @@ -133,10 +131,10 @@ See the `misses_step2.json` file which reports the title of each
publication that failed every API lookup.


### Step 3: Gather the PDFs, etc.
### Step 3: Search by DOIs and additional metadata

Use *publication lookup* with DOIs across the scholarly infrastructure
APIs to identify open access PDFs, journals, authors, keywords, etc.
Using the DOIs identified in the second step, use *publication lookup* across the scholarly infrastructure
APIs that were not used in step 2 to identify open access PDFs, journals, authors, keywords, etc.

```
python run_step3.py
Expand Down
4 changes: 4 additions & 0 deletions run_final.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def propagate_view (pub, graph, override):
journal = graph.journals.select_best_entity(journal_list)
view["journal"] = journal["id"]

# select the year of publication
year = graph.journals.extract_year(pub)
view["year"] = year

# apply the manual override
if title in override:
override[title]["used"] = True
Expand Down
4 changes: 2 additions & 2 deletions run_step2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def gather_doi (schol, graph, partition, pub):
title = pub["title"]
title_match = False

for api in [schol.openaire, schol.europepmc, schol.dimensions]:
for api in [schol.crossref, schol.openaire, schol.europepmc]:
try:
if api.has_credentials():
response = api.title_search(title)
Expand Down Expand Up @@ -76,7 +76,7 @@ def main (args):
# already used all the API requests allowed in the time window
if count == dimensions_requests_limits and time_elapsed < dimensions_time_limit:
to_sleep = dimensions_time_limit - math.floor(time_elapsed) + 1 # adding some extra margin
#print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep)
print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep)
time.sleep( to_sleep )
count = 0
t0 = time.time()
Expand Down
2 changes: 1 addition & 1 deletion run_step3.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def lookup_doi (schol, graph, partition, pub):
doi_list = []
doi_match = False

for source in ["original", schol.dimensions.name, schol.europepmc.name, schol.openaire.name]:
for source in ["original", schol.crossref.name, schol.europepmc.name, schol.openaire.name]:
if (source in pub) and ("doi" in pub[source]):
doi = graph.publications.verify_doi(pub[source]["doi"])

Expand Down
2 changes: 0 additions & 2 deletions run_step4.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def main (args):
for partition, pub_iter in graph.iter_publications(graph.BUCKET_STAGE, filter=args.partition):
for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]):
journal_list, message, freq_issn = reconcile_journal(schol, graph, pub, disputed)

if len(journal_list) > 0:
journal_tally = graph.tally_list(journal_list, ignores=graph.journals.IGNORE_JOURNALS)
proposed[freq_issn] = journal_tally
Expand All @@ -77,7 +76,6 @@ def main (args):
# show a tentative list of journals, considered for adding
for freq_issn, tally in proposed.items():
new_entity = graph.journals.add_entity(tally, freq_issn)

if new_entity:
print("{},".format(json.dumps(new_entity, indent=2, sort_keys=True)))

Expand Down