diff --git a/README.md b/README.md index 844c96f..fc0ed37 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,7 @@ Given that foundation, load the dependencies: pip install -r requirements.txt ``` -Fourth, set up the local `rc.cfg` configuration file and run unit the -tests (see below) to confirm that this project has been installed and -configured properly. +Fourth, set up the local `rc.cfg` configuration file. To do this, you first need to download the file into the root folder of the project under the name `rc.cfg` and populate it with your credentials. Then, run the unit tests (see below) to confirm that this project has been installed and configured properly. ## Submodules @@ -116,7 +114,7 @@ Test coverage reports can be viewed at -### Step 2: Gather the DOIs, etc. +### Step 2: Gather the DOIs and additional metadata Use *title search* across the scholarly infrastructure APIs to identify a DOI and other metadata for each publication. @@ -133,10 +131,10 @@ See the `misses_step2.json` file which reports the title of each publication that failed every API lookup. -### Step 3: Gather the PDFs, etc. +### Step 3: Search by DOIs and additional metadata -Use *publication lookup* with DOIs across the scholarly infrastructure -APIs to identify open access PDFs, journals, authors, keywords, etc. +Using the DOIs identified by the second step, use *publication lookup* with DOIs across the scholarly infrastructure +APIs that were not used in step 2 to identify open access PDFs, journals, authors, keywords, etc. 
``` python run_step3.py diff --git a/run_final.py b/run_final.py index b599d33..1c158a7 100755 --- a/run_final.py +++ b/run_final.py @@ -65,6 +65,10 @@ def propagate_view (pub, graph, override): journal = graph.journals.select_best_entity(journal_list) view["journal"] = journal["id"] + # select the year of publication + year = graph.journals.extract_year(pub) + view["year"] = year + # apply the manual override if title in override: override[title]["used"] = True diff --git a/run_step2.py b/run_step2.py index 84803d3..e2f76dc 100755 --- a/run_step2.py +++ b/run_step2.py @@ -25,7 +25,7 @@ def gather_doi (schol, graph, partition, pub): title = pub["title"] title_match = False - for api in [schol.openaire, schol.europepmc, schol.dimensions]: + for api in [schol.crossref, schol.openaire, schol.europepmc]: try: if api.has_credentials(): response = api.title_search(title) @@ -76,7 +76,7 @@ def main (args): # already used all the API requests allowed in the time window if count == dimensions_requests_limits and time_elapsed < dimensions_time_limit: to_sleep = dimensions_time_limit - math.floor(time_elapsed) + 1 # adding some extra margin - #print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep) + print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep) time.sleep( to_sleep ) count = 0 t0 = time.time() diff --git a/run_step3.py b/run_step3.py index a4a3530..937a7d8 100755 --- a/run_step3.py +++ b/run_step3.py @@ -26,7 +26,7 @@ def lookup_doi (schol, graph, partition, pub): doi_list = [] doi_match = False - for source in ["original", schol.dimensions.name, schol.europepmc.name, schol.openaire.name]: + for source in ["original", schol.crossref.name, schol.europepmc.name, schol.openaire.name]: if (source in pub) and ("doi" in pub[source]): doi = graph.publications.verify_doi(pub[source]["doi"]) diff --git a/run_step4.py b/run_step4.py index 6ac0344..ed62902 100755 --- a/run_step4.py +++ b/run_step4.py @@ -58,7 +58,6 @@ def 
main (args): for partition, pub_iter in graph.iter_publications(graph.BUCKET_STAGE, filter=args.partition): for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]): journal_list, message, freq_issn = reconcile_journal(schol, graph, pub, disputed) - if len(journal_list) > 0: journal_tally = graph.tally_list(journal_list, ignores=graph.journals.IGNORE_JOURNALS) proposed[freq_issn] = journal_tally @@ -77,7 +76,6 @@ def main (args): # show a tentative list of journals, considered for adding for freq_issn, tally in proposed.items(): new_entity = graph.journals.add_entity(tally, freq_issn) - if new_entity: print("{},".format(json.dumps(new_entity, indent=2, sort_keys=True)))