Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ Given that foundation, load the dependencies:
pip install -r requirements.txt
```

Fourth, set up the local `rc.cfg` configuration file and run the unit
tests (see below) to confirm that this project has been installed and
configured properly.
Fourth, set up the local `rc.cfg` configuration file. To do this, first download the file <https://github.com/Coleridge-Initiative/RCApi/blob/master/rc_template.cfg> into the root folder of the project under the name `rc.cfg` and populate it with your credentials. Then, run the unit tests (see below) to confirm that this project has been installed and configured properly.


## Submodules
Expand Down Expand Up @@ -116,7 +114,7 @@ Test coverage reports can be viewed at
<https://codecov.io/gh/Coleridge-Initiative/RCGraph>


### Step 2: Gather the DOIs, etc.
### Step 2: Gather the DOIs and additional metadata

Use *title search* across the scholarly infrastructure APIs to
identify a DOI and other metadata for each publication.
Expand All @@ -133,10 +131,10 @@ See the `misses_step2.json` file which reports the title of each
publication that failed every API lookup.


### Step 3: Gather the PDFs, etc.
### Step 3: Search by DOIs and additional metadata

Use *publication lookup* with DOIs across the scholarly infrastructure
APIs to identify open access PDFs, journals, authors, keywords, etc.
Using the DOIs identified in the second step, use *publication lookup* across the scholarly infrastructure
APIs that were not used in step 2 to identify open access PDFs, journals, authors, keywords, etc.

```
python run_step3.py
Expand Down
4 changes: 4 additions & 0 deletions run_final.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def propagate_view (pub, graph, override):
journal = graph.journals.select_best_entity(journal_list)
view["journal"] = journal["id"]

# select the year of publication
year = graph.journals.extract_year(pub)
view["year"] = year

# apply the manual override
if title in override:
override[title]["used"] = True
Expand Down
4 changes: 2 additions & 2 deletions run_step2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def gather_doi (schol, graph, partition, pub):
title = pub["title"]
title_match = False

for api in [schol.openaire, schol.europepmc, schol.dimensions]:
for api in [schol.crossref, schol.openaire, schol.europepmc]:
try:
if api.has_credentials():
response = api.title_search(title)
Expand Down Expand Up @@ -76,7 +76,7 @@ def main (args):
# already used all the API requests allowed in the time window
if count == dimensions_requests_limits and time_elapsed < dimensions_time_limit:
to_sleep = dimensions_time_limit - math.floor(time_elapsed) + 1 # adding some extra margin
#print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep)
print("API calls:",count,"time elapsed:", time_elapsed, "- will sleep:",to_sleep)
time.sleep( to_sleep )
count = 0
t0 = time.time()
Expand Down
2 changes: 1 addition & 1 deletion run_step3.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def lookup_doi (schol, graph, partition, pub):
doi_list = []
doi_match = False

for source in ["original", schol.dimensions.name, schol.europepmc.name, schol.openaire.name]:
for source in ["original", schol.crossref.name, schol.europepmc.name, schol.openaire.name]:
if (source in pub) and ("doi" in pub[source]):
doi = graph.publications.verify_doi(pub[source]["doi"])

Expand Down
2 changes: 0 additions & 2 deletions run_step4.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def main (args):
for partition, pub_iter in graph.iter_publications(graph.BUCKET_STAGE, filter=args.partition):
for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]):
journal_list, message, freq_issn = reconcile_journal(schol, graph, pub, disputed)

if len(journal_list) > 0:
journal_tally = graph.tally_list(journal_list, ignores=graph.journals.IGNORE_JOURNALS)
proposed[freq_issn] = journal_tally
Expand All @@ -77,7 +76,6 @@ def main (args):
# show a tentative list of journals, considered for adding
for freq_issn, tally in proposed.items():
new_entity = graph.journals.add_entity(tally, freq_issn)

if new_entity:
print("{},".format(json.dumps(new_entity, indent=2, sort_keys=True)))

Expand Down