diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py
index 9584b61..b352e88 100644
--- a/src/bibx/__init__.py
+++ b/src/bibx/__init__.py
@@ -28,7 +28,7 @@
     "read_wos",
 ]
 
-__version__ = "0.8.0"
+__version__ = "0.9.0"
 
 
 def query_openalex(
diff --git a/src/bibx/cli.py b/src/bibx/cli.py
index a3a094d..e34f0af 100644
--- a/src/bibx/cli.py
+++ b/src/bibx/cli.py
@@ -109,6 +109,16 @@ def openalex(
     rprint(graph)
 
 
+@app.command()
+def csv(filename: str) -> None:
+    """Parse a scopus CSV file and print the collection."""
+    # Decode explicitly instead of relying on the locale default encoding;
+    # "utf-8-sig" also drops a leading byte-order mark when one is present.
+    with open(filename, encoding="utf-8-sig") as f:
+        c = read_scopus_csv(f)
+        rprint(list(c.citation_pairs))
+
+
 def main() -> None:
     """Entry point for the CLI."""
     app()
diff --git a/src/bibx/sources/scopus_csv.py b/src/bibx/sources/scopus_csv.py
index 9b35ad0..07b7a01 100644
--- a/src/bibx/sources/scopus_csv.py
+++ b/src/bibx/sources/scopus_csv.py
@@ -13,6 +13,8 @@
 
 from .base import Source
 
+_NUM_AUTHOR_PARTS = 3
+
 logger = logging.getLogger(__name__)
 
 
@@ -24,6 +26,25 @@ def _split_str(value: str | None) -> list[str]:
     return value.strip().split("; ") if value else []
 
 
+def _rotate_authors(authors: list[str]) -> list[str]:
+    """Swap the first two comma-separated parts of each author string.
+
+    Each entry is split on ", " and must yield exactly ``_NUM_AUTHOR_PARTS``
+    parts: the first two are emitted in swapped order and the third is
+    dropped. Any other entry is logged at debug level and kept unchanged.
+    """
+    result: list[str] = []
+    for author in authors:
+        parts = author.split(", ")
+        if len(parts) != _NUM_AUTHOR_PARTS:
+            logger.debug("unexpected author format: %s", author)
+            result.append(author)
+            continue
+        initials, last, _ = parts
+        result.append(f"{last}, {initials}")
+    return result
+
+
 class Row(BaseModel):
     """Row model for Scopus CSV data."""
 
@@ -34,7 +55,7 @@ class Row(BaseModel):
     ]
     year: Annotated[int, Field(validation_alias="Year")]
     title: Annotated[str, Field(validation_alias="Title")]
-    journal: Annotated[str, Field(validation_alias="Abbreviated Source Title")]
+    journal: Annotated[str, Field(validation_alias="Source title")]
     volume: Annotated[
         str | None,
         Field(validation_alias="Volume"),
@@ -110,7 +131,7 @@ def _parse_file(self, file: TextIO) -> Generator[Article, None, None]:
                     label="",
                     ids=set(),
                     title=datum.title,
-                    authors=datum.authors,
+                    authors=_rotate_authors(datum.authors),
                     year=datum.year,
                     journal=datum.journal,
                     volume=datum.volume,